1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file implements interprocedural passes which walk the 11 /// call-graph deducing and/or propagating function attributes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/FunctionAttrs.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SCCIterator.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/SmallSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Analysis/AssumptionCache.h" 27 #include "llvm/Analysis/BasicAliasAnalysis.h" 28 #include "llvm/Analysis/CFG.h" 29 #include "llvm/Analysis/CGSCCPassManager.h" 30 #include "llvm/Analysis/CallGraph.h" 31 #include "llvm/Analysis/CallGraphSCCPass.h" 32 #include "llvm/Analysis/CaptureTracking.h" 33 #include "llvm/Analysis/LazyCallGraph.h" 34 #include "llvm/Analysis/MemoryLocation.h" 35 #include "llvm/Analysis/ValueTracking.h" 36 #include "llvm/IR/Argument.h" 37 #include "llvm/IR/Attributes.h" 38 #include "llvm/IR/BasicBlock.h" 39 #include "llvm/IR/Constant.h" 40 #include "llvm/IR/ConstantRangeList.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/InstIterator.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/IntrinsicInst.h" 48 #include "llvm/IR/Metadata.h" 49 #include "llvm/IR/ModuleSummaryIndex.h" 50 #include "llvm/IR/PassManager.h" 51 #include "llvm/IR/Type.h" 52 #include "llvm/IR/Use.h" 53 #include "llvm/IR/User.h" 54 #include "llvm/IR/Value.h" 55 #include "llvm/Support/Casting.h" 56 #include "llvm/Support/CommandLine.h" 57 #include "llvm/Support/Compiler.h" 58 #include "llvm/Support/Debug.h" 59 #include "llvm/Support/ErrorHandling.h" 60 #include "llvm/Support/raw_ostream.h" 61 #include "llvm/Transforms/IPO.h" 62 #include "llvm/Transforms/Utils/Local.h" 63 #include <cassert> 64 #include <iterator> 65 #include <map> 66 #include <optional> 67 #include <vector> 68 69 using namespace llvm; 70 71 #define DEBUG_TYPE "function-attrs" 72 73 STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); 74 STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)"); 75 STATISTIC(NumCapturesPartial, "Number of arguments marked with captures " 76 "attribute other than captures(none)"); 77 STATISTIC(NumReturned, "Number of arguments marked returned"); 78 STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); 79 STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); 80 STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); 81 STATISTIC(NumNoAlias, "Number of function returns marked noalias"); 82 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); 83 STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); 84 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); 85 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); 86 
STATISTIC(NumNoFree, "Number of functions marked as nofree"); 87 STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); 88 STATISTIC(NumNoSync, "Number of functions marked as nosync"); 89 STATISTIC(NumCold, "Number of functions marked as cold"); 90 91 STATISTIC(NumThinLinkNoRecurse, 92 "Number of functions marked as norecurse during thinlink"); 93 STATISTIC(NumThinLinkNoUnwind, 94 "Number of functions marked as nounwind during thinlink"); 95 96 static cl::opt<bool> EnableNonnullArgPropagation( 97 "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 98 cl::desc("Try to propagate nonnull argument attributes from callsites to " 99 "caller functions.")); 100 101 static cl::opt<bool> DisableNoUnwindInference( 102 "disable-nounwind-inference", cl::Hidden, 103 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 104 105 static cl::opt<bool> DisableNoFreeInference( 106 "disable-nofree-inference", cl::Hidden, 107 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 108 109 static cl::opt<bool> DisableThinLTOPropagation( 110 "disable-thinlto-funcattrs", cl::init(true), cl::Hidden, 111 cl::desc("Don't propagate function-attrs in thinLTO")); 112 113 static void addCapturesStat(CaptureInfo CI) { 114 if (capturesNothing(CI)) 115 ++NumCapturesNone; 116 else 117 ++NumCapturesPartial; 118 } 119 120 namespace { 121 122 using SCCNodeSet = SmallSetVector<Function *, 8>; 123 124 } // end anonymous namespace 125 126 static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc, 127 ModRefInfo MR, AAResults &AAR) { 128 // Ignore accesses to known-invariant or local memory. 129 MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true); 130 if (isNoModRef(MR)) 131 return; 132 133 const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr); 134 if (isa<AllocaInst>(UO)) 135 return; 136 if (isa<Argument>(UO)) { 137 ME |= MemoryEffects::argMemOnly(MR); 138 return; 139 } 140 141 // If it's not an identified object, it might be an argument. 142 if (!isIdentifiedObject(UO)) 143 ME |= MemoryEffects::argMemOnly(MR); 144 ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR); 145 ME |= MemoryEffects(IRMemLocation::Other, MR); 146 } 147 148 static void addArgLocs(MemoryEffects &ME, const CallBase *Call, 149 ModRefInfo ArgMR, AAResults &AAR) { 150 for (const Value *Arg : Call->args()) { 151 if (!Arg->getType()->isPtrOrPtrVectorTy()) 152 continue; 153 154 addLocAccess(ME, 155 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()), 156 ArgMR, AAR); 157 } 158 } 159 160 /// Returns the memory access attribute for function F using AAR for AA results, 161 /// where SCCNodes is the current SCC. 162 /// 163 /// If ThisBody is true, this function may examine the function body and will 164 /// return a result pertaining to this copy of the function. If it is false, the 165 /// result will be based only on AA results for the function declaration; it 166 /// will be assumed that some other (perhaps less optimized) version of the 167 /// function may be selected at link time. 168 /// 169 /// The return value is split into two parts: Memory effects that always apply, 170 /// and additional memory effects that apply if any of the functions in the SCC 171 /// can access argmem. 172 static std::pair<MemoryEffects, MemoryEffects> 173 checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, 174 const SCCNodeSet &SCCNodes) { 175 MemoryEffects OrigME = AAR.getMemoryEffects(&F); 176 if (OrigME.doesNotAccessMemory()) 177 // Already perfect! 
178 return {OrigME, MemoryEffects::none()}; 179 180 if (!ThisBody) 181 return {OrigME, MemoryEffects::none()}; 182 183 MemoryEffects ME = MemoryEffects::none(); 184 // Additional locations accessed if the SCC accesses argmem. 185 MemoryEffects RecursiveArgME = MemoryEffects::none(); 186 187 // Inalloca and preallocated arguments are always clobbered by the call. 188 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || 189 F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) 190 ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef); 191 192 // Scan the function body for instructions that may read or write memory. 193 for (Instruction &I : instructions(F)) { 194 // Some instructions can be ignored even if they read or write memory. 195 // Detect these now, skipping to the next instruction if one is found. 196 if (auto *Call = dyn_cast<CallBase>(&I)) { 197 // We can optimistically ignore calls to functions in the same SCC, with 198 // two caveats: 199 // * Calls with operand bundles may have additional effects. 200 // * Argument memory accesses may imply additional effects depending on 201 // what the argument location is. 202 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 203 SCCNodes.count(Call->getCalledFunction())) { 204 // Keep track of which additional locations are accessed if the SCC 205 // turns out to access argmem. 206 addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR); 207 continue; 208 } 209 210 MemoryEffects CallME = AAR.getMemoryEffects(Call); 211 212 // If the call doesn't access memory, we're done. 213 if (CallME.doesNotAccessMemory()) 214 continue; 215 216 // A pseudo probe call shouldn't change any function attribute since it 217 // doesn't translate to a real instruction. It comes with a memory access 218 // tag to prevent itself being removed by optimizations and not block 219 // other instructions being optimized. 220 if (isa<PseudoProbeInst>(I)) 221 continue; 222 223 // Merge callee's memory effects into caller's ones, including 224 // inaccessible and errno memory, but excluding argument memory, which is 225 // handled separately. 226 ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem); 227 228 // If the call accesses captured memory (currently part of "other") and 229 // an argument is captured (currently not tracked), then it may also 230 // access argument memory. 231 ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other); 232 ME |= MemoryEffects::argMemOnly(OtherMR); 233 234 // Check whether all pointer arguments point to local memory, and 235 // ignore calls that only access local memory. 236 ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem); 237 if (ArgMR != ModRefInfo::NoModRef) 238 addArgLocs(ME, Call, ArgMR, AAR); 239 continue; 240 } 241 242 ModRefInfo MR = ModRefInfo::NoModRef; 243 if (I.mayWriteToMemory()) 244 MR |= ModRefInfo::Mod; 245 if (I.mayReadFromMemory()) 246 MR |= ModRefInfo::Ref; 247 if (MR == ModRefInfo::NoModRef) 248 continue; 249 250 std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I); 251 if (!Loc) { 252 // If no location is known, conservatively assume anything can be 253 // accessed. 254 ME |= MemoryEffects(MR); 255 continue; 256 } 257 258 // Volatile operations may access inaccessible memory. 
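    // For example, a volatile access may be used for memory-mapped I/O or
    // other communication with the environment; that is why it is treated
    // below as also touching inaccessible memory.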
259 if (I.isVolatile()) 260 ME |= MemoryEffects::inaccessibleMemOnly(MR); 261 262 addLocAccess(ME, *Loc, MR, AAR); 263 } 264 265 return {OrigME & ME, RecursiveArgME}; 266 } 267 268 MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, 269 AAResults &AAR) { 270 return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first; 271 } 272 273 /// Deduce readonly/readnone/writeonly attributes for the SCC. 274 template <typename AARGetterT> 275 static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, 276 SmallSet<Function *, 8> &Changed) { 277 MemoryEffects ME = MemoryEffects::none(); 278 MemoryEffects RecursiveArgME = MemoryEffects::none(); 279 for (Function *F : SCCNodes) { 280 // Call the callable parameter to look up AA results for this function. 281 AAResults &AAR = AARGetter(*F); 282 // Non-exact function definitions may not be selected at link time, and an 283 // alternative version that writes to memory may be selected. See the 284 // comment on GlobalValue::isDefinitionExact for more details. 285 auto [FnME, FnRecursiveArgME] = 286 checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); 287 ME |= FnME; 288 RecursiveArgME |= FnRecursiveArgME; 289 // Reached bottom of the lattice, we will not be able to improve the result. 290 if (ME == MemoryEffects::unknown()) 291 return; 292 } 293 294 // If the SCC accesses argmem, add recursive accesses resulting from that. 295 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); 296 if (ArgMR != ModRefInfo::NoModRef) 297 ME |= RecursiveArgME & MemoryEffects(ArgMR); 298 299 for (Function *F : SCCNodes) { 300 MemoryEffects OldME = F->getMemoryEffects(); 301 MemoryEffects NewME = ME & OldME; 302 if (NewME != OldME) { 303 ++NumMemoryAttr; 304 F->setMemoryEffects(NewME); 305 // Remove conflicting writable attributes. 306 if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem))) 307 for (Argument &A : F->args()) 308 A.removeAttr(Attribute::Writable); 309 Changed.insert(F); 310 } 311 } 312 } 313 314 // Compute definitive function attributes for a function taking into account 315 // prevailing definitions and linkage types 316 static FunctionSummary *calculatePrevailingSummary( 317 ValueInfo VI, 318 DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary, 319 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 320 IsPrevailing) { 321 322 auto [It, Inserted] = CachedPrevailingSummary.try_emplace(VI); 323 if (!Inserted) 324 return It->second; 325 326 /// At this point, prevailing symbols have been resolved. The following leads 327 /// to returning a conservative result: 328 /// - Multiple instances with local linkage. Normally local linkage would be 329 /// unique per module 330 /// as the GUID includes the module path. We could have a guid alias if 331 /// there wasn't any distinguishing path when each file was compiled, but 332 /// that should be rare so we'll punt on those. 333 334 /// These next 2 cases should not happen and will assert: 335 /// - Multiple instances with external linkage. This should be caught in 336 /// symbol resolution 337 /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our 338 /// knowledge meaning we have to go conservative. 339 340 /// Otherwise, we calculate attributes for a function as: 341 /// 1. If we have a local linkage, take its attributes. If there's somehow 342 /// multiple, bail and go conservative. 343 /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is 344 /// prevailing, take its attributes. 
345 /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic 346 /// differences. However, if the prevailing copy is known it will be used 347 /// so take its attributes. If the prevailing copy is in a native file 348 /// all IR copies will be dead and propagation will go conservative. 349 /// 4. AvailableExternally summaries without a prevailing copy are known to 350 /// occur in a couple of circumstances: 351 /// a. An internal function gets imported due to its caller getting 352 /// imported, it becomes AvailableExternally but no prevailing 353 /// definition exists. Because it has to get imported along with its 354 /// caller the attributes will be captured by propagating on its 355 /// caller. 356 /// b. C++11 [temp.explicit]p10 can generate AvailableExternally 357 /// definitions of explicitly instanced template declarations 358 /// for inlining which are ultimately dropped from the TU. Since this 359 /// is localized to the TU the attributes will have already made it to 360 /// the callers. 361 /// These are edge cases and already captured by their callers so we 362 /// ignore these for now. If they become relevant to optimize in the 363 /// future this can be revisited. 364 /// 5. Otherwise, go conservative. 365 366 FunctionSummary *Local = nullptr; 367 FunctionSummary *Prevailing = nullptr; 368 369 for (const auto &GVS : VI.getSummaryList()) { 370 if (!GVS->isLive()) 371 continue; 372 373 FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject()); 374 // Virtual and Unknown (e.g. indirect) calls require going conservative 375 if (!FS || FS->fflags().HasUnknownCall) 376 return nullptr; 377 378 const auto &Linkage = GVS->linkage(); 379 if (GlobalValue::isLocalLinkage(Linkage)) { 380 if (Local) { 381 LLVM_DEBUG( 382 dbgs() 383 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on " 384 "function " 385 << VI.name() << " from " << FS->modulePath() << ". 
Previous module " 386 << Local->modulePath() << "\n"); 387 return nullptr; 388 } 389 Local = FS; 390 } else if (GlobalValue::isExternalLinkage(Linkage)) { 391 assert(IsPrevailing(VI.getGUID(), GVS.get())); 392 Prevailing = FS; 393 break; 394 } else if (GlobalValue::isWeakODRLinkage(Linkage) || 395 GlobalValue::isLinkOnceODRLinkage(Linkage) || 396 GlobalValue::isWeakAnyLinkage(Linkage) || 397 GlobalValue::isLinkOnceAnyLinkage(Linkage)) { 398 if (IsPrevailing(VI.getGUID(), GVS.get())) { 399 Prevailing = FS; 400 break; 401 } 402 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) { 403 // TODO: Handle these cases if they become meaningful 404 continue; 405 } 406 } 407 408 auto &CPS = CachedPrevailingSummary[VI]; 409 if (Local) { 410 assert(!Prevailing); 411 CPS = Local; 412 } else if (Prevailing) { 413 assert(!Local); 414 CPS = Prevailing; 415 } 416 417 return CPS; 418 } 419 420 bool llvm::thinLTOPropagateFunctionAttrs( 421 ModuleSummaryIndex &Index, 422 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 423 IsPrevailing) { 424 // TODO: implement addNoAliasAttrs once 425 // there's more information about the return type in the summary 426 if (DisableThinLTOPropagation) 427 return false; 428 429 DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary; 430 bool Changed = false; 431 432 auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) { 433 // Assume we can propagate unless we discover otherwise 434 FunctionSummary::FFlags InferredFlags; 435 InferredFlags.NoRecurse = (SCCNodes.size() == 1); 436 InferredFlags.NoUnwind = true; 437 438 for (auto &V : SCCNodes) { 439 FunctionSummary *CallerSummary = 440 calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing); 441 442 // Function summaries can fail to contain information such as declarations 443 if (!CallerSummary) 444 return; 445 446 if (CallerSummary->fflags().MayThrow) 447 InferredFlags.NoUnwind = false; 448 449 for (const auto &Callee : CallerSummary->calls()) { 450 FunctionSummary *CalleeSummary = calculatePrevailingSummary( 451 Callee.first, CachedPrevailingSummary, IsPrevailing); 452 453 if (!CalleeSummary) 454 return; 455 456 if (!CalleeSummary->fflags().NoRecurse) 457 InferredFlags.NoRecurse = false; 458 459 if (!CalleeSummary->fflags().NoUnwind) 460 InferredFlags.NoUnwind = false; 461 462 if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse) 463 break; 464 } 465 } 466 467 if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) { 468 Changed = true; 469 for (auto &V : SCCNodes) { 470 if (InferredFlags.NoRecurse) { 471 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to " 472 << V.name() << "\n"); 473 ++NumThinLinkNoRecurse; 474 } 475 476 if (InferredFlags.NoUnwind) { 477 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to " 478 << V.name() << "\n"); 479 ++NumThinLinkNoUnwind; 480 } 481 482 for (const auto &S : V.getSummaryList()) { 483 if (auto *FS = dyn_cast<FunctionSummary>(S.get())) { 484 if (InferredFlags.NoRecurse) 485 FS->setNoRecurse(); 486 487 if (InferredFlags.NoUnwind) 488 FS->setNoUnwind(); 489 } 490 } 491 } 492 } 493 }; 494 495 // Call propagation functions on each SCC in the Index 496 for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd(); 497 ++I) { 498 std::vector<ValueInfo> Nodes(*I); 499 PropagateAttributes(Nodes); 500 } 501 return Changed; 502 } 503 504 namespace { 505 506 /// For a given pointer Argument, this retains a list of Arguments of functions 507 /// in the same SCC that the pointer data flows into. 
We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  /// CaptureComponents for this argument, excluding captures via Uses.
  /// We don't distinguish between other/return captures here.
  CaptureComponents CC = CaptureComponents::None;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { CI = CaptureInfo::all(); }

  Action captured(const Use *U, UseCaptureInfo UseCI) override {
    if (updateCaptureInfo(U, UseCI.UseCC)) {
      // Don't bother continuing if we already capture everything.
      if (capturesAll(CI.getOtherComponents()))
        return Stop;
      return Continue;
    }

    // For SCC argument tracking, we're not going to analyze other/ret
    // components separately, so don't follow the return value.
    return ContinueIgnoringReturn;
  }

  bool updateCaptureInfo(const Use *U, CaptureComponents CC) {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      if (isa<ReturnInst>(U->getUser()))
        CI |= CaptureInfo::retOnly(CC);
      else
        // Conservatively assume that the captured value might make its way
        // into the return value as well. This could be made more precise.
        CI |= CaptureInfo(CC);
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      CI |= CaptureInfo(CC);
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use.
In this case it does not matter if the callee is within our SCC 596 // or not -- we've been captured in some unknown way, and we have to be 597 // conservative. 598 CI |= CaptureInfo(CC); 599 return true; 600 } 601 602 if (UseIndex >= F->arg_size()) { 603 assert(F->isVarArg() && "More params than args in non-varargs call"); 604 CI |= CaptureInfo(CC); 605 return true; 606 } 607 608 // TODO(captures): Could improve precision by remembering maximum 609 // capture components for the argument. 610 Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); 611 return false; 612 } 613 614 // Does not include potential captures via Uses in the SCC. 615 CaptureInfo CI = CaptureInfo::none(); 616 617 // Uses within our SCC. 618 SmallVector<Argument *, 4> Uses; 619 620 const SCCNodeSet &SCCNodes; 621 }; 622 623 /// A struct of argument use: a Use and the offset it accesses. This struct 624 /// is to track uses inside function via GEP. If GEP has a non-constant index, 625 /// the Offset field is nullopt. 626 struct ArgumentUse { 627 Use *U; 628 std::optional<int64_t> Offset; 629 }; 630 631 /// A struct of argument access info. "Unknown" accesses are the cases like 632 /// unrecognized instructions, instructions that have more than one use of 633 /// the argument, or volatile memory accesses. "WriteWithSideEffect" are call 634 /// instructions that not only write an argument but also capture it. 635 struct ArgumentAccessInfo { 636 enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown }; 637 AccessType ArgAccessType; 638 ConstantRangeList AccessRanges; 639 }; 640 641 /// A struct to wrap the argument use info per block. 642 struct UsesPerBlockInfo { 643 SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts; 644 bool HasWrites = false; 645 bool HasUnknownAccess = false; 646 }; 647 648 /// A struct to summarize the argument use info in a function. 649 struct ArgumentUsesSummary { 650 bool HasAnyWrite = false; 651 bool HasWriteOutsideEntryBB = false; 652 SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock; 653 }; 654 655 ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I, 656 const ArgumentUse &ArgUse, 657 const DataLayout &DL) { 658 auto GetTypeAccessRange = 659 [&DL](Type *Ty, 660 std::optional<int64_t> Offset) -> std::optional<ConstantRange> { 661 auto TypeSize = DL.getTypeStoreSize(Ty); 662 if (!TypeSize.isScalable() && Offset) { 663 int64_t Size = TypeSize.getFixedValue(); 664 APInt Low(64, *Offset, true); 665 bool Overflow; 666 APInt High = Low.sadd_ov(APInt(64, Size, true), Overflow); 667 // Bail if the range overflows signed 64-bit int. 668 if (Overflow) 669 return std::nullopt; 670 return ConstantRange(Low, High); 671 } 672 return std::nullopt; 673 }; 674 auto GetConstantIntRange = 675 [](Value *Length, 676 std::optional<int64_t> Offset) -> std::optional<ConstantRange> { 677 auto *ConstantLength = dyn_cast<ConstantInt>(Length); 678 if (ConstantLength && Offset) { 679 int64_t Len = ConstantLength->getSExtValue(); 680 681 // Reject zero or negative lengths 682 if (Len <= 0) 683 return std::nullopt; 684 685 APInt Low(64, *Offset, true); 686 bool Overflow; 687 APInt High = Low.sadd_ov(APInt(64, Len, true), Overflow); 688 if (Overflow) 689 return std::nullopt; 690 691 return ConstantRange(Low, High); 692 } 693 return std::nullopt; 694 }; 695 696 if (auto *SI = dyn_cast<StoreInst>(I)) { 697 if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) { 698 // Get the fixed type size of "SI". 
Since the access range of a write 699 // will be unioned, if "SI" doesn't have a fixed type size, we just set 700 // the access range to empty. 701 ConstantRangeList AccessRanges; 702 if (auto TypeAccessRange = 703 GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) 704 AccessRanges.insert(*TypeAccessRange); 705 return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)}; 706 } 707 } else if (auto *LI = dyn_cast<LoadInst>(I)) { 708 if (LI->isSimple()) { 709 assert(&LI->getOperandUse(0) == ArgUse.U); 710 // Get the fixed type size of "LI". Different from Write, if "LI" 711 // doesn't have a fixed type size, we conservatively set as a clobber 712 // with an empty access range. 713 if (auto TypeAccessRange = 714 GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) 715 return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}}; 716 } 717 } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) { 718 if (!MemSet->isVolatile()) { 719 ConstantRangeList AccessRanges; 720 if (auto AccessRange = 721 GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) 722 AccessRanges.insert(*AccessRange); 723 return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; 724 } 725 } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) { 726 if (!MTI->isVolatile()) { 727 if (&MTI->getOperandUse(0) == ArgUse.U) { 728 ConstantRangeList AccessRanges; 729 if (auto AccessRange = 730 GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) 731 AccessRanges.insert(*AccessRange); 732 return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; 733 } else if (&MTI->getOperandUse(1) == ArgUse.U) { 734 if (auto AccessRange = 735 GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) 736 return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}}; 737 } 738 } 739 } else if (auto *CB = dyn_cast<CallBase>(I)) { 740 if (CB->isArgOperand(ArgUse.U) && 741 !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) { 742 unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); 743 bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); 744 if (IsInitialize && ArgUse.Offset) { 745 // Argument is a Write when parameter is writeonly/readnone 746 // and nocapture. Otherwise, it's a WriteWithSideEffect. 747 auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo) 748 ? ArgumentAccessInfo::AccessType::Write 749 : ArgumentAccessInfo::AccessType::WriteWithSideEffect; 750 ConstantRangeList AccessRanges; 751 Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); 752 ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); 753 for (ConstantRange &CR : CBCRL) 754 AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, 755 CR.getUpper() + *ArgUse.Offset)); 756 return {Access, AccessRanges}; 757 } 758 } 759 } 760 // Other unrecognized instructions are considered as unknown. 761 return {ArgumentAccessInfo::AccessType::Unknown, {}}; 762 } 763 764 // Collect the uses of argument "A" in "F". 765 ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { 766 auto &DL = F.getParent()->getDataLayout(); 767 unsigned PointerSize = 768 DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); 769 ArgumentUsesSummary Result; 770 771 BasicBlock &EntryBB = F.getEntryBlock(); 772 SmallVector<ArgumentUse, 4> Worklist; 773 for (Use &U : A.uses()) 774 Worklist.push_back({&U, 0}); 775 776 // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. 777 // Return true if the block of "I" has write accesses after updating. 
778 auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { 779 auto *BB = I->getParent(); 780 auto &BBInfo = Result.UsesPerBlock[BB]; 781 auto [It, Inserted] = BBInfo.Insts.try_emplace(I); 782 auto &IInfo = It->second; 783 784 // Instructions that have more than one use of the argument are considered 785 // as clobbers. 786 if (!Inserted) { 787 IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}}; 788 BBInfo.HasUnknownAccess = true; 789 return false; 790 } 791 792 IInfo = std::move(Info); 793 BBInfo.HasUnknownAccess |= 794 IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown; 795 bool InfoHasWrites = 796 (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write || 797 IInfo.ArgAccessType == 798 ArgumentAccessInfo::AccessType::WriteWithSideEffect) && 799 !IInfo.AccessRanges.empty(); 800 BBInfo.HasWrites |= InfoHasWrites; 801 return InfoHasWrites; 802 }; 803 804 // No need for a visited set because we don't look through phis, so there are 805 // no cycles. 806 while (!Worklist.empty()) { 807 ArgumentUse ArgUse = Worklist.pop_back_val(); 808 User *U = ArgUse.U->getUser(); 809 // Add GEP uses to worklist. 810 // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. 811 if (auto *GEP = dyn_cast<GEPOperator>(U)) { 812 std::optional<int64_t> NewOffset = std::nullopt; 813 if (ArgUse.Offset) { 814 APInt Offset(PointerSize, 0); 815 if (GEP->accumulateConstantOffset(DL, Offset)) 816 NewOffset = *ArgUse.Offset + Offset.getSExtValue(); 817 } 818 for (Use &U : GEP->uses()) 819 Worklist.push_back({&U, NewOffset}); 820 continue; 821 } 822 823 auto *I = cast<Instruction>(U); 824 bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL)); 825 826 Result.HasAnyWrite |= HasWrite; 827 828 if (HasWrite && I->getParent() != &EntryBB) 829 Result.HasWriteOutsideEntryBB = true; 830 } 831 return Result; 832 } 833 834 } // end anonymous namespace 835 836 namespace llvm { 837 838 template <> struct GraphTraits<ArgumentGraphNode *> { 839 using NodeRef = ArgumentGraphNode *; 840 using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; 841 842 static NodeRef getEntryNode(NodeRef A) { return A; } 843 static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } 844 static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } 845 }; 846 847 template <> 848 struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { 849 static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } 850 851 static ChildIteratorType nodes_begin(ArgumentGraph *AG) { 852 return AG->begin(); 853 } 854 855 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } 856 }; 857 858 } // end namespace llvm 859 860 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 861 static Attribute::AttrKind 862 determinePointerAccessAttrs(Argument *A, 863 const SmallPtrSet<Argument *, 8> &SCCNodes) { 864 SmallVector<Use *, 32> Worklist; 865 SmallPtrSet<Use *, 32> Visited; 866 867 // inalloca arguments are always clobbered by the call. 868 if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) 869 return Attribute::None; 870 871 bool IsRead = false; 872 bool IsWrite = false; 873 874 for (Use &U : A->uses()) { 875 Visited.insert(&U); 876 Worklist.push_back(&U); 877 } 878 879 while (!Worklist.empty()) { 880 if (IsWrite && IsRead) 881 // No point in searching further.. 
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle)
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (capturesAnyProvenance(CB.getCaptureInfo(UseIndex))) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
978 if (cast<StoreInst>(I)->isVolatile()) 979 return Attribute::None; 980 981 IsWrite = true; 982 break; 983 984 case Instruction::ICmp: 985 case Instruction::Ret: 986 break; 987 988 default: 989 return Attribute::None; 990 } 991 } 992 993 if (IsWrite && IsRead) 994 return Attribute::None; 995 else if (IsRead) 996 return Attribute::ReadOnly; 997 else if (IsWrite) 998 return Attribute::WriteOnly; 999 else 1000 return Attribute::ReadNone; 1001 } 1002 1003 /// Deduce returned attributes for the SCC. 1004 static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes, 1005 SmallSet<Function *, 8> &Changed) { 1006 // Check each function in turn, determining if an argument is always returned. 1007 for (Function *F : SCCNodes) { 1008 // We can infer and propagate function attributes only when we know that the 1009 // definition we'll get at link time is *exactly* the definition we see now. 1010 // For more details, see GlobalValue::mayBeDerefined. 1011 if (!F->hasExactDefinition()) 1012 continue; 1013 1014 if (F->getReturnType()->isVoidTy()) 1015 continue; 1016 1017 // There is nothing to do if an argument is already marked as 'returned'. 1018 if (F->getAttributes().hasAttrSomewhere(Attribute::Returned)) 1019 continue; 1020 1021 auto FindRetArg = [&]() -> Argument * { 1022 Argument *RetArg = nullptr; 1023 for (BasicBlock &BB : *F) 1024 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 1025 // Note that stripPointerCasts should look through functions with 1026 // returned arguments. 1027 auto *RetVal = 1028 dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts()); 1029 if (!RetVal || RetVal->getType() != F->getReturnType()) 1030 return nullptr; 1031 1032 if (!RetArg) 1033 RetArg = RetVal; 1034 else if (RetArg != RetVal) 1035 return nullptr; 1036 } 1037 1038 return RetArg; 1039 }; 1040 1041 if (Argument *RetArg = FindRetArg()) { 1042 RetArg->addAttr(Attribute::Returned); 1043 ++NumReturned; 1044 Changed.insert(F); 1045 } 1046 } 1047 } 1048 1049 /// If a callsite has arguments that are also arguments to the parent function, 1050 /// try to propagate attributes from the callsite's arguments to the parent's 1051 /// arguments. This may be important because inlining can cause information loss 1052 /// when attribute knowledge disappears with the inlined call. 1053 static bool addArgumentAttrsFromCallsites(Function &F) { 1054 if (!EnableNonnullArgPropagation) 1055 return false; 1056 1057 bool Changed = false; 1058 1059 // For an argument attribute to transfer from a callsite to the parent, the 1060 // call must be guaranteed to execute every time the parent is called. 1061 // Conservatively, just check for calls in the entry block that are guaranteed 1062 // to execute. 1063 // TODO: This could be enhanced by testing if the callsite post-dominates the 1064 // entry block or by doing simple forward walks or backward walks to the 1065 // callsite. 1066 BasicBlock &Entry = F.getEntryBlock(); 1067 for (Instruction &I : Entry) { 1068 if (auto *CB = dyn_cast<CallBase>(&I)) { 1069 if (auto *CalledFunc = CB->getCalledFunction()) { 1070 for (auto &CSArg : CalledFunc->args()) { 1071 if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) 1072 continue; 1073 1074 // If the non-null callsite argument operand is an argument to 'F' 1075 // (the caller) and the call is guaranteed to execute, then the value 1076 // must be non-null throughout 'F'. 
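          // For example, if F's entry block unconditionally calls g(q) and
          // g's parameter is nonnull in a way that makes passing null
          // immediate UB (e.g. nonnull plus noundef), then q can be marked
          // nonnull on F itself.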
1077 auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); 1078 if (FArg && !FArg->hasNonNullAttr()) { 1079 FArg->addAttr(Attribute::NonNull); 1080 Changed = true; 1081 } 1082 } 1083 } 1084 } 1085 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 1086 break; 1087 } 1088 1089 return Changed; 1090 } 1091 1092 static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { 1093 assert((R == Attribute::ReadOnly || R == Attribute::ReadNone || 1094 R == Attribute::WriteOnly) 1095 && "Must be an access attribute."); 1096 assert(A && "Argument must not be null."); 1097 1098 // If the argument already has the attribute, nothing needs to be done. 1099 if (A->hasAttribute(R)) 1100 return false; 1101 1102 // Otherwise, remove potentially conflicting attribute, add the new one, 1103 // and update statistics. 1104 A->removeAttr(Attribute::WriteOnly); 1105 A->removeAttr(Attribute::ReadOnly); 1106 A->removeAttr(Attribute::ReadNone); 1107 // Remove conflicting writable attribute. 1108 if (R == Attribute::ReadNone || R == Attribute::ReadOnly) 1109 A->removeAttr(Attribute::Writable); 1110 A->addAttr(R); 1111 if (R == Attribute::ReadOnly) 1112 ++NumReadOnlyArg; 1113 else if (R == Attribute::WriteOnly) 1114 ++NumWriteOnlyArg; 1115 else 1116 ++NumReadNoneArg; 1117 return true; 1118 } 1119 1120 static bool inferInitializes(Argument &A, Function &F) { 1121 auto ArgumentUses = collectArgumentUsesPerBlock(A, F); 1122 // No write anywhere in the function, bail. 1123 if (!ArgumentUses.HasAnyWrite) 1124 return false; 1125 1126 auto &UsesPerBlock = ArgumentUses.UsesPerBlock; 1127 BasicBlock &EntryBB = F.getEntryBlock(); 1128 // A map to store the argument ranges initialized by a BasicBlock (including 1129 // its successors). 1130 DenseMap<const BasicBlock *, ConstantRangeList> Initialized; 1131 // Visit the successors of "BB" block and the instructions in BB (post-order) 1132 // to get the argument ranges initialized by "BB" (including its successors). 1133 // The result will be cached in "Initialized". 1134 auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { 1135 auto UPB = UsesPerBlock.find(BB); 1136 ConstantRangeList CRL; 1137 1138 // Start with intersection of successors. 1139 // If this block has any clobbering use, we're going to clear out the 1140 // ranges at some point in this block anyway, so don't bother looking at 1141 // successors. 1142 if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) { 1143 bool HasAddedSuccessor = false; 1144 for (auto *Succ : successors(BB)) { 1145 if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { 1146 if (HasAddedSuccessor) { 1147 CRL = CRL.intersectWith(SuccI->second); 1148 } else { 1149 CRL = SuccI->second; 1150 HasAddedSuccessor = true; 1151 } 1152 } else { 1153 CRL = ConstantRangeList(); 1154 break; 1155 } 1156 } 1157 } 1158 1159 if (UPB != UsesPerBlock.end()) { 1160 // Sort uses in this block by instruction order. 1161 SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts; 1162 append_range(Insts, UPB->second.Insts); 1163 sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS, 1164 std::pair<Instruction *, ArgumentAccessInfo> &RHS) { 1165 return LHS.first->comesBefore(RHS.first); 1166 }); 1167 1168 // From the end of the block to the beginning of the block, set 1169 // initializes ranges. 
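      // For example, walking backwards through the block: a store that writes
      // bytes [0,8) adds that range, and an earlier load of bytes [0,4)
      // removes it again, since those bytes are read before this block would
      // initialize them.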
1170 for (auto &[_, Info] : reverse(Insts)) { 1171 if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown || 1172 Info.ArgAccessType == 1173 ArgumentAccessInfo::AccessType::WriteWithSideEffect) 1174 CRL = ConstantRangeList(); 1175 if (!Info.AccessRanges.empty()) { 1176 if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write || 1177 Info.ArgAccessType == 1178 ArgumentAccessInfo::AccessType::WriteWithSideEffect) { 1179 CRL = CRL.unionWith(Info.AccessRanges); 1180 } else { 1181 assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); 1182 for (const auto &ReadRange : Info.AccessRanges) 1183 CRL.subtract(ReadRange); 1184 } 1185 } 1186 } 1187 } 1188 return CRL; 1189 }; 1190 1191 ConstantRangeList EntryCRL; 1192 // If all write instructions are in the EntryBB, or if the EntryBB has 1193 // a clobbering use, we only need to look at EntryBB. 1194 bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB; 1195 if (!OnlyScanEntryBlock) 1196 if (auto EntryUPB = UsesPerBlock.find(&EntryBB); 1197 EntryUPB != UsesPerBlock.end()) 1198 OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess; 1199 if (OnlyScanEntryBlock) { 1200 EntryCRL = VisitBlock(&EntryBB); 1201 if (EntryCRL.empty()) 1202 return false; 1203 } else { 1204 // Now we have to go through CFG to get the initialized argument ranges 1205 // across blocks. With dominance and post-dominance, the initialized ranges 1206 // by a block include both accesses inside this block and accesses in its 1207 // (transitive) successors. So visit successors before predecessors with a 1208 // post-order walk of the blocks and memorize the results in "Initialized". 1209 for (const BasicBlock *BB : post_order(&F)) { 1210 ConstantRangeList CRL = VisitBlock(BB); 1211 if (!CRL.empty()) 1212 Initialized[BB] = CRL; 1213 } 1214 1215 auto EntryCRLI = Initialized.find(&EntryBB); 1216 if (EntryCRLI == Initialized.end()) 1217 return false; 1218 1219 EntryCRL = EntryCRLI->second; 1220 } 1221 1222 assert(!EntryCRL.empty() && 1223 "should have bailed already if EntryCRL is empty"); 1224 1225 if (A.hasAttribute(Attribute::Initializes)) { 1226 ConstantRangeList PreviousCRL = 1227 A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); 1228 if (PreviousCRL == EntryCRL) 1229 return false; 1230 EntryCRL = EntryCRL.unionWith(PreviousCRL); 1231 } 1232 1233 A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes, 1234 EntryCRL.rangesRef())); 1235 1236 return true; 1237 } 1238 1239 /// Deduce nocapture attributes for the SCC. 1240 static void addArgumentAttrs(const SCCNodeSet &SCCNodes, 1241 SmallSet<Function *, 8> &Changed, 1242 bool SkipInitializes) { 1243 ArgumentGraph AG; 1244 1245 auto DetermineAccessAttrsForSingleton = [](Argument *A) { 1246 SmallPtrSet<Argument *, 8> Self; 1247 Self.insert(A); 1248 Attribute::AttrKind R = determinePointerAccessAttrs(A, Self); 1249 if (R != Attribute::None) 1250 return addAccessAttr(A, R); 1251 return false; 1252 }; 1253 1254 // Check each function in turn, determining which pointer arguments are not 1255 // captured. 1256 for (Function *F : SCCNodes) { 1257 // We can infer and propagate function attributes only when we know that the 1258 // definition we'll get at link time is *exactly* the definition we see now. 1259 // For more details, see GlobalValue::mayBeDerefined. 
1260 if (!F->hasExactDefinition()) 1261 continue; 1262 1263 if (addArgumentAttrsFromCallsites(*F)) 1264 Changed.insert(F); 1265 1266 // Functions that are readonly (or readnone) and nounwind and don't return 1267 // a value can't capture arguments. Don't analyze them. 1268 if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() && 1269 F->getReturnType()->isVoidTy()) { 1270 for (Argument &A : F->args()) { 1271 if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { 1272 A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), 1273 CaptureInfo::none())); 1274 ++NumCapturesNone; 1275 Changed.insert(F); 1276 } 1277 } 1278 continue; 1279 } 1280 1281 for (Argument &A : F->args()) { 1282 if (!A.getType()->isPointerTy()) 1283 continue; 1284 bool HasNonLocalUses = false; 1285 CaptureInfo OrigCI = A.getAttributes().getCaptureInfo(); 1286 if (!capturesNothing(OrigCI)) { 1287 ArgumentUsesTracker Tracker(SCCNodes); 1288 PointerMayBeCaptured(&A, &Tracker); 1289 CaptureInfo NewCI = Tracker.CI & OrigCI; 1290 if (NewCI != OrigCI) { 1291 if (Tracker.Uses.empty()) { 1292 // If the information is complete, add the attribute now. 1293 A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), NewCI)); 1294 addCapturesStat(NewCI); 1295 Changed.insert(F); 1296 } else { 1297 // If it's not trivially captured and not trivially not captured, 1298 // then it must be calling into another function in our SCC. Save 1299 // its particulars for Argument-SCC analysis later. 1300 ArgumentGraphNode *Node = AG[&A]; 1301 Node->CC = CaptureComponents(NewCI); 1302 for (Argument *Use : Tracker.Uses) { 1303 Node->Uses.push_back(AG[Use]); 1304 if (Use != &A) 1305 HasNonLocalUses = true; 1306 } 1307 } 1308 } 1309 // Otherwise, it's captured. Don't bother doing SCC analysis on it. 1310 } 1311 if (!HasNonLocalUses && !A.onlyReadsMemory()) { 1312 // Can we determine that it's readonly/readnone/writeonly without doing 1313 // an SCC? Note that we don't allow any calls at all here, or else our 1314 // result will be dependent on the iteration order through the 1315 // functions in the SCC. 1316 if (DetermineAccessAttrsForSingleton(&A)) 1317 Changed.insert(F); 1318 } 1319 if (!SkipInitializes && !A.onlyReadsMemory()) { 1320 if (inferInitializes(A, *F)) 1321 Changed.insert(F); 1322 } 1323 } 1324 } 1325 1326 // The graph we've collected is partial because we stopped scanning for 1327 // argument uses once we solved the argument trivially. These partial nodes 1328 // show up as ArgumentGraphNode objects with an empty Uses list, and for 1329 // these nodes the final decision about whether they capture has already been 1330 // made. If the definition doesn't have a 'nocapture' attribute by now, it 1331 // captures. 1332 1333 for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { 1334 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; 1335 if (ArgumentSCC.size() == 1) { 1336 if (!ArgumentSCC[0]->Definition) 1337 continue; // synthetic root node 1338 1339 // eg. "void f(int* x) { if (...) 
f(x); }" 1340 if (ArgumentSCC[0]->Uses.size() == 1 && 1341 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 1342 Argument *A = ArgumentSCC[0]->Definition; 1343 CaptureInfo OrigCI = A->getAttributes().getCaptureInfo(); 1344 CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI; 1345 if (NewCI != OrigCI) { 1346 A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI)); 1347 addCapturesStat(NewCI); 1348 Changed.insert(A->getParent()); 1349 } 1350 1351 // Infer the access attributes given the new captures one 1352 if (DetermineAccessAttrsForSingleton(A)) 1353 Changed.insert(A->getParent()); 1354 } 1355 continue; 1356 } 1357 1358 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 1359 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 1360 // quickly looking up whether a given Argument is in this ArgumentSCC. 1361 for (ArgumentGraphNode *I : ArgumentSCC) { 1362 ArgumentSCCNodes.insert(I->Definition); 1363 } 1364 1365 // At the SCC level, only track merged CaptureComponents. We're not 1366 // currently prepared to handle propagation of return-only captures across 1367 // the SCC. 1368 CaptureComponents CC = CaptureComponents::None; 1369 for (ArgumentGraphNode *N : ArgumentSCC) { 1370 for (ArgumentGraphNode *Use : N->Uses) { 1371 Argument *A = Use->Definition; 1372 if (ArgumentSCCNodes.count(A)) 1373 CC |= Use->CC; 1374 else 1375 CC |= CaptureComponents(A->getAttributes().getCaptureInfo()); 1376 break; 1377 } 1378 if (capturesAll(CC)) 1379 break; 1380 } 1381 1382 if (!capturesAll(CC)) { 1383 for (ArgumentGraphNode *N : ArgumentSCC) { 1384 Argument *A = N->Definition; 1385 CaptureInfo OrigCI = A->getAttributes().getCaptureInfo(); 1386 CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI; 1387 if (NewCI != OrigCI) { 1388 A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI)); 1389 addCapturesStat(NewCI); 1390 Changed.insert(A->getParent()); 1391 } 1392 } 1393 } 1394 1395 if (capturesAnyProvenance(CC)) { 1396 // As the pointer provenance may be captured, determine the pointer 1397 // attributes looking at each argument individually. 1398 for (ArgumentGraphNode *N : ArgumentSCC) { 1399 if (DetermineAccessAttrsForSingleton(N->Definition)) 1400 Changed.insert(N->Definition->getParent()); 1401 } 1402 continue; 1403 } 1404 1405 // We also want to compute readonly/readnone/writeonly. With a small number 1406 // of false negatives, we can assume that any pointer which is captured 1407 // isn't going to be provably readonly or readnone, since by definition 1408 // we can't analyze all uses of a captured pointer. 1409 // 1410 // The false negatives happen when the pointer is captured by a function 1411 // that promises readonly/readnone behaviour on the pointer, then the 1412 // pointer's lifetime ends before anything that writes to arbitrary memory. 1413 // Also, a readonly/readnone pointer may be returned, but returning a 1414 // pointer is capturing it. 
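    // Meet of two access kinds: ReadNone acts as the identity, equal kinds
    // are kept, and a ReadOnly/WriteOnly conflict collapses to None.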
1415 1416 auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) { 1417 if (A == B) 1418 return A; 1419 if (A == Attribute::ReadNone) 1420 return B; 1421 if (B == Attribute::ReadNone) 1422 return A; 1423 return Attribute::None; 1424 }; 1425 1426 Attribute::AttrKind AccessAttr = Attribute::ReadNone; 1427 for (ArgumentGraphNode *N : ArgumentSCC) { 1428 Argument *A = N->Definition; 1429 Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes); 1430 AccessAttr = meetAccessAttr(AccessAttr, K); 1431 if (AccessAttr == Attribute::None) 1432 break; 1433 } 1434 1435 if (AccessAttr != Attribute::None) { 1436 for (ArgumentGraphNode *N : ArgumentSCC) { 1437 Argument *A = N->Definition; 1438 if (addAccessAttr(A, AccessAttr)) 1439 Changed.insert(A->getParent()); 1440 } 1441 } 1442 } 1443 } 1444 1445 /// Tests whether a function is "malloc-like". 1446 /// 1447 /// A function is "malloc-like" if it returns either null or a pointer that 1448 /// doesn't alias any other pointer visible to the caller. 1449 static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { 1450 SmallSetVector<Value *, 8> FlowsToReturn; 1451 for (BasicBlock &BB : *F) 1452 if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1453 FlowsToReturn.insert(Ret->getReturnValue()); 1454 1455 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1456 Value *RetVal = FlowsToReturn[i]; 1457 1458 if (Constant *C = dyn_cast<Constant>(RetVal)) { 1459 if (!C->isNullValue() && !isa<UndefValue>(C)) 1460 return false; 1461 1462 continue; 1463 } 1464 1465 if (isa<Argument>(RetVal)) 1466 return false; 1467 1468 if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) 1469 switch (RVI->getOpcode()) { 1470 // Extend the analysis by looking upwards. 1471 case Instruction::BitCast: 1472 case Instruction::GetElementPtr: 1473 case Instruction::AddrSpaceCast: 1474 FlowsToReturn.insert(RVI->getOperand(0)); 1475 continue; 1476 case Instruction::Select: { 1477 SelectInst *SI = cast<SelectInst>(RVI); 1478 FlowsToReturn.insert(SI->getTrueValue()); 1479 FlowsToReturn.insert(SI->getFalseValue()); 1480 continue; 1481 } 1482 case Instruction::PHI: { 1483 PHINode *PN = cast<PHINode>(RVI); 1484 FlowsToReturn.insert_range(PN->incoming_values()); 1485 continue; 1486 } 1487 1488 // Check whether the pointer came from an allocation. 1489 case Instruction::Alloca: 1490 break; 1491 case Instruction::Call: 1492 case Instruction::Invoke: { 1493 CallBase &CB = cast<CallBase>(*RVI); 1494 if (CB.hasRetAttr(Attribute::NoAlias)) 1495 break; 1496 if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) 1497 break; 1498 [[fallthrough]]; 1499 } 1500 default: 1501 return false; // Did not come from an allocation. 1502 } 1503 1504 if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false)) 1505 return false; 1506 } 1507 1508 return true; 1509 } 1510 1511 /// Deduce noalias attributes for the SCC. 1512 static void addNoAliasAttrs(const SCCNodeSet &SCCNodes, 1513 SmallSet<Function *, 8> &Changed) { 1514 // Check each function in turn, determining which functions return noalias 1515 // pointers. 1516 for (Function *F : SCCNodes) { 1517 // Already noalias. 1518 if (F->returnDoesNotAlias()) 1519 continue; 1520 1521 // We can infer and propagate function attributes only when we know that the 1522 // definition we'll get at link time is *exactly* the definition we see now. 1523 // For more details, see GlobalValue::mayBeDerefined. 
1524 if (!F->hasExactDefinition()) 1525 return; 1526 1527 // We annotate noalias return values, which are only applicable to 1528 // pointer types. 1529 if (!F->getReturnType()->isPointerTy()) 1530 continue; 1531 1532 if (!isFunctionMallocLike(F, SCCNodes)) 1533 return; 1534 } 1535 1536 for (Function *F : SCCNodes) { 1537 if (F->returnDoesNotAlias() || 1538 !F->getReturnType()->isPointerTy()) 1539 continue; 1540 1541 F->setReturnDoesNotAlias(); 1542 ++NumNoAlias; 1543 Changed.insert(F); 1544 } 1545 } 1546 1547 /// Tests whether this function is known to not return null. 1548 /// 1549 /// Requires that the function returns a pointer. 1550 /// 1551 /// Returns true if it believes the function will not return a null, and sets 1552 /// \p Speculative based on whether the returned conclusion is a speculative 1553 /// conclusion due to SCC calls. 1554 static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, 1555 bool &Speculative) { 1556 assert(F->getReturnType()->isPointerTy() && 1557 "nonnull only meaningful on pointer types"); 1558 Speculative = false; 1559 1560 SmallSetVector<Value *, 8> FlowsToReturn; 1561 for (BasicBlock &BB : *F) 1562 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1563 FlowsToReturn.insert(Ret->getReturnValue()); 1564 1565 auto &DL = F->getDataLayout(); 1566 1567 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1568 Value *RetVal = FlowsToReturn[i]; 1569 1570 // If this value is locally known to be non-null, we're good 1571 if (isKnownNonZero(RetVal, DL)) 1572 continue; 1573 1574 // Otherwise, we need to look upwards since we can't make any local 1575 // conclusions. 1576 Instruction *RVI = dyn_cast<Instruction>(RetVal); 1577 if (!RVI) 1578 return false; 1579 switch (RVI->getOpcode()) { 1580 // Extend the analysis by looking upwards. 1581 case Instruction::BitCast: 1582 case Instruction::AddrSpaceCast: 1583 FlowsToReturn.insert(RVI->getOperand(0)); 1584 continue; 1585 case Instruction::GetElementPtr: 1586 if (cast<GEPOperator>(RVI)->isInBounds()) { 1587 FlowsToReturn.insert(RVI->getOperand(0)); 1588 continue; 1589 } 1590 return false; 1591 case Instruction::Select: { 1592 SelectInst *SI = cast<SelectInst>(RVI); 1593 FlowsToReturn.insert(SI->getTrueValue()); 1594 FlowsToReturn.insert(SI->getFalseValue()); 1595 continue; 1596 } 1597 case Instruction::PHI: { 1598 PHINode *PN = cast<PHINode>(RVI); 1599 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1600 FlowsToReturn.insert(PN->getIncomingValue(i)); 1601 continue; 1602 } 1603 case Instruction::Call: 1604 case Instruction::Invoke: { 1605 CallBase &CB = cast<CallBase>(*RVI); 1606 Function *Callee = CB.getCalledFunction(); 1607 // A call to a node within the SCC is assumed to return null until 1608 // proven otherwise 1609 if (Callee && SCCNodes.count(Callee)) { 1610 Speculative = true; 1611 continue; 1612 } 1613 return false; 1614 } 1615 default: 1616 return false; // Unknown source, may be null 1617 }; 1618 llvm_unreachable("should have either continued or returned"); 1619 } 1620 1621 return true; 1622 } 1623 1624 /// Deduce nonnull attributes for the SCC. 1625 static void addNonNullAttrs(const SCCNodeSet &SCCNodes, 1626 SmallSet<Function *, 8> &Changed) { 1627 // Speculative that all functions in the SCC return only nonnull 1628 // pointers. We may refute this as we analyze functions. 1629 bool SCCReturnsNonNull = true; 1630 1631 // Check each function in turn, determining which functions return nonnull 1632 // pointers. 1633 for (Function *F : SCCNodes) { 1634 // Already nonnull. 
1635 if (F->getAttributes().hasRetAttr(Attribute::NonNull)) 1636 continue; 1637 1638 // We can infer and propagate function attributes only when we know that the 1639 // definition we'll get at link time is *exactly* the definition we see now. 1640 // For more details, see GlobalValue::mayBeDerefined. 1641 if (!F->hasExactDefinition()) 1642 return; 1643 1644 // We annotate nonnull return values, which are only applicable to 1645 // pointer types. 1646 if (!F->getReturnType()->isPointerTy()) 1647 continue; 1648 1649 bool Speculative = false; 1650 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1651 if (!Speculative) { 1652 // Mark the function eagerly since we may discover a function 1653 // which prevents us from speculating about the entire SCC 1654 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1655 << " as nonnull\n"); 1656 F->addRetAttr(Attribute::NonNull); 1657 ++NumNonNullReturn; 1658 Changed.insert(F); 1659 } 1660 continue; 1661 } 1662 // At least one function returns something which could be null, can't 1663 // speculate any more. 1664 SCCReturnsNonNull = false; 1665 } 1666 1667 if (SCCReturnsNonNull) { 1668 for (Function *F : SCCNodes) { 1669 if (F->getAttributes().hasRetAttr(Attribute::NonNull) || 1670 !F->getReturnType()->isPointerTy()) 1671 continue; 1672 1673 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1674 F->addRetAttr(Attribute::NonNull); 1675 ++NumNonNullReturn; 1676 Changed.insert(F); 1677 } 1678 } 1679 } 1680 1681 /// Deduce noundef attributes for the SCC. 1682 static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, 1683 SmallSet<Function *, 8> &Changed) { 1684 // Check each function in turn, determining which functions return noundef 1685 // values. 1686 for (Function *F : SCCNodes) { 1687 // Already noundef. 1688 AttributeList Attrs = F->getAttributes(); 1689 if (Attrs.hasRetAttr(Attribute::NoUndef)) 1690 continue; 1691 1692 // We can infer and propagate function attributes only when we know that the 1693 // definition we'll get at link time is *exactly* the definition we see now. 1694 // For more details, see GlobalValue::mayBeDerefined. 1695 if (!F->hasExactDefinition()) 1696 return; 1697 1698 // MemorySanitizer assumes that the definition and declaration of a 1699 // function will be consistent. A function with sanitize_memory attribute 1700 // should be skipped from inference. 1701 if (F->hasFnAttribute(Attribute::SanitizeMemory)) 1702 continue; 1703 1704 if (F->getReturnType()->isVoidTy()) 1705 continue; 1706 1707 const DataLayout &DL = F->getDataLayout(); 1708 if (all_of(*F, [&](BasicBlock &BB) { 1709 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 1710 // TODO: perform context-sensitive analysis? 1711 Value *RetVal = Ret->getReturnValue(); 1712 if (!isGuaranteedNotToBeUndefOrPoison(RetVal)) 1713 return false; 1714 1715 // We know the original return value is not poison now, but it 1716 // could still be converted to poison by another return attribute. 1717 // Try to explicitly re-prove the relevant attributes. 
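            // The checks below re-establish nonnull, alignment, and range on
            // the raw return value; if any of them cannot be re-proven, we
            // conservatively skip noundef inference for this function.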
1718 if (Attrs.hasRetAttr(Attribute::NonNull) && 1719 !isKnownNonZero(RetVal, DL)) 1720 return false; 1721 1722 if (MaybeAlign Align = Attrs.getRetAlignment()) 1723 if (RetVal->getPointerAlignment(DL) < *Align) 1724 return false; 1725 1726 Attribute Attr = Attrs.getRetAttr(Attribute::Range); 1727 if (Attr.isValid() && 1728 !Attr.getRange().contains( 1729 computeConstantRange(RetVal, /*ForSigned=*/false))) 1730 return false; 1731 } 1732 return true; 1733 })) { 1734 F->addRetAttr(Attribute::NoUndef); 1735 ++NumNoUndefReturn; 1736 Changed.insert(F); 1737 } 1738 } 1739 } 1740 1741 namespace { 1742 1743 /// Collects a set of attribute inference requests and performs them all in one 1744 /// go on a single SCC Node. Inference involves scanning function bodies 1745 /// looking for instructions that violate attribute assumptions. 1746 /// As soon as all the bodies are fine we are free to set the attribute. 1747 /// Customization of inference for individual attributes is performed by 1748 /// providing a handful of predicates for each attribute. 1749 class AttributeInferer { 1750 public: 1751 /// Describes a request for inference of a single attribute. 1752 struct InferenceDescriptor { 1753 1754 /// Returns true if this function does not have to be handled. 1755 /// General intent for this predicate is to provide an optimization 1756 /// for functions that do not need this attribute inference at all 1757 /// (say, for functions that already have the attribute). 1758 std::function<bool(const Function &)> SkipFunction; 1759 1760 /// Returns true if this instruction violates attribute assumptions. 1761 std::function<bool(Instruction &)> InstrBreaksAttribute; 1762 1763 /// Sets the inferred attribute for this function. 1764 std::function<void(Function &)> SetAttribute; 1765 1766 /// Attribute we derive. 1767 Attribute::AttrKind AKind; 1768 1769 /// If true, only "exact" definitions can be used to infer this attribute. 1770 /// See GlobalValue::isDefinitionExact. 1771 bool RequiresExactDefinition; 1772 1773 InferenceDescriptor(Attribute::AttrKind AK, 1774 std::function<bool(const Function &)> SkipFunc, 1775 std::function<bool(Instruction &)> InstrScan, 1776 std::function<void(Function &)> SetAttr, 1777 bool ReqExactDef) 1778 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), 1779 SetAttribute(SetAttr), AKind(AK), 1780 RequiresExactDefinition(ReqExactDef) {} 1781 }; 1782 1783 private: 1784 SmallVector<InferenceDescriptor, 4> InferenceDescriptors; 1785 1786 public: 1787 void registerAttrInference(InferenceDescriptor AttrInference) { 1788 InferenceDescriptors.push_back(AttrInference); 1789 } 1790 1791 void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed); 1792 }; 1793 1794 /// Perform all the requested attribute inference actions according to the 1795 /// attribute predicates stored before. 1796 void AttributeInferer::run(const SCCNodeSet &SCCNodes, 1797 SmallSet<Function *, 8> &Changed) { 1798 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; 1799 // Go through all the functions in SCC and check corresponding attribute 1800 // assumptions for each of them. Attributes that are invalid for this SCC 1801 // will be removed from InferInSCC. 1802 for (Function *F : SCCNodes) { 1803 1804 // No attributes whose assumptions are still valid - done. 1805 if (InferInSCC.empty()) 1806 return; 1807 1808 // Check if our attributes ever need scanning/can be scanned. 
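    // (The erase_if below invalidates, for the whole SCC, any descriptor whose
    // attribute cannot be inferred from F -- i.e. F is a declaration, or the
    // attribute requires an exact definition that F lacks; descriptors that
    // merely skip F are kept.)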
1809 llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { 1810 if (ID.SkipFunction(*F)) 1811 return false; 1812 1813 // Remove from further inference (invalidate) when visiting a function 1814 // that has no instructions to scan/has an unsuitable definition. 1815 return F->isDeclaration() || 1816 (ID.RequiresExactDefinition && !F->hasExactDefinition()); 1817 }); 1818 1819 // For each attribute still in InferInSCC that doesn't explicitly skip F, 1820 // set up the F instructions scan to verify assumptions of the attribute. 1821 SmallVector<InferenceDescriptor, 4> InferInThisFunc; 1822 llvm::copy_if( 1823 InferInSCC, std::back_inserter(InferInThisFunc), 1824 [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); 1825 1826 if (InferInThisFunc.empty()) 1827 continue; 1828 1829 // Start instruction scan. 1830 for (Instruction &I : instructions(*F)) { 1831 llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { 1832 if (!ID.InstrBreaksAttribute(I)) 1833 return false; 1834 // Remove attribute from further inference on any other functions 1835 // because attribute assumptions have just been violated. 1836 llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { 1837 return D.AKind == ID.AKind; 1838 }); 1839 // Remove attribute from the rest of current instruction scan. 1840 return true; 1841 }); 1842 1843 if (InferInThisFunc.empty()) 1844 break; 1845 } 1846 } 1847 1848 if (InferInSCC.empty()) 1849 return; 1850 1851 for (Function *F : SCCNodes) 1852 // At this point InferInSCC contains only functions that were either: 1853 // - explicitly skipped from scan/inference, or 1854 // - verified to have no instructions that break attribute assumptions. 1855 // Hence we just go and force the attribute for all non-skipped functions. 1856 for (auto &ID : InferInSCC) { 1857 if (ID.SkipFunction(*F)) 1858 continue; 1859 Changed.insert(F); 1860 ID.SetAttribute(*F); 1861 } 1862 } 1863 1864 struct SCCNodesResult { 1865 SCCNodeSet SCCNodes; 1866 bool HasUnknownCall; 1867 }; 1868 1869 } // end anonymous namespace 1870 1871 /// Helper for non-Convergent inference predicate InstrBreaksAttribute. 1872 static bool InstrBreaksNonConvergent(Instruction &I, 1873 const SCCNodeSet &SCCNodes) { 1874 const CallBase *CB = dyn_cast<CallBase>(&I); 1875 // Breaks non-convergent assumption if CS is a convergent call to a function 1876 // not in the SCC. 1877 return CB && CB->isConvergent() && 1878 !SCCNodes.contains(CB->getCalledFunction()); 1879 } 1880 1881 /// Helper for NoUnwind inference predicate InstrBreaksAttribute. 1882 static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { 1883 if (!I.mayThrow(/* IncludePhaseOneUnwind */ true)) 1884 return false; 1885 if (const auto *CI = dyn_cast<CallInst>(&I)) { 1886 if (Function *Callee = CI->getCalledFunction()) { 1887 // I is a may-throw call to a function inside our SCC. This doesn't 1888 // invalidate our current working assumption that the SCC is no-throw; we 1889 // just have to scan that other function. 1890 if (SCCNodes.contains(Callee)) 1891 return false; 1892 } 1893 } 1894 return true; 1895 } 1896 1897 /// Helper for NoFree inference predicate InstrBreaksAttribute. 1898 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { 1899 CallBase *CB = dyn_cast<CallBase>(&I); 1900 if (!CB) 1901 return false; 1902 1903 if (CB->hasFnAttr(Attribute::NoFree)) 1904 return false; 1905 1906 // Speculatively assume in SCC. 
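  // Calls to other functions in the same SCC are assumed not to free memory;
  // if any SCC member later turns out to violate that assumption, the
  // AttributeInferer drops the nofree inference for the whole SCC.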
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile accesses may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic
  // operations.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmove intrinsics are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Any functions that are changed are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Any functions that are changed are added to \p Changed.
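/// The attributes handled here are nounwind, nofree, and nosync; each one is
/// inferred for the whole SCC only if no instruction in any SCC member
/// violates the corresponding assumption.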
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer the nounwind attribute for all functions in the SCC if
    // no callsite within the SCC may throw (calls to functions within the SCC
    // are exempt). Note that the nounwind attribute suffers from derefinement:
    // results may change depending on how functions are optimized, so it can
    // be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer the nofree attribute for all functions in the SCC if
    // no callsite within the SCC directly or indirectly frees memory (calls
    // to functions within the SCC are exempt). Note that the nofree attribute
    // suffers from derefinement: results may change depending on how functions
    // are optimized, so it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion: F is not currently
  // marked norecurse, so any call from F back to F will fail the check below.
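  // Illustrative example: a body whose only calls are to functions already
  // marked norecurse (or to nocallback declarations) passes this scan, while
  // any indirect call, or any call back to F itself, makes us give up.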
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

static bool allPathsGoThroughCold(Function &F) {
  SmallDenseMap<BasicBlock *, bool, 16> ColdPaths;
  ColdPaths[&F.front()] = false;
  SmallVector<BasicBlock *> Jobs;
  Jobs.push_back(&F.front());

  while (!Jobs.empty()) {
    BasicBlock *BB = Jobs.pop_back_val();

    // If the block contains a cold callsite, this path through the function is
    // cold. Ignore whether the instructions are actually guaranteed to
    // transfer execution; divergent behavior is considered unlikely.
    if (any_of(*BB, [](Instruction &I) {
          if (auto *CB = dyn_cast<CallBase>(&I))
            return CB->hasFnAttr(Attribute::Cold);
          return false;
        })) {
      ColdPaths[BB] = true;
      continue;
    }

    auto Succs = successors(BB);
    // We found a path that doesn't go through any cold callsite.
    if (Succs.empty())
      return false;

    // We didn't find a cold callsite in this BB, so check that all successors
    // contain a cold callsite (or that their successors do).
    // Potential TODO: We could use static branch hints to assume certain
    // successor paths are inherently cold, irrespective of whether they
    // contain a cold callsite.
    for (BasicBlock *Succ : Succs) {
      // Start with false; this is necessary to ensure we don't treat blocks in
      // loops as cold.
      auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false);
      if (!Inserted) {
        if (Iter->second)
          continue;
        return false;
      }
      Jobs.push_back(Succ);
    }
  }
  return true;
}

// Set the cold function attribute if possible.
static void addColdAttrs(const SCCNodeSet &SCCNodes,
                         SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot))
      continue;

    // Potential TODO: We could add attribute `cold` on functions with `coldcc`.
    if (allPathsGoThroughCold(*F)) {
      F->addFnAttr(Attribute::Cold);
      ++NumCold;
      Changed.insert(F);
      continue;
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
2190 if (!F.hasExactDefinition()) 2191 return false; 2192 2193 // Must-progress function without side-effects must return. 2194 if (F.mustProgress() && F.onlyReadsMemory()) 2195 return true; 2196 2197 // Can only analyze functions with a definition. 2198 if (F.isDeclaration()) 2199 return false; 2200 2201 // Functions with loops require more sophisticated analysis, as the loop 2202 // may be infinite. For now, don't try to handle them. 2203 SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; 2204 FindFunctionBackedges(F, Backedges); 2205 if (!Backedges.empty()) 2206 return false; 2207 2208 // If there are no loops, then the function is willreturn if all calls in 2209 // it are willreturn. 2210 return all_of(instructions(F), [](const Instruction &I) { 2211 return I.willReturn(); 2212 }); 2213 } 2214 2215 // Set the willreturn function attribute if possible. 2216 static void addWillReturn(const SCCNodeSet &SCCNodes, 2217 SmallSet<Function *, 8> &Changed) { 2218 for (Function *F : SCCNodes) { 2219 if (!F || F->willReturn() || !functionWillReturn(*F)) 2220 continue; 2221 2222 F->setWillReturn(); 2223 NumWillReturn++; 2224 Changed.insert(F); 2225 } 2226 } 2227 2228 static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { 2229 SCCNodesResult Res; 2230 Res.HasUnknownCall = false; 2231 for (Function *F : Functions) { 2232 if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) || 2233 F->isPresplitCoroutine()) { 2234 // Treat any function we're trying not to optimize as if it were an 2235 // indirect call and omit it from the node set used below. 2236 Res.HasUnknownCall = true; 2237 continue; 2238 } 2239 // Track whether any functions in this SCC have an unknown call edge. 2240 // Note: if this is ever a performance hit, we can common it with 2241 // subsequent routines which also do scans over the instructions of the 2242 // function. 2243 if (!Res.HasUnknownCall) { 2244 for (Instruction &I : instructions(*F)) { 2245 if (auto *CB = dyn_cast<CallBase>(&I)) { 2246 if (!CB->getCalledFunction()) { 2247 Res.HasUnknownCall = true; 2248 break; 2249 } 2250 } 2251 } 2252 } 2253 Res.SCCNodes.insert(F); 2254 } 2255 return Res; 2256 } 2257 2258 template <typename AARGetterT> 2259 static SmallSet<Function *, 8> 2260 deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, 2261 bool ArgAttrsOnly) { 2262 SCCNodesResult Nodes = createSCCNodeSet(Functions); 2263 2264 // Bail if the SCC only contains optnone functions. 2265 if (Nodes.SCCNodes.empty()) 2266 return {}; 2267 2268 SmallSet<Function *, 8> Changed; 2269 if (ArgAttrsOnly) { 2270 // ArgAttrsOnly means to only infer attributes that may aid optimizations 2271 // on the *current* function. "initializes" attribute is to aid 2272 // optimizations (like DSE) on the callers, so skip "initializes" here. 2273 addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); 2274 return Changed; 2275 } 2276 2277 addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); 2278 addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); 2279 addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false); 2280 inferConvergent(Nodes.SCCNodes, Changed); 2281 addNoReturnAttrs(Nodes.SCCNodes, Changed); 2282 addColdAttrs(Nodes.SCCNodes, Changed); 2283 addWillReturn(Nodes.SCCNodes, Changed); 2284 addNoUndefAttrs(Nodes.SCCNodes, Changed); 2285 2286 // If we have no external nodes participating in the SCC, we can deduce some 2287 // more precise attributes as well. 
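  // ("No external nodes" means there were no unknown call edges and no
  // functions excluded from the node set above, e.g. optnone, naked, or
  // pre-split coroutine functions.)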
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // If requested, skip full inference for non-recursive functions and only
  // infer argument attributes for them, because argument attributes can affect
  // optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether a call's callee modifies memory and queries that
    // through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse and so we only need to save the single-function
  // SCCs, as SCCs with multiple functions in them will clearly be recursive.
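  // Reversing the accumulated worklist below yields a top-down (callers before
  // callees) visit order, which is what addNoRecurseAttrsTopDown needs: a
  // function can be marked norecurse only once all of its callers already are.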
2425 2426 SmallVector<Function *, 16> Worklist; 2427 CG.buildRefSCCs(); 2428 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { 2429 for (LazyCallGraph::SCC &SCC : RC) { 2430 if (SCC.size() != 1) 2431 continue; 2432 Function &F = SCC.begin()->getFunction(); 2433 if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) 2434 Worklist.push_back(&F); 2435 } 2436 } 2437 bool Changed = false; 2438 for (auto *F : llvm::reverse(Worklist)) 2439 Changed |= addNoRecurseAttrsTopDown(*F); 2440 2441 return Changed; 2442 } 2443 2444 PreservedAnalyses 2445 ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { 2446 auto &CG = AM.getResult<LazyCallGraphAnalysis>(M); 2447 2448 if (!deduceFunctionAttributeInRPO(M, CG)) 2449 return PreservedAnalyses::all(); 2450 2451 PreservedAnalyses PA; 2452 PA.preserve<LazyCallGraphAnalysis>(); 2453 return PA; 2454 } 2455