//===- FunctionAttrs.cpp - Pass which marks function attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph, deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 87 cl::desc("Try to propagate nonnull argument attributes from callsites to " 88 "caller functions.")); 89 90 static cl::opt<bool> DisableNoUnwindInference( 91 "disable-nounwind-inference", cl::Hidden, 92 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 93 94 static cl::opt<bool> DisableNoFreeInference( 95 "disable-nofree-inference", cl::Hidden, 96 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 97 98 namespace { 99 100 using SCCNodeSet = SmallSetVector<Function *, 8>; 101 102 } // end anonymous namespace 103 104 /// Returns the memory access attribute for function F using AAR for AA results, 105 /// where SCCNodes is the current SCC. 106 /// 107 /// If ThisBody is true, this function may examine the function body and will 108 /// return a result pertaining to this copy of the function. If it is false, the 109 /// result will be based only on AA results for the function declaration; it 110 /// will be assumed that some other (perhaps less optimized) version of the 111 /// function may be selected at link time. 112 static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody, 113 AAResults &AAR, 114 const SCCNodeSet &SCCNodes) { 115 FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); 116 if (MRB == FMRB_DoesNotAccessMemory) 117 // Already perfect! 118 return MAK_ReadNone; 119 120 if (!ThisBody) { 121 if (AliasAnalysis::onlyReadsMemory(MRB)) 122 return MAK_ReadOnly; 123 124 if (AliasAnalysis::doesNotReadMemory(MRB)) 125 return MAK_WriteOnly; 126 127 // Conservatively assume it reads and writes to memory. 128 return MAK_MayWrite; 129 } 130 131 // Scan the function body for instructions that may read or write memory. 132 bool ReadsMemory = false; 133 bool WritesMemory = false; 134 for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { 135 Instruction *I = &*II; 136 137 // Some instructions can be ignored even if they read or write memory. 138 // Detect these now, skipping to the next instruction if one is found. 139 if (auto *Call = dyn_cast<CallBase>(I)) { 140 // Ignore calls to functions in the same SCC, as long as the call sites 141 // don't have operand bundles. Calls with operand bundles are allowed to 142 // have memory effects not described by the memory effects of the call 143 // target. 144 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 145 SCCNodes.count(Call->getCalledFunction())) 146 continue; 147 FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call); 148 ModRefInfo MRI = createModRefInfo(MRB); 149 150 // If the call doesn't access memory, we're done. 151 if (isNoModRef(MRI)) 152 continue; 153 154 // A pseudo probe call shouldn't change any function attribute since it 155 // doesn't translate to a real instruction. It comes with a memory access 156 // tag to prevent itself being removed by optimizations and not block 157 // other instructions being optimized. 158 if (isa<PseudoProbeInst>(I)) 159 continue; 160 161 if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { 162 // The call could access any memory. If that includes writes, note it. 163 if (isModSet(MRI)) 164 WritesMemory = true; 165 // If it reads, note it. 166 if (isRefSet(MRI)) 167 ReadsMemory = true; 168 continue; 169 } 170 171 // Check whether all pointer arguments point to local memory, and 172 // ignore calls that only access local memory. 
      for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        AAMDNodes AAInfo;
        I->getAAMetadata(AAInfo);
        MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (isModSet(MRI))
          // Writes non-local memory.
          WritesMemory = true;
        if (isRefSet(MRI))
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously! Check if they
    // read or write memory.
    //
    // If this instruction may write to memory, remember that.
    WritesMemory |= I->mayWriteToMemory();

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  if (WritesMemory) {
    if (!ReadsMemory)
      return MAK_WriteOnly;
    else
      return MAK_MayWrite;
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}

MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                       AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}

/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
  // Check if any of the functions in the SCC read or write memory. If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  bool WritesMemory = false;
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);

    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
                                      AAR, SCCNodes)) {
    case MAK_MayWrite:
      return false;
    case MAK_ReadOnly:
      ReadsMemory = true;
      break;
    case MAK_WriteOnly:
      WritesMemory = true;
      break;
    case MAK_ReadNone:
      // Nothing to do!
      break;
    }
  }

  // If the SCC contains both functions that read and functions that write,
  // then we cannot add readonly attributes.
  if (ReadsMemory && WritesMemory)
    return false;

  // Success! Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
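  // For example (hypothetical), an SCC whose functions only read global state
  // gets readonly, and one that touches no memory at all gets readnone.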
  bool MadeChange = false;

  for (Function *F : SCCNodes) {
    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    if (F->doesNotReadMemory() && WritesMemory)
      continue;

    MadeChange = true;

    // Clear out any existing attributes.
    AttrBuilder AttrsToRemove;
    AttrsToRemove.addAttribute(Attribute::ReadOnly);
    AttrsToRemove.addAttribute(Attribute::ReadNone);
    AttrsToRemove.addAttribute(Attribute::WriteOnly);

    if (!WritesMemory && !ReadsMemory) {
      // Clear out any "access range attributes" if readnone was deduced.
      AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
      AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
    }
    F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);

    // Add in the new attribute.
    if (WritesMemory && !ReadsMemory)
      F->addFnAttr(Attribute::WriteOnly);
    else
      F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);

    if (WritesMemory && !ReadsMemory)
      ++NumWriteOnly;
    else if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }

  return MadeChange;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
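///
/// For illustration (hypothetical example): with mutually recursive
///   void g(int *P) { h(P); }
///   void h(int *Q) { g(Q); }
/// in one SCC, the use of P at the call to h is recorded in Uses instead of
/// being treated as a capture of P.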
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    // Note: the callee and the two successor blocks *follow* the argument
    // operands. This means there is no need to adjust UseIndex to account for
    // these.

    unsigned UseIndex =
        std::distance(const_cast<const Use *>(CB->arg_begin()), U);

    assert(UseIndex < CB->data_operands_size() &&
           "Indirect function calls should have been filtered above!");

    if (UseIndex >= CB->getNumArgOperands()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand
      // bundle use. In this case it does not matter if the callee is within
      // our SCC or not -- we've been captured in some unknown way, and we
      // have to be conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerReadAttrs(Argument *A,
                          const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  // We don't need to track IsWritten. If A is written to, return immediately.
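  //
  // For illustration (hypothetical example): for
  //   int f(int *P) { return *P; }
  // the use walk below sees only a load, so P is deemed ReadOnly; if P were
  // only compared against null (an ICmp use), it would be ReadNone.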

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value
      // isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      bool Captures = true;

      if (I->getType()->isVoidTy())
        Captures = false;

      auto AddUsersToWorklistIfCapturing = [&] {
        if (Captures)
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      };

      CallBase &CB = cast<CallBase>(*I);
      if (CB.doesNotAccessMemory()) {
        AddUsersToWorklistIfCapturing();
        continue;
      }

      Function *F = CB.getCalledFunction();
      if (!F) {
        if (CB.onlyReadsMemory()) {
          IsRead = true;
          AddUsersToWorklistIfCapturing();
          continue;
        }
        return Attribute::None;
      }

      // Note: the callee and the two successor blocks *follow* the argument
      // operands. This means there is no need to adjust UseIndex to account
      // for these.

      unsigned UseIndex = std::distance(CB.arg_begin(), U);

      // U cannot be the callee operand use: since we're exploring the
      // transitive uses of an Argument, having such a use be a callee would
      // imply the call site is an indirect call or invoke; and we'd take the
      // early exit above.
      assert(UseIndex < CB.data_operands_size() &&
             "Data operand use expected!");

      bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();

      if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
        assert(F->isVarArg() && "More params than args in non-varargs call");
        return Attribute::None;
      }

      Captures &= !CB.doesNotCapture(UseIndex);

      // Since the optimizer (by design) cannot see the data flow
      // corresponding to an operand bundle use, these cannot participate in
      // the optimistic SCC analysis. Instead, we model the operand bundle
      // uses as arguments in a call to a function external to the SCC.
      if (IsOperandBundleUse ||
          !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {

        // The accessors used on call site here do the right thing for calls
        // and invokes with operand bundles.

        if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(UseIndex))
          return Attribute::None;
        if (!CB.doesNotAccessMemory(UseIndex))
          IsRead = true;
      }

      AddUsersToWorklistIfCapturing();
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  // Check each function in turn, determining if an argument is always
  // returned.
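  // For illustration (hypothetical example), in
  //   int *identity(int *P) { return P; }
  // every return returns the first argument, so P can be marked 'returned'.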
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (llvm::any_of(F->args(),
                     [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
      continue;

    auto FindRetArg = [&]() -> Value * {
      Value *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
          if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Value *RetArg = FindRetArg()) {
      auto *A = cast<Argument>(RetArg);
      A->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed = true;
    }
  }

  return Changed;
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information
/// loss when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are
  // guaranteed to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the
          // value must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone) &&
         "Must be a Read attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  A->addAttr(R);
  R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    Changed |= addArgumentAttrsFromCallsites(*F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
           ++A) {
        if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
          A->addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed = true;
        }
      }
      continue;
    }

    for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
         ++A) {
      if (!A->getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A->hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&*A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A->addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed = true;
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&*A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &*A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A->onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone without doing an SCC?
        // Note that we don't allow any calls at all here, or else our result
        // will be dependent on the iteration order through the functions in
        // the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
        if (R != Attribute::None)
          Changed = addReadAttr(A, R);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already
  // been made. If the definition doesn't have a 'nocapture' attribute by now,
  // it captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
f(x); }" 783 if (ArgumentSCC[0]->Uses.size() == 1 && 784 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 785 Argument *A = ArgumentSCC[0]->Definition; 786 A->addAttr(Attribute::NoCapture); 787 ++NumNoCapture; 788 Changed = true; 789 } 790 continue; 791 } 792 793 bool SCCCaptured = false; 794 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 795 I != E && !SCCCaptured; ++I) { 796 ArgumentGraphNode *Node = *I; 797 if (Node->Uses.empty()) { 798 if (!Node->Definition->hasNoCaptureAttr()) 799 SCCCaptured = true; 800 } 801 } 802 if (SCCCaptured) 803 continue; 804 805 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 806 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 807 // quickly looking up whether a given Argument is in this ArgumentSCC. 808 for (ArgumentGraphNode *I : ArgumentSCC) { 809 ArgumentSCCNodes.insert(I->Definition); 810 } 811 812 for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); 813 I != E && !SCCCaptured; ++I) { 814 ArgumentGraphNode *N = *I; 815 for (ArgumentGraphNode *Use : N->Uses) { 816 Argument *A = Use->Definition; 817 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 818 continue; 819 SCCCaptured = true; 820 break; 821 } 822 } 823 if (SCCCaptured) 824 continue; 825 826 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 827 Argument *A = ArgumentSCC[i]->Definition; 828 A->addAttr(Attribute::NoCapture); 829 ++NumNoCapture; 830 Changed = true; 831 } 832 833 // We also want to compute readonly/readnone. With a small number of false 834 // negatives, we can assume that any pointer which is captured isn't going 835 // to be provably readonly or readnone, since by definition we can't 836 // analyze all uses of a captured pointer. 837 // 838 // The false negatives happen when the pointer is captured by a function 839 // that promises readonly/readnone behaviour on the pointer, then the 840 // pointer's lifetime ends before anything that writes to arbitrary memory. 841 // Also, a readonly/readnone pointer may be returned, but returning a 842 // pointer is capturing it. 843 844 Attribute::AttrKind ReadAttr = Attribute::ReadNone; 845 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 846 Argument *A = ArgumentSCC[i]->Definition; 847 Attribute::AttrKind K = determinePointerReadAttrs(A, ArgumentSCCNodes); 848 if (K == Attribute::ReadNone) 849 continue; 850 if (K == Attribute::ReadOnly) { 851 ReadAttr = Attribute::ReadOnly; 852 continue; 853 } 854 ReadAttr = K; 855 break; 856 } 857 858 if (ReadAttr != Attribute::None) { 859 for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { 860 Argument *A = ArgumentSCC[i]->Definition; 861 Changed = addReadAttr(A, ReadAttr); 862 } 863 } 864 } 865 866 return Changed; 867 } 868 869 /// Tests whether a function is "malloc-like". 870 /// 871 /// A function is "malloc-like" if it returns either null or a pointer that 872 /// doesn't alias any other pointer visible to the caller. 
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        LLVM_FALLTHROUGH;
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return false;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return false;
  }

  bool MadeChange = false;
  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    MadeChange = true;
  }

  return MadeChange;
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
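///
/// For illustration (hypothetical example): in
///   int *addr_of_global() { static int G; return &G; }
/// every value flowing to a return is known non-null, so this returns true
/// and leaves \p Speculative false.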
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getParent()->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    };
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
  // Speculate that all functions in the SCC return only nonnull pointers. We
  // may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  bool MadeChange = false;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                        Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that
    // the definition we'll get at link time is *exactly* the definition we
    // see now. For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return false;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
        ++NumNonNullReturn;
        MadeChange = true;
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
      ++NumNonNullReturn;
      MadeChange = true;
    }
  }

  return MadeChange;
}

namespace {

/// Collects a set of attribute inference requests and performs them all in
/// one go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  bool run(const SCCNodeSet &SCCNodes);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
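///
/// For illustration, a caller registers one descriptor per attribute (see
/// inferConvergent and inferAttrsFromFunctionBodies below) and then invokes
/// run() once over the SCC.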
bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return false;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return false;

  bool Changed = false;
  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes for which every
    // function in the SCC was either explicitly skipped from scan/inference
    // or verified to have no instructions that break the attribute's
    // assumptions. Hence we just go and force the attribute for all
    // non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed = true;
      ID.SetAttribute(*F);
    }
  return Changed;
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks non-convergent assumption if CB is a convergent call to a function
  // not in the SCC.
  return CB && CB->isConvergent() &&
         SCCNodes.count(CB->getCalledFunction()) == 0;
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow())
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw;
      // we just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
static bool inferConvergent(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  return AI.run(SCCNodes);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
///   - addition of NoUnwind attribute
///   - addition of NoFree attribute
///
/// Returns true if any changes to function attributes were made.
static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that the nounwind attribute suffers
    // from derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC).
    // Note that the nofree attribute suffers from derefinement - results may
    // change depending on how functions are optimized. Thus it can be
    // inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  return AI.run(SCCNodes);
}

static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return false;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return false;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion, as F is not
  // currently marked norecurse, so any call from F to F will not be to a
  // function marked norecurse.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F || !Callee->doesNotRecurse())
          // Function calls a potentially recursive function.
          return false;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function
  // cannot recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// Set the noreturn function attribute if possible.
static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() ||
        F->hasFnAttribute(Attribute::Naked) || F->doesNotReturn())
      continue;

    // The function can return if any basic block can return.
    // FIXME: this doesn't handle recursion or unreachable blocks.
    if (none_of(*F, basicBlockCanReturn)) {
      F->setDoesNotReturn();
      Changed = true;
    }
  }

  return Changed;
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}

// Set the willreturn function attribute if possible.
static bool addWillReturn(const SCCNodeSet &SCCNodes) {
  bool Changed = false;

  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed = true;
  }

  return Changed;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different than the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile accesses may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmoves are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Infer the nosync attribute.
static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
  AttributeInferer AI;
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break the nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});
  return AI.run(SCCNodes);
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
                                   AARGetterT &&AARGetter) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);
  bool Changed = false;

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return Changed;

  Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
  Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
  Changed |= addArgumentAttrs(Nodes.SCCNodes);
  Changed |= inferConvergent(Nodes.SCCNodes);
  Changed |= addNoReturnAttrs(Nodes.SCCNodes);
  Changed |= addWillReturn(Nodes.SCCNodes);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    Changed |= addNoAliasAttrs(Nodes.SCCNodes);
    Changed |= addNonNullAttrs(Nodes.SCCNodes);
    Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
    Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
  }

  Changed |= addNoSyncAttr(Nodes.SCCNodes);

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      Changed |= inferAttributesFromOthers(*F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  if (deriveAttrsInPostOrder(Functions, AARGetter)) {
    // We have not changed the call graph or removed/added functions.
    PreservedAnalyses PA;
    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
    return PA;
  }

  return PreservedAnalyses::all();
}

namespace {

struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
  // Pass identification, replacement for typeid
  static char ID;

  PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
    initializePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnSCC(CallGraphSCC &SCC) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    getAAResultsAnalysisUsage(AU);
    CallGraphSCCPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                      "Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
                    "Deduce function attributes", false, false)

Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
  return new PostOrderFunctionAttrsLegacyPass();
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return deriveAttrsInPostOrder(Functions, AARGetter);
}

bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
  if (skipSCC(SCC))
    return false;
  return runImpl(SCC, LegacyAARGetter(*this));
}

namespace {

struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
  // Pass identification, replacement for typeid
  static char ID;

  ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
    initializeReversePostOrderFunctionAttrsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<CallGraphWrapperPass>();
    AU.addPreserved<CallGraphWrapperPass>();
  }
};

} // end anonymous namespace

char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
                      "rpo-function-attrs", "Deduce function attributes in RPO",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
                    "rpo-function-attrs", "Deduce function attributes in RPO",
                    false, false)

Pass *llvm::createReversePostOrderFunctionAttrsPass() {
  return new ReversePostOrderFunctionAttrsLegacyPass();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to
  // avoid the cost of building up a reversible post-order list. We assert
  // them here to make sure none of the invariants this relies on were
  // violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function
  // but this function could be recursively (indirectly) called. Note that
  // this also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
  for (auto *U : F.users()) {
    auto *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then
  // walk it in reverse. This is simpler than using the RPO iterator
  // infrastructure because we need to combine SCC detection and the PO walk
  // of the call graph. We can also cheat egregiously because we're primarily
  // interested in synthesizing norecurse and so we can only save the singular
  // SCCs as SCCs with multiple functions in them will clearly be recursive.
  SmallVector<Function *, 16> Worklist;
  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
    if (I->size() != 1)
      continue;

    Function *F = I->front()->getFunction();
    if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
        F->hasInternalLinkage())
      Worklist.push_back(F);
  }

  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();

  return deduceFunctionAttributeInRPO(M, CG);
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<CallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<CallGraphAnalysis>();
  return PA;
}