1 //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass marks all internal functions as always_inline and creates 11 /// duplicates of all other functions and marks the duplicates as always_inline. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUTargetMachine.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Transforms/Utils/Cloning.h" 21 22 using namespace llvm; 23 24 namespace { 25 26 static cl::opt<bool> StressCalls( 27 "amdgpu-stress-function-calls", 28 cl::Hidden, 29 cl::desc("Force all functions to be noinline"), 30 cl::init(false)); 31 32 class AMDGPUAlwaysInline : public ModulePass { 33 bool GlobalOpt; 34 35 void recursivelyVisitUsers(GlobalValue &GV, 36 SmallPtrSetImpl<Function *> &FuncsToAlwaysInline); 37 public: 38 static char ID; 39 40 AMDGPUAlwaysInline(bool GlobalOpt = false) : 41 ModulePass(ID), GlobalOpt(GlobalOpt) { } 42 bool runOnModule(Module &M) override; 43 44 void getAnalysisUsage(AnalysisUsage &AU) const override { 45 AU.setPreservesAll(); 46 } 47 }; 48 49 } // End anonymous namespace 50 51 INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", 52 "AMDGPU Inline All Functions", false, false) 53 54 char AMDGPUAlwaysInline::ID = 0; 55 56 void AMDGPUAlwaysInline::recursivelyVisitUsers( 57 GlobalValue &GV, 58 SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { 59 SmallVector<User *, 16> Stack; 60 61 SmallPtrSet<const Value *, 8> Visited; 62 63 for (User *U : GV.users()) 64 Stack.push_back(U); 65 66 while (!Stack.empty()) { 67 User *U = Stack.pop_back_val(); 68 if (!Visited.insert(U).second) 69 continue; 70 71 if (Instruction *I = dyn_cast<Instruction>(U)) { 72 Function *F = I->getParent()->getParent(); 73 if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { 74 FuncsToAlwaysInline.insert(F); 75 Stack.push_back(F); 76 } 77 78 // No need to look at further users, but we do need to inline any callers. 79 continue; 80 } 81 82 for (User *UU : U->users()) 83 Stack.push_back(UU); 84 } 85 } 86 87 bool AMDGPUAlwaysInline::runOnModule(Module &M) { 88 std::vector<GlobalAlias*> AliasesToRemove; 89 90 SmallPtrSet<Function *, 8> FuncsToAlwaysInline; 91 SmallPtrSet<Function *, 8> FuncsToNoInline; 92 93 for (GlobalAlias &A : M.aliases()) { 94 if (Function* F = dyn_cast<Function>(A.getAliasee())) { 95 A.replaceAllUsesWith(F); 96 AliasesToRemove.push_back(&A); 97 } 98 99 // FIXME: If the aliasee isn't a function, it's some kind of constant expr 100 // cast that won't be inlined through. 101 } 102 103 if (GlobalOpt) { 104 for (GlobalAlias* A : AliasesToRemove) { 105 A->eraseFromParent(); 106 } 107 } 108 109 // Always force inlining of any function that uses an LDS global address. This 110 // is something of a workaround because we don't have a way of supporting LDS 111 // objects defined in functions. LDS is always allocated by a kernel, and it 112 // is difficult to manage LDS usage if a function may be used by multiple 113 // kernels. 114 // 115 // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this 116 // should only appear when IPO passes manages to move LDs defined in a kernel 117 // into a single user function. 118 119 for (GlobalVariable &GV : M.globals()) { 120 // TODO: Region address 121 unsigned AS = GV.getType()->getAddressSpace(); 122 if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS) 123 continue; 124 125 recursivelyVisitUsers(GV, FuncsToAlwaysInline); 126 } 127 128 if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { 129 auto IncompatAttr 130 = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; 131 132 for (Function &F : M) { 133 if (!F.isDeclaration() && !F.use_empty() && 134 !F.hasFnAttribute(IncompatAttr)) { 135 if (StressCalls) { 136 if (!FuncsToAlwaysInline.count(&F)) 137 FuncsToNoInline.insert(&F); 138 } else 139 FuncsToAlwaysInline.insert(&F); 140 } 141 } 142 } 143 144 for (Function *F : FuncsToAlwaysInline) 145 F->addFnAttr(Attribute::AlwaysInline); 146 147 for (Function *F : FuncsToNoInline) 148 F->addFnAttr(Attribute::NoInline); 149 150 return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); 151 } 152 153 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { 154 return new AMDGPUAlwaysInline(GlobalOpt); 155 } 156 157