//===-- AMDGPUAlwaysInlinePass.cpp - Inline all functions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass marks functions that must be fully inlined as always_inline.
/// Functions that (transitively) use LDS or region address space globals are
/// forced to be inlined, and when function calls are not enabled for the
/// target, every used function with a definition is marked always_inline.
/// Under -amdgpu-stress-function-calls, functions are instead marked noinline
/// to force calls.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

namespace {

static cl::opt<bool> StressCalls(
  "amdgpu-stress-function-calls",
  cl::Hidden,
  cl::desc("Force all functions to be noinline"),
  cl::init(false));

class AMDGPUAlwaysInline : public ModulePass {
  bool GlobalOpt;

public:
  static char ID;

  AMDGPUAlwaysInline(bool GlobalOpt = false) :
    ModulePass(ID), GlobalOpt(GlobalOpt) { }
  bool runOnModule(Module &M) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
  }
};

} // End anonymous namespace

INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
                "AMDGPU Inline All Functions", false, false)

char AMDGPUAlwaysInline::ID = 0;

/// Add every non-entry function that (transitively) uses \p GV, along with the
/// non-entry functions that call such users, to \p FuncsToAlwaysInline.
static void
recursivelyVisitUsers(GlobalValue &GV,
                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  SmallVector<User *, 16> Stack(GV.users());

  SmallPtrSet<const Value *, 8> Visited;

  while (!Stack.empty()) {
    User *U = Stack.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (Instruction *I = dyn_cast<Instruction>(U)) {
      Function *F = I->getParent()->getParent();
      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
        // FIXME: This is a horrible hack. We should always respect noinline,
        // and just let us hit the error when we can't handle this.
        //
        // Unfortunately, clang adds noinline to all functions at -O0. We have
        // to override this here until that's fixed.
        F->removeFnAttr(Attribute::NoInline);

        FuncsToAlwaysInline.insert(F);
        Stack.push_back(F);
      }

      // No need to look at further users, but we do need to inline any callers.
      continue;
    }

    append_range(Stack, U->users());
  }
}

static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  std::vector<GlobalAlias*> AliasesToRemove;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  SmallPtrSet<Function *, 8> FuncsToNoInline;

  for (GlobalAlias &A : M.aliases()) {
    if (Function* F = dyn_cast<Function>(A.getAliasee())) {
      A.replaceAllUsesWith(F);
      AliasesToRemove.push_back(&A);
    }

    // FIXME: If the aliasee isn't a function, it's some kind of constant expr
    // cast that won't be inlined through.
  }

  if (GlobalOpt) {
    for (GlobalAlias* A : AliasesToRemove) {
      A->eraseFromParent();
    }
  }

  // Always force inlining of any function that uses an LDS global address. This
  // is something of a workaround because we don't have a way of supporting LDS
  // objects defined in functions. LDS is always allocated by a kernel, and it
  // is difficult to manage LDS usage if a function may be used by multiple
  // kernels.
  //
  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
  // should only appear when IPO passes manage to move LDS defined in a kernel
  // into a single user function.
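  //
  // As an illustration (hypothetical IR, not taken from an actual test case):
  // given a module like
  //
  //   @lds = internal addrspace(3) global [64 x i32] undef
  //   define void @helper() { ... uses @lds ... }
  //   define amdgpu_kernel void @kern() { call void @helper() ... }
  //
  // @helper, and any non-kernel functions calling it, must be marked
  // always_inline so that the LDS allocation can be attributed to the single
  // kernel @kern.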

  for (GlobalVariable &GV : M.globals()) {
    // TODO: Region address
    unsigned AS = GV.getAddressSpace();
    if ((AS == AMDGPUAS::REGION_ADDRESS) ||
        (AS == AMDGPUAS::LOCAL_ADDRESS &&
         !AMDGPUTargetMachine::EnableLowerModuleLDS))
      recursivelyVisitUsers(GV, FuncsToAlwaysInline);
  }

  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
    auto IncompatAttr
      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;

    for (Function &F : M) {
      if (!F.isDeclaration() && !F.use_empty() &&
          !F.hasFnAttribute(IncompatAttr)) {
        if (StressCalls) {
          if (!FuncsToAlwaysInline.count(&F))
            FuncsToNoInline.insert(&F);
        } else
          FuncsToAlwaysInline.insert(&F);
      }
    }
  }

  for (Function *F : FuncsToAlwaysInline)
    F->addFnAttr(Attribute::AlwaysInline);

  for (Function *F : FuncsToNoInline)
    F->addFnAttr(Attribute::NoInline);

  return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
}

bool AMDGPUAlwaysInline::runOnModule(Module &M) {
  return alwaysInlineImpl(M, GlobalOpt);
}

ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
  return new AMDGPUAlwaysInline(GlobalOpt);
}

PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
                                              ModuleAnalysisManager &AM) {
  alwaysInlineImpl(M, GlobalOpt);
  return PreservedAnalyses::all();
}
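
// A rough usage sketch (an assumption about how the pass is exposed, not
// guaranteed by this file): if the pass is registered with opt under the
// "amdgpu-always-inline" name used in INITIALIZE_PASS above, its effect can be
// inspected in isolation with something like
//
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-always-inline -S in.ll
//
// The actual inlining is then carried out by the AlwaysInliner
// (-passes=always-inline).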