1 //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass marks all internal functions as always_inline and creates 11 /// duplicates of all other functions and marks the duplicates as always_inline. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUTargetMachine.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "llvm/CodeGen/CommandFlags.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Pass.h" 21 #include "llvm/Support/CommandLine.h" 22 23 using namespace llvm; 24 25 namespace { 26 27 static cl::opt<bool> StressCalls( 28 "amdgpu-stress-function-calls", 29 cl::Hidden, 30 cl::desc("Force all functions to be noinline"), 31 cl::init(false)); 32 33 class AMDGPUAlwaysInline : public ModulePass { 34 bool GlobalOpt; 35 36 public: 37 static char ID; 38 39 AMDGPUAlwaysInline(bool GlobalOpt = false) : 40 ModulePass(ID), GlobalOpt(GlobalOpt) { } 41 bool runOnModule(Module &M) override; 42 43 void getAnalysisUsage(AnalysisUsage &AU) const override { 44 AU.setPreservesAll(); 45 } 46 }; 47 48 } // End anonymous namespace 49 50 INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", 51 "AMDGPU Inline All Functions", false, false) 52 53 char AMDGPUAlwaysInline::ID = 0; 54 55 static void 56 recursivelyVisitUsers(GlobalValue &GV, 57 SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { 58 SmallVector<User *, 16> Stack(GV.users()); 59 60 SmallPtrSet<const Value *, 8> Visited; 61 62 while (!Stack.empty()) { 63 User *U = Stack.pop_back_val(); 64 if (!Visited.insert(U).second) 65 continue; 66 67 if (Instruction *I = dyn_cast<Instruction>(U)) { 68 Function *F = I->getParent()->getParent(); 69 if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { 70 // FIXME: This is a horrible hack. We should always respect noinline, 71 // and just let us hit the error when we can't handle this. 72 // 73 // Unfortunately, clang adds noinline to all functions at -O0. We have 74 // to override this here until that's fixed. 75 F->removeFnAttr(Attribute::NoInline); 76 77 FuncsToAlwaysInline.insert(F); 78 Stack.push_back(F); 79 } 80 81 // No need to look at further users, but we do need to inline any callers. 82 continue; 83 } 84 85 append_range(Stack, U->users()); 86 } 87 } 88 89 static bool alwaysInlineImpl(Module &M, bool GlobalOpt) { 90 std::vector<GlobalAlias*> AliasesToRemove; 91 92 bool Changed = false; 93 SmallPtrSet<Function *, 8> FuncsToAlwaysInline; 94 SmallPtrSet<Function *, 8> FuncsToNoInline; 95 Triple TT(M.getTargetTriple()); 96 97 for (GlobalAlias &A : M.aliases()) { 98 if (Function* F = dyn_cast<Function>(A.getAliasee())) { 99 if (TT.getArch() == Triple::amdgcn && 100 A.getLinkage() != GlobalValue::InternalLinkage) 101 continue; 102 Changed = true; 103 A.replaceAllUsesWith(F); 104 AliasesToRemove.push_back(&A); 105 } 106 107 // FIXME: If the aliasee isn't a function, it's some kind of constant expr 108 // cast that won't be inlined through. 109 } 110 111 if (GlobalOpt) { 112 for (GlobalAlias* A : AliasesToRemove) { 113 A->eraseFromParent(); 114 } 115 } 116 117 // Always force inlining of any function that uses an LDS global address. This 118 // is something of a workaround because we don't have a way of supporting LDS 119 // objects defined in functions. LDS is always allocated by a kernel, and it 120 // is difficult to manage LDS usage if a function may be used by multiple 121 // kernels. 122 // 123 // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this 124 // should only appear when IPO passes manages to move LDs defined in a kernel 125 // into a single user function. 126 127 for (GlobalVariable &GV : M.globals()) { 128 // TODO: Region address 129 unsigned AS = GV.getAddressSpace(); 130 if ((AS == AMDGPUAS::REGION_ADDRESS) || 131 (AS == AMDGPUAS::LOCAL_ADDRESS && 132 (!AMDGPUTargetMachine::EnableLowerModuleLDS))) 133 recursivelyVisitUsers(GV, FuncsToAlwaysInline); 134 } 135 136 if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { 137 auto IncompatAttr 138 = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; 139 140 for (Function &F : M) { 141 if (!F.isDeclaration() && !F.use_empty() && 142 !F.hasFnAttribute(IncompatAttr)) { 143 if (StressCalls) { 144 if (!FuncsToAlwaysInline.count(&F)) 145 FuncsToNoInline.insert(&F); 146 } else 147 FuncsToAlwaysInline.insert(&F); 148 } 149 } 150 } 151 152 for (Function *F : FuncsToAlwaysInline) 153 F->addFnAttr(Attribute::AlwaysInline); 154 155 for (Function *F : FuncsToNoInline) 156 F->addFnAttr(Attribute::NoInline); 157 158 return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); 159 } 160 161 bool AMDGPUAlwaysInline::runOnModule(Module &M) { 162 return alwaysInlineImpl(M, GlobalOpt); 163 } 164 165 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { 166 return new AMDGPUAlwaysInline(GlobalOpt); 167 } 168 169 PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M, 170 ModuleAnalysisManager &AM) { 171 const bool Changed = alwaysInlineImpl(M, GlobalOpt); 172 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 173 } 174