10b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 9*349cc55cSDimitry Andric /// \file This pass propagates the uniform-work-group-size attribute from 10*349cc55cSDimitry Andric /// kernels to leaf functions when possible. It also adds additional attributes 11*349cc55cSDimitry Andric /// to hint ABI lowering optimizations later. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AMDGPU.h" 16e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 170b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h" 190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 21e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 220b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric using namespace llvm; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric namespace { 290b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 300b57cec5SDimitry Andric private: 310b57cec5SDimitry Andric const TargetMachine *TM = nullptr; 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric bool addFeatureAttributes(Function &F); 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric public: 360b57cec5SDimitry Andric static char ID; 370b57cec5SDimitry Andric AMDGPUAnnotateKernelFeatures()380b57cec5SDimitry Andric AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric bool doInitialization(CallGraph &CG) override; 410b57cec5SDimitry Andric bool runOnSCC(CallGraphSCC &SCC) override; 420b57cec5SDimitry Andric getPassName() const430b57cec5SDimitry Andric StringRef getPassName() const override { 440b57cec5SDimitry Andric return "AMDGPU Annotate Kernel Features"; 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric getAnalysisUsage(AnalysisUsage & AU) const470b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 480b57cec5SDimitry Andric AU.setPreservesAll(); 490b57cec5SDimitry Andric CallGraphSCCPass::getAnalysisUsage(AU); 500b57cec5SDimitry Andric } 510b57cec5SDimitry Andric }; 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric } // end anonymous namespace 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0; 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 600b57cec5SDimitry Andric "Add AMDGPU function attributes", false, false) 610b57cec5SDimitry Andric addFeatureAttributes(Function & F)620b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 635ffd83dbSDimitry Andric bool HaveStackObjects = false; 640b57cec5SDimitry Andric bool Changed = false; 650b57cec5SDimitry Andric bool HaveCall = false; 660b57cec5SDimitry Andric bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric for (BasicBlock &BB : F) { 690b57cec5SDimitry Andric for (Instruction &I : BB) { 705ffd83dbSDimitry Andric if (isa<AllocaInst>(I)) { 715ffd83dbSDimitry Andric HaveStackObjects = true; 725ffd83dbSDimitry Andric continue; 735ffd83dbSDimitry Andric } 745ffd83dbSDimitry Andric 755ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) { 765ffd83dbSDimitry Andric const Function *Callee = 775ffd83dbSDimitry Andric dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 780b57cec5SDimitry Andric 79*349cc55cSDimitry Andric // Note the occurrence of indirect call. 800b57cec5SDimitry Andric if (!Callee) { 81*349cc55cSDimitry Andric if (!CB->isInlineAsm()) 820b57cec5SDimitry Andric HaveCall = true; 83*349cc55cSDimitry Andric 840b57cec5SDimitry Andric continue; 850b57cec5SDimitry Andric } 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 880b57cec5SDimitry Andric if (IID == Intrinsic::not_intrinsic) { 890b57cec5SDimitry Andric HaveCall = true; 900b57cec5SDimitry Andric Changed = true; 910b57cec5SDimitry Andric } 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric } 945ffd83dbSDimitry Andric } 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric // TODO: We could refine this to captured pointers that could possibly be 970b57cec5SDimitry Andric // accessed by flat instructions. For now this is mostly a poor way of 980b57cec5SDimitry Andric // estimating whether there are calls before argument lowering. 995ffd83dbSDimitry Andric if (!IsFunc && HaveCall) { 1005ffd83dbSDimitry Andric F.addFnAttr("amdgpu-calls"); 1015ffd83dbSDimitry Andric Changed = true; 1025ffd83dbSDimitry Andric } 1035ffd83dbSDimitry Andric 1045ffd83dbSDimitry Andric if (HaveStackObjects) { 1055ffd83dbSDimitry Andric F.addFnAttr("amdgpu-stack-objects"); 1060b57cec5SDimitry Andric Changed = true; 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric return Changed; 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric runOnSCC(CallGraphSCC & SCC)1120b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 1130b57cec5SDimitry Andric bool Changed = false; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric for (CallGraphNode *I : SCC) { 1160b57cec5SDimitry Andric Function *F = I->getFunction(); 117fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 118fe6060f1SDimitry Andric // not allowed to have kernel arguments. 119fe6060f1SDimitry Andric if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv())) 1200b57cec5SDimitry Andric continue; 121fe6060f1SDimitry Andric // Add feature attributes 1220b57cec5SDimitry Andric Changed |= addFeatureAttributes(*F); 1230b57cec5SDimitry Andric } 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric return Changed; 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric doInitialization(CallGraph & CG)1280b57cec5SDimitry Andricbool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 1290b57cec5SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 1300b57cec5SDimitry Andric if (!TPC) 1310b57cec5SDimitry Andric report_fatal_error("TargetMachine is required"); 1320b57cec5SDimitry Andric 1330b57cec5SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 1340b57cec5SDimitry Andric return false; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric createAMDGPUAnnotateKernelFeaturesPass()1370b57cec5SDimitry AndricPass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 1380b57cec5SDimitry Andric return new AMDGPUAnnotateKernelFeatures(); 1390b57cec5SDimitry Andric } 140