//===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions that use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr;
  SmallVector<CallGraphNode*, 8> NodeList;

  bool addFeatureAttributes(Function &F);
  bool processUniformWorkGroupAttribute();
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
      const Constant *EntryC,
      SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
      bool HasApertureRegs);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
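
// Illustrative sketch (editorial; function and value names are hypothetical):
// for a kernel that reads the y work-item id, this pass records the fact as a
// function attribute so that later calling-convention lowering knows the
// corresponding preloaded register is needed. For example, IR such as
//
//   define amdgpu_kernel void @k(i32 addrspace(1)* %out) {
//     %id = call i32 @llvm.amdgcn.workitem.id.y()
//     store i32 %id, i32 addrspace(1)* %out
//     ret void
//   }
//
// would be annotated with "amdgpu-work-item-id-y" (see intrinsicToAttrName
// below for the full intrinsic-to-attribute mapping).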

// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}
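
// For example (a sketch; address-space numbers follow the AMDGPU convention of
// 3 for local and 5 for private):
//
//   %flat = addrspacecast i32 addrspace(3)* %lds to i32*
//
// casts *to* the flat address space and therefore needs the queue pointer to
// look up the aperture bases (unless the subtarget has aperture registers),
// while a cast *from* flat does not.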

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
    bool IsFunc, bool HasApertureRegs) {

  if (!ConstantExprVisited.insert(EntryC).second)
    return false;

  SmallVector<const Constant *, 16> Stack;
  Stack.push_back(EntryC);

  while (!Stack.empty()) {
    const Constant *C = Stack.pop_back_val();

    // We need to trap on DS globals in non-entry functions.
    if (IsFunc && isDSAddress(C))
      return true;

    // Check this constant expression.
    if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
      if (!HasApertureRegs && visitConstantExpr(CE))
        return true;
    }

    // Visit all sub-expressions.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      if (!ConstantExprVisited.insert(OpC).second)
        continue;

      Stack.push_back(OpC);
    }
  }

  return false;
}

// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
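
// Note (descriptive, matching the logic above and in addFeatureAttributes
// below): the x ids are marked NonKernelOnly because kernels always have them
// initialized, so "amdgpu-work-item-id-x" / "amdgpu-work-group-id-x" are only
// added to non-entry functions, while the y/z attributes are also added to
// kernels.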

static bool handleAttr(Function &Parent, const Function &Callee,
                       StringRef Name) {
  if (Callee.hasFnAttribute(Name)) {
    Parent.addFnAttr(Name);
    return true;
  }
  return false;
}

static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
                                   bool &NeedQueuePtr) {
  // The x ids are unnecessarily propagated to kernels here.
  static constexpr StringLiteral AttrNames[] = {
      "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
      "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
      "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
      "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
      "amdgpu-implicitarg-ptr"};

  if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
    NeedQueuePtr = true;

  for (StringRef AttrName : AttrNames)
    handleAttr(Parent, Callee, AttrName);
}

bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
  bool Changed = false;

  for (auto *Node : reverse(NodeList)) {
    Function *Caller = Node->getFunction();

    for (auto I : *Node) {
      Function *Callee = std::get<1>(I)->getFunction();
      if (Callee)
        Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
    }
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
    Function &Caller, Function &Callee) {

  // Check for an externally defined function.
  if (!Callee.hasExactDefinition()) {
    Callee.addFnAttr("uniform-work-group-size", "false");
    if (!Caller.hasFnAttribute("uniform-work-group-size"))
      Caller.addFnAttr("uniform-work-group-size", "false");

    return true;
  }
  // Check if the Caller has the attribute.
  if (Caller.hasFnAttribute("uniform-work-group-size")) {
    // Check if the value of the attribute is true.
    if (Caller.getFnAttribute("uniform-work-group-size")
            .getValueAsString().equals("true")) {
      // Propagate the attribute to the Callee if it does not already have it.
      if (!Callee.hasFnAttribute("uniform-work-group-size")) {
        Callee.addFnAttr("uniform-work-group-size", "true");
        return true;
      }
    } else {
      Callee.addFnAttr("uniform-work-group-size", "false");
      return true;
    }
  } else {
    // If the attribute is absent, set it to false on both.
    Caller.addFnAttr("uniform-work-group-size", "false");
    Callee.addFnAttr("uniform-work-group-size", "false");
    return true;
  }
  return false;
}
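
// In other words (a summary of the cases handled above): if the caller is
// marked "uniform-work-group-size"="true", an unmarked callee inherits "true";
// if the caller is marked "false" or is unmarked, both end up "false"; and a
// callee without an exact definition is conservatively forced to "false".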

bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool HaveStackObjects = false;
  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      if (isa<AllocaInst>(I)) {
        HaveStackObjects = true;
        continue;
      }

      if (auto *CB = dyn_cast<CallBase>(&I)) {
        const Function *Callee =
            dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CB->isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          bool NonKernelOnly = false;

          if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
            F.addFnAttr("amdgpu-kernarg-segment-ptr");
          } else {
            StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
                                                     NeedQueuePtr);
            if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
              F.addFnAttr(AttrName);
              Changed = true;
            }
          }
        }
      }

      if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
                                          HasApertureRegs)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (!IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-calls");
    Changed = true;
  }

  if (HaveStackObjects) {
    F.addFnAttr("amdgpu-stack-objects");
    Changed = true;
  }

  return Changed;
}
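
// For instance (a sketch; names are illustrative): a kernel of the form
//
//   define amdgpu_kernel void @caller() {
//     %tmp = alloca i32, addrspace(5)
//     call void @external_fn()
//     ret void
//   }
//
// picks up both "amdgpu-calls" (it contains a call before argument lowering)
// and "amdgpu-stack-objects" (it contains an alloca).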

bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
  bool Changed = false;

  for (CallGraphNode *I : SCC) {
    // Build a list of CallGraphNodes, ordered from most uses to least.
    if (I->getNumReferences())
      NodeList.push_back(I);
    else {
      processUniformWorkGroupAttribute();
      NodeList.clear();
    }

    Function *F = I->getFunction();
    // Add feature attributes.
    if (!F || F->isDeclaration())
      continue;
    Changed |= addFeatureAttributes(*F);
  }

  return Changed;
}

bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    report_fatal_error("TargetMachine is required");

  TM = &TPC->getTM<TargetMachine>();
  return false;
}

Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
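
// Usage sketch (editorial; the pass is normally scheduled by the AMDGPU target
// machine rather than constructed by hand):
//
//   legacy::PassManager PM;
//   PM.add(createAMDGPUAnnotateKernelFeaturesPass());
//   PM.run(M); // M is the Module to annotate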