10b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics 100b57cec5SDimitry Andric /// which will impact calling convention lowering. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "AMDGPU.h" 15e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 160b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h" 170b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h" 180b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 19e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 20e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsR600.h" 210b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric using namespace llvm; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric namespace { 28*fe6060f1SDimitry Andric static constexpr StringLiteral ImplicitAttrNames[] = { 29*fe6060f1SDimitry Andric // X ids unnecessarily propagated to kernels. 30*fe6060f1SDimitry Andric "amdgpu-work-item-id-x", "amdgpu-work-item-id-y", 31*fe6060f1SDimitry Andric "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", 32*fe6060f1SDimitry Andric "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", 33*fe6060f1SDimitry Andric "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", 34*fe6060f1SDimitry Andric "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"}; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass { 370b57cec5SDimitry Andric private: 380b57cec5SDimitry Andric const TargetMachine *TM = nullptr; 390b57cec5SDimitry Andric SmallVector<CallGraphNode*, 8> NodeList; 400b57cec5SDimitry Andric 410b57cec5SDimitry Andric bool addFeatureAttributes(Function &F); 420b57cec5SDimitry Andric bool processUniformWorkGroupAttribute(); 430b57cec5SDimitry Andric bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee); 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric public: 460b57cec5SDimitry Andric static char ID; 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {} 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric bool doInitialization(CallGraph &CG) override; 510b57cec5SDimitry Andric bool runOnSCC(CallGraphSCC &SCC) override; 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric StringRef getPassName() const override { 540b57cec5SDimitry Andric return "AMDGPU Annotate Kernel Features"; 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 580b57cec5SDimitry Andric AU.setPreservesAll(); 590b57cec5SDimitry Andric CallGraphSCCPass::getAnalysisUsage(AU); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric static bool visitConstantExpr(const ConstantExpr *CE); 630b57cec5SDimitry Andric static bool visitConstantExprsRecursively( 640b57cec5SDimitry Andric const Constant *EntryC, 655ffd83dbSDimitry Andric SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc, 665ffd83dbSDimitry Andric bool HasApertureRegs); 670b57cec5SDimitry Andric }; 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric } // end anonymous namespace 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0; 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, 760b57cec5SDimitry Andric "Add AMDGPU function attributes", false, false) 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric // The queue ptr is only needed when casting to flat, not from it. 800b57cec5SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) { 810b57cec5SDimitry Andric return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { 850b57cec5SDimitry Andric return castRequiresQueuePtr(ASC->getSrcAddressSpace()); 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric 885ffd83dbSDimitry Andric static bool isDSAddress(const Constant *C) { 895ffd83dbSDimitry Andric const GlobalValue *GV = dyn_cast<GlobalValue>(C); 905ffd83dbSDimitry Andric if (!GV) 915ffd83dbSDimitry Andric return false; 925ffd83dbSDimitry Andric unsigned AS = GV->getAddressSpace(); 935ffd83dbSDimitry Andric return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; 945ffd83dbSDimitry Andric } 955ffd83dbSDimitry Andric 960b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { 970b57cec5SDimitry Andric if (CE->getOpcode() == Instruction::AddrSpaceCast) { 980b57cec5SDimitry Andric unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); 990b57cec5SDimitry Andric return castRequiresQueuePtr(SrcAS); 1000b57cec5SDimitry Andric } 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric return false; 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 1060b57cec5SDimitry Andric const Constant *EntryC, 1075ffd83dbSDimitry Andric SmallPtrSet<const Constant *, 8> &ConstantExprVisited, 1085ffd83dbSDimitry Andric bool IsFunc, bool HasApertureRegs) { 1090b57cec5SDimitry Andric 1100b57cec5SDimitry Andric if (!ConstantExprVisited.insert(EntryC).second) 1110b57cec5SDimitry Andric return false; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric SmallVector<const Constant *, 16> Stack; 1140b57cec5SDimitry Andric Stack.push_back(EntryC); 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric while (!Stack.empty()) { 1170b57cec5SDimitry Andric const Constant *C = Stack.pop_back_val(); 1180b57cec5SDimitry Andric 1195ffd83dbSDimitry Andric // We need to trap on DS globals in non-entry functions. 1205ffd83dbSDimitry Andric if (IsFunc && isDSAddress(C)) 1215ffd83dbSDimitry Andric return true; 1225ffd83dbSDimitry Andric 1230b57cec5SDimitry Andric // Check this constant expression. 1240b57cec5SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 1255ffd83dbSDimitry Andric if (!HasApertureRegs && visitConstantExpr(CE)) 1260b57cec5SDimitry Andric return true; 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric // Visit all sub-expressions. 1300b57cec5SDimitry Andric for (const Use &U : C->operands()) { 1310b57cec5SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 1320b57cec5SDimitry Andric if (!OpC) 1330b57cec5SDimitry Andric continue; 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric if (!ConstantExprVisited.insert(OpC).second) 1360b57cec5SDimitry Andric continue; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric Stack.push_back(OpC); 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric return false; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always 1460b57cec5SDimitry Andric // initialized. 1470b57cec5SDimitry Andric // 1480b57cec5SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup 1490b57cec5SDimitry Andric // size is 1 for y/z. 1500b57cec5SDimitry Andric static StringRef intrinsicToAttrName(Intrinsic::ID ID, 1510b57cec5SDimitry Andric bool &NonKernelOnly, 1520b57cec5SDimitry Andric bool &IsQueuePtr) { 1530b57cec5SDimitry Andric switch (ID) { 1540b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 1550b57cec5SDimitry Andric NonKernelOnly = true; 1560b57cec5SDimitry Andric return "amdgpu-work-item-id-x"; 1570b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 1580b57cec5SDimitry Andric NonKernelOnly = true; 1590b57cec5SDimitry Andric return "amdgpu-work-group-id-x"; 1600b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 1610b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_y: 1620b57cec5SDimitry Andric return "amdgpu-work-item-id-y"; 1630b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 1640b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_z: 1650b57cec5SDimitry Andric return "amdgpu-work-item-id-z"; 1660b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 1670b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_y: 1680b57cec5SDimitry Andric return "amdgpu-work-group-id-y"; 1690b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 1700b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_z: 1710b57cec5SDimitry Andric return "amdgpu-work-group-id-z"; 1720b57cec5SDimitry Andric case Intrinsic::amdgcn_dispatch_ptr: 1730b57cec5SDimitry Andric return "amdgpu-dispatch-ptr"; 1740b57cec5SDimitry Andric case Intrinsic::amdgcn_dispatch_id: 1750b57cec5SDimitry Andric return "amdgpu-dispatch-id"; 1760b57cec5SDimitry Andric case Intrinsic::amdgcn_kernarg_segment_ptr: 1770b57cec5SDimitry Andric return "amdgpu-kernarg-segment-ptr"; 1780b57cec5SDimitry Andric case Intrinsic::amdgcn_implicitarg_ptr: 1790b57cec5SDimitry Andric return "amdgpu-implicitarg-ptr"; 1800b57cec5SDimitry Andric case Intrinsic::amdgcn_queue_ptr: 1818bcb0991SDimitry Andric case Intrinsic::amdgcn_is_shared: 1828bcb0991SDimitry Andric case Intrinsic::amdgcn_is_private: 1838bcb0991SDimitry Andric // TODO: Does not require queue ptr on gfx9+ 1840b57cec5SDimitry Andric case Intrinsic::trap: 1850b57cec5SDimitry Andric case Intrinsic::debugtrap: 1860b57cec5SDimitry Andric IsQueuePtr = true; 1870b57cec5SDimitry Andric return "amdgpu-queue-ptr"; 1880b57cec5SDimitry Andric default: 1890b57cec5SDimitry Andric return ""; 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee, 1940b57cec5SDimitry Andric StringRef Name) { 1950b57cec5SDimitry Andric if (Callee.hasFnAttribute(Name)) { 1960b57cec5SDimitry Andric Parent.addFnAttr(Name); 1970b57cec5SDimitry Andric return true; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric return false; 2000b57cec5SDimitry Andric } 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee, 2030b57cec5SDimitry Andric bool &NeedQueuePtr) { 2040b57cec5SDimitry Andric if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) 2050b57cec5SDimitry Andric NeedQueuePtr = true; 2060b57cec5SDimitry Andric 207*fe6060f1SDimitry Andric for (StringRef AttrName : ImplicitAttrNames) 2080b57cec5SDimitry Andric handleAttr(Parent, Callee, AttrName); 2090b57cec5SDimitry Andric } 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() { 2120b57cec5SDimitry Andric bool Changed = false; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric for (auto *Node : reverse(NodeList)) { 2150b57cec5SDimitry Andric Function *Caller = Node->getFunction(); 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric for (auto I : *Node) { 2180b57cec5SDimitry Andric Function *Callee = std::get<1>(I)->getFunction(); 2190b57cec5SDimitry Andric if (Callee) 2200b57cec5SDimitry Andric Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee); 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric 2240b57cec5SDimitry Andric return Changed; 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( 2280b57cec5SDimitry Andric Function &Caller, Function &Callee) { 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric // Check for externally defined function 2310b57cec5SDimitry Andric if (!Callee.hasExactDefinition()) { 2320b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 2330b57cec5SDimitry Andric if (!Caller.hasFnAttribute("uniform-work-group-size")) 2340b57cec5SDimitry Andric Caller.addFnAttr("uniform-work-group-size", "false"); 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric return true; 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric // Check if the Caller has the attribute 2390b57cec5SDimitry Andric if (Caller.hasFnAttribute("uniform-work-group-size")) { 2400b57cec5SDimitry Andric // Check if the value of the attribute is true 2410b57cec5SDimitry Andric if (Caller.getFnAttribute("uniform-work-group-size") 2420b57cec5SDimitry Andric .getValueAsString().equals("true")) { 2430b57cec5SDimitry Andric // Propagate the attribute to the Callee, if it does not have it 2440b57cec5SDimitry Andric if (!Callee.hasFnAttribute("uniform-work-group-size")) { 2450b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "true"); 2460b57cec5SDimitry Andric return true; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric } else { 2490b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 2500b57cec5SDimitry Andric return true; 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric } else { 2530b57cec5SDimitry Andric // If the attribute is absent, set it as false 2540b57cec5SDimitry Andric Caller.addFnAttr("uniform-work-group-size", "false"); 2550b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 2560b57cec5SDimitry Andric return true; 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric return false; 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 2620b57cec5SDimitry Andric const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F); 2630b57cec5SDimitry Andric bool HasApertureRegs = ST.hasApertureRegs(); 2640b57cec5SDimitry Andric SmallPtrSet<const Constant *, 8> ConstantExprVisited; 2650b57cec5SDimitry Andric 2665ffd83dbSDimitry Andric bool HaveStackObjects = false; 2670b57cec5SDimitry Andric bool Changed = false; 2680b57cec5SDimitry Andric bool NeedQueuePtr = false; 2690b57cec5SDimitry Andric bool HaveCall = false; 270*fe6060f1SDimitry Andric bool HasIndirectCall = false; 2710b57cec5SDimitry Andric bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 272*fe6060f1SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 273*fe6060f1SDimitry Andric bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx); 274*fe6060f1SDimitry Andric 275*fe6060f1SDimitry Andric // If this function hasAddressTaken() = true 276*fe6060f1SDimitry Andric // then add all attributes corresponding to the implicit args. 277*fe6060f1SDimitry Andric if (CallingConvSupportsAllImplicits && 278*fe6060f1SDimitry Andric F.hasAddressTaken(nullptr, true, true, true)) { 279*fe6060f1SDimitry Andric for (StringRef AttrName : ImplicitAttrNames) { 280*fe6060f1SDimitry Andric F.addFnAttr(AttrName); 281*fe6060f1SDimitry Andric } 282*fe6060f1SDimitry Andric Changed = true; 283*fe6060f1SDimitry Andric } 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric for (BasicBlock &BB : F) { 2860b57cec5SDimitry Andric for (Instruction &I : BB) { 2875ffd83dbSDimitry Andric if (isa<AllocaInst>(I)) { 2885ffd83dbSDimitry Andric HaveStackObjects = true; 2895ffd83dbSDimitry Andric continue; 2905ffd83dbSDimitry Andric } 2915ffd83dbSDimitry Andric 2925ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) { 2935ffd83dbSDimitry Andric const Function *Callee = 2945ffd83dbSDimitry Andric dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); 2950b57cec5SDimitry Andric 296*fe6060f1SDimitry Andric // Note the occurence of indirect call. 2970b57cec5SDimitry Andric if (!Callee) { 298*fe6060f1SDimitry Andric if (!CB->isInlineAsm()) { 299*fe6060f1SDimitry Andric HasIndirectCall = true; 3000b57cec5SDimitry Andric HaveCall = true; 301*fe6060f1SDimitry Andric } 3020b57cec5SDimitry Andric continue; 3030b57cec5SDimitry Andric } 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 3060b57cec5SDimitry Andric if (IID == Intrinsic::not_intrinsic) { 3070b57cec5SDimitry Andric HaveCall = true; 3080b57cec5SDimitry Andric copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 3090b57cec5SDimitry Andric Changed = true; 3100b57cec5SDimitry Andric } else { 3110b57cec5SDimitry Andric bool NonKernelOnly = false; 3125ffd83dbSDimitry Andric 3135ffd83dbSDimitry Andric if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) { 3145ffd83dbSDimitry Andric F.addFnAttr("amdgpu-kernarg-segment-ptr"); 3155ffd83dbSDimitry Andric } else { 3165ffd83dbSDimitry Andric StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly, 3175ffd83dbSDimitry Andric NeedQueuePtr); 3180b57cec5SDimitry Andric if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { 3190b57cec5SDimitry Andric F.addFnAttr(AttrName); 3200b57cec5SDimitry Andric Changed = true; 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric } 3245ffd83dbSDimitry Andric } 3250b57cec5SDimitry Andric 3265ffd83dbSDimitry Andric if (NeedQueuePtr || (!IsFunc && HasApertureRegs)) 3270b57cec5SDimitry Andric continue; 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 3305ffd83dbSDimitry Andric if (!HasApertureRegs && castRequiresQueuePtr(ASC)) { 3310b57cec5SDimitry Andric NeedQueuePtr = true; 3320b57cec5SDimitry Andric continue; 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric for (const Use &U : I.operands()) { 3370b57cec5SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 3380b57cec5SDimitry Andric if (!OpC) 3390b57cec5SDimitry Andric continue; 3400b57cec5SDimitry Andric 3415ffd83dbSDimitry Andric if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc, 3425ffd83dbSDimitry Andric HasApertureRegs)) { 3430b57cec5SDimitry Andric NeedQueuePtr = true; 3440b57cec5SDimitry Andric break; 3450b57cec5SDimitry Andric } 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric } 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric if (NeedQueuePtr) { 3510b57cec5SDimitry Andric F.addFnAttr("amdgpu-queue-ptr"); 3520b57cec5SDimitry Andric Changed = true; 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric // TODO: We could refine this to captured pointers that could possibly be 3560b57cec5SDimitry Andric // accessed by flat instructions. For now this is mostly a poor way of 3570b57cec5SDimitry Andric // estimating whether there are calls before argument lowering. 3585ffd83dbSDimitry Andric if (!IsFunc && HaveCall) { 3595ffd83dbSDimitry Andric F.addFnAttr("amdgpu-calls"); 3605ffd83dbSDimitry Andric Changed = true; 3615ffd83dbSDimitry Andric } 3625ffd83dbSDimitry Andric 3635ffd83dbSDimitry Andric if (HaveStackObjects) { 3645ffd83dbSDimitry Andric F.addFnAttr("amdgpu-stack-objects"); 3650b57cec5SDimitry Andric Changed = true; 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 368*fe6060f1SDimitry Andric // This pass cannot copy attributes from callees to callers 369*fe6060f1SDimitry Andric // if there is an indirect call and in thus such cases, 370*fe6060f1SDimitry Andric // hasAddressTaken() would be false for kernels and functions 371*fe6060f1SDimitry Andric // making an indirect call (if they are themselves not indirectly called). 372*fe6060f1SDimitry Andric // We must tag all such kernels/functions with all implicits attributes 373*fe6060f1SDimitry Andric // for correctness. 374*fe6060f1SDimitry Andric // e.g. 375*fe6060f1SDimitry Andric // 1. Kernel K1 makes an indirect call to function F1. 376*fe6060f1SDimitry Andric // Without detecting an indirect call in K1, this pass will not 377*fe6060f1SDimitry Andric // add all implicit args to K1 (which is incorrect). 378*fe6060f1SDimitry Andric // 2. Kernel K1 makes direct call to F1 which makes indirect call to function 379*fe6060f1SDimitry Andric // F2. 380*fe6060f1SDimitry Andric // Without detecting an indirect call in F1 (whose hasAddressTaken() is 381*fe6060f1SDimitry Andric // false), the pass will not add all implicit args to F1 (which is 382*fe6060f1SDimitry Andric // essential for correctness). 383*fe6060f1SDimitry Andric if (CallingConvSupportsAllImplicits && HasIndirectCall) { 384*fe6060f1SDimitry Andric for (StringRef AttrName : ImplicitAttrNames) { 385*fe6060f1SDimitry Andric F.addFnAttr(AttrName); 386*fe6060f1SDimitry Andric } 387*fe6060f1SDimitry Andric Changed = true; 388*fe6060f1SDimitry Andric } 389*fe6060f1SDimitry Andric 3900b57cec5SDimitry Andric return Changed; 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 3940b57cec5SDimitry Andric bool Changed = false; 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric for (CallGraphNode *I : SCC) { 3970b57cec5SDimitry Andric // Build a list of CallGraphNodes from most number of uses to least 3980b57cec5SDimitry Andric if (I->getNumReferences()) 3990b57cec5SDimitry Andric NodeList.push_back(I); 4000b57cec5SDimitry Andric else { 4010b57cec5SDimitry Andric processUniformWorkGroupAttribute(); 4020b57cec5SDimitry Andric NodeList.clear(); 4030b57cec5SDimitry Andric } 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric Function *F = I->getFunction(); 406*fe6060f1SDimitry Andric // Ignore functions with graphics calling conventions, these are currently 407*fe6060f1SDimitry Andric // not allowed to have kernel arguments. 408*fe6060f1SDimitry Andric if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv())) 4090b57cec5SDimitry Andric continue; 410*fe6060f1SDimitry Andric // Add feature attributes 4110b57cec5SDimitry Andric Changed |= addFeatureAttributes(*F); 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric return Changed; 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 4180b57cec5SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 4190b57cec5SDimitry Andric if (!TPC) 4200b57cec5SDimitry Andric report_fatal_error("TargetMachine is required"); 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 4230b57cec5SDimitry Andric return false; 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 4270b57cec5SDimitry Andric return new AMDGPUAnnotateKernelFeatures(); 4280b57cec5SDimitry Andric } 429