xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
//===- AMDGPUAnnotateKernelFeatures.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass adds target attributes to functions which use intrinsics
/// which will impact calling convention lowering.
//
//===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "AMDGPU.h"
150b57cec5SDimitry Andric #include "AMDGPUSubtarget.h"
160b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Triple.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
240b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
250b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
280b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
290b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
300b57cec5SDimitry Andric #include "llvm/IR/Module.h"
310b57cec5SDimitry Andric #include "llvm/IR/Type.h"
320b57cec5SDimitry Andric #include "llvm/IR/Use.h"
330b57cec5SDimitry Andric #include "llvm/Pass.h"
340b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
350b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
360b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric using namespace llvm;
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric namespace {
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
450b57cec5SDimitry Andric private:
460b57cec5SDimitry Andric   const TargetMachine *TM = nullptr;
470b57cec5SDimitry Andric   SmallVector<CallGraphNode*, 8> NodeList;
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric   bool addFeatureAttributes(Function &F);
500b57cec5SDimitry Andric   bool processUniformWorkGroupAttribute();
510b57cec5SDimitry Andric   bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric public:
540b57cec5SDimitry Andric   static char ID;
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric   bool doInitialization(CallGraph &CG) override;
590b57cec5SDimitry Andric   bool runOnSCC(CallGraphSCC &SCC) override;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric   StringRef getPassName() const override {
620b57cec5SDimitry Andric     return "AMDGPU Annotate Kernel Features";
630b57cec5SDimitry Andric   }
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
660b57cec5SDimitry Andric     AU.setPreservesAll();
670b57cec5SDimitry Andric     CallGraphSCCPass::getAnalysisUsage(AU);
680b57cec5SDimitry Andric   }
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric   static bool visitConstantExpr(const ConstantExpr *CE);
710b57cec5SDimitry Andric   static bool visitConstantExprsRecursively(
720b57cec5SDimitry Andric     const Constant *EntryC,
73*5ffd83dbSDimitry Andric     SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
74*5ffd83dbSDimitry Andric     bool HasApertureRegs);
750b57cec5SDimitry Andric };
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric } // end anonymous namespace
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0;
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
840b57cec5SDimitry Andric                 "Add AMDGPU function attributes", false, false)
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
880b57cec5SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
890b57cec5SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
900b57cec5SDimitry Andric }
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
930b57cec5SDimitry Andric   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric 
96*5ffd83dbSDimitry Andric static bool isDSAddress(const Constant *C) {
97*5ffd83dbSDimitry Andric   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
98*5ffd83dbSDimitry Andric   if (!GV)
99*5ffd83dbSDimitry Andric     return false;
100*5ffd83dbSDimitry Andric   unsigned AS = GV->getAddressSpace();
101*5ffd83dbSDimitry Andric   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
102*5ffd83dbSDimitry Andric }
103*5ffd83dbSDimitry Andric 
1040b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
1050b57cec5SDimitry Andric   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
1060b57cec5SDimitry Andric     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
1070b57cec5SDimitry Andric     return castRequiresQueuePtr(SrcAS);
1080b57cec5SDimitry Andric   }
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   return false;
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1140b57cec5SDimitry Andric   const Constant *EntryC,
115*5ffd83dbSDimitry Andric   SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
116*5ffd83dbSDimitry Andric   bool IsFunc, bool HasApertureRegs) {
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric   if (!ConstantExprVisited.insert(EntryC).second)
1190b57cec5SDimitry Andric     return false;
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   SmallVector<const Constant *, 16> Stack;
1220b57cec5SDimitry Andric   Stack.push_back(EntryC);
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   while (!Stack.empty()) {
1250b57cec5SDimitry Andric     const Constant *C = Stack.pop_back_val();
1260b57cec5SDimitry Andric 
127*5ffd83dbSDimitry Andric     // We need to trap on DS globals in non-entry functions.
128*5ffd83dbSDimitry Andric     if (IsFunc && isDSAddress(C))
129*5ffd83dbSDimitry Andric       return true;
130*5ffd83dbSDimitry Andric 
1310b57cec5SDimitry Andric     // Check this constant expression.
1320b57cec5SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
133*5ffd83dbSDimitry Andric       if (!HasApertureRegs && visitConstantExpr(CE))
1340b57cec5SDimitry Andric         return true;
1350b57cec5SDimitry Andric     }
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric     // Visit all sub-expressions.
1380b57cec5SDimitry Andric     for (const Use &U : C->operands()) {
1390b57cec5SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
1400b57cec5SDimitry Andric       if (!OpC)
1410b57cec5SDimitry Andric         continue;
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric       if (!ConstantExprVisited.insert(OpC).second)
1440b57cec5SDimitry Andric         continue;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric       Stack.push_back(OpC);
1470b57cec5SDimitry Andric     }
1480b57cec5SDimitry Andric   }
1490b57cec5SDimitry Andric 
1500b57cec5SDimitry Andric   return false;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always
1540b57cec5SDimitry Andric // initialized.
1550b57cec5SDimitry Andric //
1560b57cec5SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup
1570b57cec5SDimitry Andric // size is 1 for y/z.
1580b57cec5SDimitry Andric static StringRef intrinsicToAttrName(Intrinsic::ID ID,
1590b57cec5SDimitry Andric                                      bool &NonKernelOnly,
1600b57cec5SDimitry Andric                                      bool &IsQueuePtr) {
1610b57cec5SDimitry Andric   switch (ID) {
1620b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
1630b57cec5SDimitry Andric     NonKernelOnly = true;
1640b57cec5SDimitry Andric     return "amdgpu-work-item-id-x";
1650b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
1660b57cec5SDimitry Andric     NonKernelOnly = true;
1670b57cec5SDimitry Andric     return "amdgpu-work-group-id-x";
1680b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
1690b57cec5SDimitry Andric   case Intrinsic::r600_read_tidig_y:
1700b57cec5SDimitry Andric     return "amdgpu-work-item-id-y";
1710b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
1720b57cec5SDimitry Andric   case Intrinsic::r600_read_tidig_z:
1730b57cec5SDimitry Andric     return "amdgpu-work-item-id-z";
1740b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
1750b57cec5SDimitry Andric   case Intrinsic::r600_read_tgid_y:
1760b57cec5SDimitry Andric     return "amdgpu-work-group-id-y";
1770b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
1780b57cec5SDimitry Andric   case Intrinsic::r600_read_tgid_z:
1790b57cec5SDimitry Andric     return "amdgpu-work-group-id-z";
1800b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
1810b57cec5SDimitry Andric     return "amdgpu-dispatch-ptr";
1820b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
1830b57cec5SDimitry Andric     return "amdgpu-dispatch-id";
1840b57cec5SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr:
1850b57cec5SDimitry Andric     return "amdgpu-kernarg-segment-ptr";
1860b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
1870b57cec5SDimitry Andric     return "amdgpu-implicitarg-ptr";
1880b57cec5SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
1898bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_shared:
1908bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_private:
1918bcb0991SDimitry Andric     // TODO: Does not require queue ptr on gfx9+
1920b57cec5SDimitry Andric   case Intrinsic::trap:
1930b57cec5SDimitry Andric   case Intrinsic::debugtrap:
1940b57cec5SDimitry Andric     IsQueuePtr = true;
1950b57cec5SDimitry Andric     return "amdgpu-queue-ptr";
1960b57cec5SDimitry Andric   default:
1970b57cec5SDimitry Andric     return "";
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric }
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
2020b57cec5SDimitry Andric                        StringRef Name) {
2030b57cec5SDimitry Andric   if (Callee.hasFnAttribute(Name)) {
2040b57cec5SDimitry Andric     Parent.addFnAttr(Name);
2050b57cec5SDimitry Andric     return true;
2060b57cec5SDimitry Andric   }
2070b57cec5SDimitry Andric   return false;
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
2110b57cec5SDimitry Andric                                    bool &NeedQueuePtr) {
2120b57cec5SDimitry Andric   // X ids unnecessarily propagated to kernels.
2138bcb0991SDimitry Andric   static constexpr StringLiteral AttrNames[] = {
2148bcb0991SDimitry Andric       "amdgpu-work-item-id-x",      "amdgpu-work-item-id-y",
2158bcb0991SDimitry Andric       "amdgpu-work-item-id-z",      "amdgpu-work-group-id-x",
2168bcb0991SDimitry Andric       "amdgpu-work-group-id-y",     "amdgpu-work-group-id-z",
2178bcb0991SDimitry Andric       "amdgpu-dispatch-ptr",        "amdgpu-dispatch-id",
218*5ffd83dbSDimitry Andric       "amdgpu-implicitarg-ptr"};
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
2210b57cec5SDimitry Andric     NeedQueuePtr = true;
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   for (StringRef AttrName : AttrNames)
2240b57cec5SDimitry Andric     handleAttr(Parent, Callee, AttrName);
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
2280b57cec5SDimitry Andric   bool Changed = false;
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   for (auto *Node : reverse(NodeList)) {
2310b57cec5SDimitry Andric     Function *Caller = Node->getFunction();
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric     for (auto I : *Node) {
2340b57cec5SDimitry Andric       Function *Callee = std::get<1>(I)->getFunction();
2350b57cec5SDimitry Andric       if (Callee)
2360b57cec5SDimitry Andric         Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
2370b57cec5SDimitry Andric     }
2380b57cec5SDimitry Andric   }
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   return Changed;
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
2440b57cec5SDimitry Andric        Function &Caller, Function &Callee) {
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   // Check for externally defined function
2470b57cec5SDimitry Andric   if (!Callee.hasExactDefinition()) {
2480b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
2490b57cec5SDimitry Andric     if (!Caller.hasFnAttribute("uniform-work-group-size"))
2500b57cec5SDimitry Andric       Caller.addFnAttr("uniform-work-group-size", "false");
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric     return true;
2530b57cec5SDimitry Andric   }
2540b57cec5SDimitry Andric   // Check if the Caller has the attribute
2550b57cec5SDimitry Andric   if (Caller.hasFnAttribute("uniform-work-group-size")) {
2560b57cec5SDimitry Andric     // Check if the value of the attribute is true
2570b57cec5SDimitry Andric     if (Caller.getFnAttribute("uniform-work-group-size")
2580b57cec5SDimitry Andric         .getValueAsString().equals("true")) {
2590b57cec5SDimitry Andric       // Propagate the attribute to the Callee, if it does not have it
2600b57cec5SDimitry Andric       if (!Callee.hasFnAttribute("uniform-work-group-size")) {
2610b57cec5SDimitry Andric         Callee.addFnAttr("uniform-work-group-size", "true");
2620b57cec5SDimitry Andric         return true;
2630b57cec5SDimitry Andric       }
2640b57cec5SDimitry Andric     } else {
2650b57cec5SDimitry Andric       Callee.addFnAttr("uniform-work-group-size", "false");
2660b57cec5SDimitry Andric       return true;
2670b57cec5SDimitry Andric     }
2680b57cec5SDimitry Andric   } else {
2690b57cec5SDimitry Andric     // If the attribute is absent, set it as false
2700b57cec5SDimitry Andric     Caller.addFnAttr("uniform-work-group-size", "false");
2710b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
2720b57cec5SDimitry Andric     return true;
2730b57cec5SDimitry Andric   }
2740b57cec5SDimitry Andric   return false;
2750b57cec5SDimitry Andric }
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
2780b57cec5SDimitry Andric   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
2790b57cec5SDimitry Andric   bool HasApertureRegs = ST.hasApertureRegs();
2800b57cec5SDimitry Andric   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
2810b57cec5SDimitry Andric 
282*5ffd83dbSDimitry Andric   bool HaveStackObjects = false;
2830b57cec5SDimitry Andric   bool Changed = false;
2840b57cec5SDimitry Andric   bool NeedQueuePtr = false;
2850b57cec5SDimitry Andric   bool HaveCall = false;
2860b57cec5SDimitry Andric   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
2890b57cec5SDimitry Andric     for (Instruction &I : BB) {
290*5ffd83dbSDimitry Andric       if (isa<AllocaInst>(I)) {
291*5ffd83dbSDimitry Andric         HaveStackObjects = true;
292*5ffd83dbSDimitry Andric         continue;
293*5ffd83dbSDimitry Andric       }
294*5ffd83dbSDimitry Andric 
295*5ffd83dbSDimitry Andric       if (auto *CB = dyn_cast<CallBase>(&I)) {
296*5ffd83dbSDimitry Andric         const Function *Callee =
297*5ffd83dbSDimitry Andric             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric         // TODO: Do something with indirect calls.
3000b57cec5SDimitry Andric         if (!Callee) {
301*5ffd83dbSDimitry Andric           if (!CB->isInlineAsm())
3020b57cec5SDimitry Andric             HaveCall = true;
3030b57cec5SDimitry Andric           continue;
3040b57cec5SDimitry Andric         }
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric         Intrinsic::ID IID = Callee->getIntrinsicID();
3070b57cec5SDimitry Andric         if (IID == Intrinsic::not_intrinsic) {
3080b57cec5SDimitry Andric           HaveCall = true;
3090b57cec5SDimitry Andric           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
3100b57cec5SDimitry Andric           Changed = true;
3110b57cec5SDimitry Andric         } else {
3120b57cec5SDimitry Andric           bool NonKernelOnly = false;
313*5ffd83dbSDimitry Andric 
314*5ffd83dbSDimitry Andric           if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
315*5ffd83dbSDimitry Andric             F.addFnAttr("amdgpu-kernarg-segment-ptr");
316*5ffd83dbSDimitry Andric           } else {
317*5ffd83dbSDimitry Andric             StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
318*5ffd83dbSDimitry Andric                                                      NeedQueuePtr);
3190b57cec5SDimitry Andric             if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
3200b57cec5SDimitry Andric               F.addFnAttr(AttrName);
3210b57cec5SDimitry Andric               Changed = true;
3220b57cec5SDimitry Andric             }
3230b57cec5SDimitry Andric           }
3240b57cec5SDimitry Andric         }
325*5ffd83dbSDimitry Andric       }
3260b57cec5SDimitry Andric 
327*5ffd83dbSDimitry Andric       if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
3280b57cec5SDimitry Andric         continue;
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
331*5ffd83dbSDimitry Andric         if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
3320b57cec5SDimitry Andric           NeedQueuePtr = true;
3330b57cec5SDimitry Andric           continue;
3340b57cec5SDimitry Andric         }
3350b57cec5SDimitry Andric       }
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric       for (const Use &U : I.operands()) {
3380b57cec5SDimitry Andric         const auto *OpC = dyn_cast<Constant>(U);
3390b57cec5SDimitry Andric         if (!OpC)
3400b57cec5SDimitry Andric           continue;
3410b57cec5SDimitry Andric 
342*5ffd83dbSDimitry Andric         if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
343*5ffd83dbSDimitry Andric                                           HasApertureRegs)) {
3440b57cec5SDimitry Andric           NeedQueuePtr = true;
3450b57cec5SDimitry Andric           break;
3460b57cec5SDimitry Andric         }
3470b57cec5SDimitry Andric       }
3480b57cec5SDimitry Andric     }
3490b57cec5SDimitry Andric   }
3500b57cec5SDimitry Andric 
3510b57cec5SDimitry Andric   if (NeedQueuePtr) {
3520b57cec5SDimitry Andric     F.addFnAttr("amdgpu-queue-ptr");
3530b57cec5SDimitry Andric     Changed = true;
3540b57cec5SDimitry Andric   }
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   // TODO: We could refine this to captured pointers that could possibly be
3570b57cec5SDimitry Andric   // accessed by flat instructions. For now this is mostly a poor way of
3580b57cec5SDimitry Andric   // estimating whether there are calls before argument lowering.
359*5ffd83dbSDimitry Andric   if (!IsFunc && HaveCall) {
360*5ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-calls");
361*5ffd83dbSDimitry Andric     Changed = true;
362*5ffd83dbSDimitry Andric   }
363*5ffd83dbSDimitry Andric 
364*5ffd83dbSDimitry Andric   if (HaveStackObjects) {
365*5ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-stack-objects");
3660b57cec5SDimitry Andric     Changed = true;
3670b57cec5SDimitry Andric   }
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   return Changed;
3700b57cec5SDimitry Andric }
3710b57cec5SDimitry Andric 
3720b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
3730b57cec5SDimitry Andric   bool Changed = false;
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric   for (CallGraphNode *I : SCC) {
3760b57cec5SDimitry Andric     // Build a list of CallGraphNodes from most number of uses to least
3770b57cec5SDimitry Andric     if (I->getNumReferences())
3780b57cec5SDimitry Andric       NodeList.push_back(I);
3790b57cec5SDimitry Andric     else {
3800b57cec5SDimitry Andric       processUniformWorkGroupAttribute();
3810b57cec5SDimitry Andric       NodeList.clear();
3820b57cec5SDimitry Andric     }
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric     Function *F = I->getFunction();
3850b57cec5SDimitry Andric     // Add feature attributes
3860b57cec5SDimitry Andric     if (!F || F->isDeclaration())
3870b57cec5SDimitry Andric       continue;
3880b57cec5SDimitry Andric     Changed |= addFeatureAttributes(*F);
3890b57cec5SDimitry Andric   }
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric   return Changed;
3920b57cec5SDimitry Andric }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
3950b57cec5SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
3960b57cec5SDimitry Andric   if (!TPC)
3970b57cec5SDimitry Andric     report_fatal_error("TargetMachine is required");
3980b57cec5SDimitry Andric 
3990b57cec5SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
4000b57cec5SDimitry Andric   return false;
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
4040b57cec5SDimitry Andric   return new AMDGPUAnnotateKernelFeatures();
4050b57cec5SDimitry Andric }
406