xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
10b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics
100b57cec5SDimitry Andric /// which will impact calling convention lowering.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "AMDGPU.h"
15e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
160b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
170b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
19e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
210b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace llvm;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric namespace {
28*fe6060f1SDimitry Andric static constexpr StringLiteral ImplicitAttrNames[] = {
29*fe6060f1SDimitry Andric     // X ids unnecessarily propagated to kernels.
30*fe6060f1SDimitry Andric     "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
31*fe6060f1SDimitry Andric     "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
32*fe6060f1SDimitry Andric     "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
33*fe6060f1SDimitry Andric     "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
34*fe6060f1SDimitry Andric     "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
370b57cec5SDimitry Andric private:
380b57cec5SDimitry Andric   const TargetMachine *TM = nullptr;
390b57cec5SDimitry Andric   SmallVector<CallGraphNode*, 8> NodeList;
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric   bool addFeatureAttributes(Function &F);
420b57cec5SDimitry Andric   bool processUniformWorkGroupAttribute();
430b57cec5SDimitry Andric   bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric public:
460b57cec5SDimitry Andric   static char ID;
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   bool doInitialization(CallGraph &CG) override;
510b57cec5SDimitry Andric   bool runOnSCC(CallGraphSCC &SCC) override;
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric   StringRef getPassName() const override {
540b57cec5SDimitry Andric     return "AMDGPU Annotate Kernel Features";
550b57cec5SDimitry Andric   }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
580b57cec5SDimitry Andric     AU.setPreservesAll();
590b57cec5SDimitry Andric     CallGraphSCCPass::getAnalysisUsage(AU);
600b57cec5SDimitry Andric   }
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric   static bool visitConstantExpr(const ConstantExpr *CE);
630b57cec5SDimitry Andric   static bool visitConstantExprsRecursively(
640b57cec5SDimitry Andric     const Constant *EntryC,
655ffd83dbSDimitry Andric     SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
665ffd83dbSDimitry Andric     bool HasApertureRegs);
670b57cec5SDimitry Andric };
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric } // end anonymous namespace
700b57cec5SDimitry Andric 
// Storage for the pass identifier that the constructor hands to the
// CallGraphSCCPass base class.
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Exposes the pass identifier through the llvm namespace so code outside this
// file can refer to the pass.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

// Registers the pass under DEBUG_TYPE ("amdgpu-annotate-kernel-features").
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
800b57cec5SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
810b57cec5SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
850b57cec5SDimitry Andric   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric 
885ffd83dbSDimitry Andric static bool isDSAddress(const Constant *C) {
895ffd83dbSDimitry Andric   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
905ffd83dbSDimitry Andric   if (!GV)
915ffd83dbSDimitry Andric     return false;
925ffd83dbSDimitry Andric   unsigned AS = GV->getAddressSpace();
935ffd83dbSDimitry Andric   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
945ffd83dbSDimitry Andric }
955ffd83dbSDimitry Andric 
960b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
970b57cec5SDimitry Andric   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
980b57cec5SDimitry Andric     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
990b57cec5SDimitry Andric     return castRequiresQueuePtr(SrcAS);
1000b57cec5SDimitry Andric   }
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   return false;
1030b57cec5SDimitry Andric }
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
1060b57cec5SDimitry Andric   const Constant *EntryC,
1075ffd83dbSDimitry Andric   SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
1085ffd83dbSDimitry Andric   bool IsFunc, bool HasApertureRegs) {
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   if (!ConstantExprVisited.insert(EntryC).second)
1110b57cec5SDimitry Andric     return false;
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric   SmallVector<const Constant *, 16> Stack;
1140b57cec5SDimitry Andric   Stack.push_back(EntryC);
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric   while (!Stack.empty()) {
1170b57cec5SDimitry Andric     const Constant *C = Stack.pop_back_val();
1180b57cec5SDimitry Andric 
1195ffd83dbSDimitry Andric     // We need to trap on DS globals in non-entry functions.
1205ffd83dbSDimitry Andric     if (IsFunc && isDSAddress(C))
1215ffd83dbSDimitry Andric       return true;
1225ffd83dbSDimitry Andric 
1230b57cec5SDimitry Andric     // Check this constant expression.
1240b57cec5SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
1255ffd83dbSDimitry Andric       if (!HasApertureRegs && visitConstantExpr(CE))
1260b57cec5SDimitry Andric         return true;
1270b57cec5SDimitry Andric     }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric     // Visit all sub-expressions.
1300b57cec5SDimitry Andric     for (const Use &U : C->operands()) {
1310b57cec5SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
1320b57cec5SDimitry Andric       if (!OpC)
1330b57cec5SDimitry Andric         continue;
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric       if (!ConstantExprVisited.insert(OpC).second)
1360b57cec5SDimitry Andric         continue;
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric       Stack.push_back(OpC);
1390b57cec5SDimitry Andric     }
1400b57cec5SDimitry Andric   }
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric   return false;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always
1460b57cec5SDimitry Andric // initialized.
1470b57cec5SDimitry Andric //
1480b57cec5SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup
1490b57cec5SDimitry Andric // size is 1 for y/z.
1500b57cec5SDimitry Andric static StringRef intrinsicToAttrName(Intrinsic::ID ID,
1510b57cec5SDimitry Andric                                      bool &NonKernelOnly,
1520b57cec5SDimitry Andric                                      bool &IsQueuePtr) {
1530b57cec5SDimitry Andric   switch (ID) {
1540b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
1550b57cec5SDimitry Andric     NonKernelOnly = true;
1560b57cec5SDimitry Andric     return "amdgpu-work-item-id-x";
1570b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_x:
1580b57cec5SDimitry Andric     NonKernelOnly = true;
1590b57cec5SDimitry Andric     return "amdgpu-work-group-id-x";
1600b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
1610b57cec5SDimitry Andric   case Intrinsic::r600_read_tidig_y:
1620b57cec5SDimitry Andric     return "amdgpu-work-item-id-y";
1630b57cec5SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
1640b57cec5SDimitry Andric   case Intrinsic::r600_read_tidig_z:
1650b57cec5SDimitry Andric     return "amdgpu-work-item-id-z";
1660b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_y:
1670b57cec5SDimitry Andric   case Intrinsic::r600_read_tgid_y:
1680b57cec5SDimitry Andric     return "amdgpu-work-group-id-y";
1690b57cec5SDimitry Andric   case Intrinsic::amdgcn_workgroup_id_z:
1700b57cec5SDimitry Andric   case Intrinsic::r600_read_tgid_z:
1710b57cec5SDimitry Andric     return "amdgpu-work-group-id-z";
1720b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_ptr:
1730b57cec5SDimitry Andric     return "amdgpu-dispatch-ptr";
1740b57cec5SDimitry Andric   case Intrinsic::amdgcn_dispatch_id:
1750b57cec5SDimitry Andric     return "amdgpu-dispatch-id";
1760b57cec5SDimitry Andric   case Intrinsic::amdgcn_kernarg_segment_ptr:
1770b57cec5SDimitry Andric     return "amdgpu-kernarg-segment-ptr";
1780b57cec5SDimitry Andric   case Intrinsic::amdgcn_implicitarg_ptr:
1790b57cec5SDimitry Andric     return "amdgpu-implicitarg-ptr";
1800b57cec5SDimitry Andric   case Intrinsic::amdgcn_queue_ptr:
1818bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_shared:
1828bcb0991SDimitry Andric   case Intrinsic::amdgcn_is_private:
1838bcb0991SDimitry Andric     // TODO: Does not require queue ptr on gfx9+
1840b57cec5SDimitry Andric   case Intrinsic::trap:
1850b57cec5SDimitry Andric   case Intrinsic::debugtrap:
1860b57cec5SDimitry Andric     IsQueuePtr = true;
1870b57cec5SDimitry Andric     return "amdgpu-queue-ptr";
1880b57cec5SDimitry Andric   default:
1890b57cec5SDimitry Andric     return "";
1900b57cec5SDimitry Andric   }
1910b57cec5SDimitry Andric }
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
1940b57cec5SDimitry Andric                        StringRef Name) {
1950b57cec5SDimitry Andric   if (Callee.hasFnAttribute(Name)) {
1960b57cec5SDimitry Andric     Parent.addFnAttr(Name);
1970b57cec5SDimitry Andric     return true;
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric   return false;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
2030b57cec5SDimitry Andric                                    bool &NeedQueuePtr) {
2040b57cec5SDimitry Andric   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
2050b57cec5SDimitry Andric     NeedQueuePtr = true;
2060b57cec5SDimitry Andric 
207*fe6060f1SDimitry Andric   for (StringRef AttrName : ImplicitAttrNames)
2080b57cec5SDimitry Andric     handleAttr(Parent, Callee, AttrName);
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
2120b57cec5SDimitry Andric   bool Changed = false;
2130b57cec5SDimitry Andric 
2140b57cec5SDimitry Andric   for (auto *Node : reverse(NodeList)) {
2150b57cec5SDimitry Andric     Function *Caller = Node->getFunction();
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric     for (auto I : *Node) {
2180b57cec5SDimitry Andric       Function *Callee = std::get<1>(I)->getFunction();
2190b57cec5SDimitry Andric       if (Callee)
2200b57cec5SDimitry Andric         Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
2210b57cec5SDimitry Andric     }
2220b57cec5SDimitry Andric   }
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   return Changed;
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
2280b57cec5SDimitry Andric        Function &Caller, Function &Callee) {
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   // Check for externally defined function
2310b57cec5SDimitry Andric   if (!Callee.hasExactDefinition()) {
2320b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
2330b57cec5SDimitry Andric     if (!Caller.hasFnAttribute("uniform-work-group-size"))
2340b57cec5SDimitry Andric       Caller.addFnAttr("uniform-work-group-size", "false");
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric     return true;
2370b57cec5SDimitry Andric   }
2380b57cec5SDimitry Andric   // Check if the Caller has the attribute
2390b57cec5SDimitry Andric   if (Caller.hasFnAttribute("uniform-work-group-size")) {
2400b57cec5SDimitry Andric     // Check if the value of the attribute is true
2410b57cec5SDimitry Andric     if (Caller.getFnAttribute("uniform-work-group-size")
2420b57cec5SDimitry Andric         .getValueAsString().equals("true")) {
2430b57cec5SDimitry Andric       // Propagate the attribute to the Callee, if it does not have it
2440b57cec5SDimitry Andric       if (!Callee.hasFnAttribute("uniform-work-group-size")) {
2450b57cec5SDimitry Andric         Callee.addFnAttr("uniform-work-group-size", "true");
2460b57cec5SDimitry Andric         return true;
2470b57cec5SDimitry Andric       }
2480b57cec5SDimitry Andric     } else {
2490b57cec5SDimitry Andric       Callee.addFnAttr("uniform-work-group-size", "false");
2500b57cec5SDimitry Andric       return true;
2510b57cec5SDimitry Andric     }
2520b57cec5SDimitry Andric   } else {
2530b57cec5SDimitry Andric     // If the attribute is absent, set it as false
2540b57cec5SDimitry Andric     Caller.addFnAttr("uniform-work-group-size", "false");
2550b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
2560b57cec5SDimitry Andric     return true;
2570b57cec5SDimitry Andric   }
2580b57cec5SDimitry Andric   return false;
2590b57cec5SDimitry Andric }
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
2620b57cec5SDimitry Andric   const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
2630b57cec5SDimitry Andric   bool HasApertureRegs = ST.hasApertureRegs();
2640b57cec5SDimitry Andric   SmallPtrSet<const Constant *, 8> ConstantExprVisited;
2650b57cec5SDimitry Andric 
2665ffd83dbSDimitry Andric   bool HaveStackObjects = false;
2670b57cec5SDimitry Andric   bool Changed = false;
2680b57cec5SDimitry Andric   bool NeedQueuePtr = false;
2690b57cec5SDimitry Andric   bool HaveCall = false;
270*fe6060f1SDimitry Andric   bool HasIndirectCall = false;
2710b57cec5SDimitry Andric   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
272*fe6060f1SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
273*fe6060f1SDimitry Andric   bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
274*fe6060f1SDimitry Andric 
275*fe6060f1SDimitry Andric   // If this function hasAddressTaken() = true
276*fe6060f1SDimitry Andric   // then add all attributes corresponding to the implicit args.
277*fe6060f1SDimitry Andric   if (CallingConvSupportsAllImplicits &&
278*fe6060f1SDimitry Andric       F.hasAddressTaken(nullptr, true, true, true)) {
279*fe6060f1SDimitry Andric     for (StringRef AttrName : ImplicitAttrNames) {
280*fe6060f1SDimitry Andric       F.addFnAttr(AttrName);
281*fe6060f1SDimitry Andric     }
282*fe6060f1SDimitry Andric     Changed = true;
283*fe6060f1SDimitry Andric   }
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
2860b57cec5SDimitry Andric     for (Instruction &I : BB) {
2875ffd83dbSDimitry Andric       if (isa<AllocaInst>(I)) {
2885ffd83dbSDimitry Andric         HaveStackObjects = true;
2895ffd83dbSDimitry Andric         continue;
2905ffd83dbSDimitry Andric       }
2915ffd83dbSDimitry Andric 
2925ffd83dbSDimitry Andric       if (auto *CB = dyn_cast<CallBase>(&I)) {
2935ffd83dbSDimitry Andric         const Function *Callee =
2945ffd83dbSDimitry Andric             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
2950b57cec5SDimitry Andric 
296*fe6060f1SDimitry Andric         // Note the occurence of indirect call.
2970b57cec5SDimitry Andric         if (!Callee) {
298*fe6060f1SDimitry Andric           if (!CB->isInlineAsm()) {
299*fe6060f1SDimitry Andric             HasIndirectCall = true;
3000b57cec5SDimitry Andric             HaveCall = true;
301*fe6060f1SDimitry Andric           }
3020b57cec5SDimitry Andric           continue;
3030b57cec5SDimitry Andric         }
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric         Intrinsic::ID IID = Callee->getIntrinsicID();
3060b57cec5SDimitry Andric         if (IID == Intrinsic::not_intrinsic) {
3070b57cec5SDimitry Andric           HaveCall = true;
3080b57cec5SDimitry Andric           copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
3090b57cec5SDimitry Andric           Changed = true;
3100b57cec5SDimitry Andric         } else {
3110b57cec5SDimitry Andric           bool NonKernelOnly = false;
3125ffd83dbSDimitry Andric 
3135ffd83dbSDimitry Andric           if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
3145ffd83dbSDimitry Andric             F.addFnAttr("amdgpu-kernarg-segment-ptr");
3155ffd83dbSDimitry Andric           } else {
3165ffd83dbSDimitry Andric             StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
3175ffd83dbSDimitry Andric                                                      NeedQueuePtr);
3180b57cec5SDimitry Andric             if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
3190b57cec5SDimitry Andric               F.addFnAttr(AttrName);
3200b57cec5SDimitry Andric               Changed = true;
3210b57cec5SDimitry Andric             }
3220b57cec5SDimitry Andric           }
3230b57cec5SDimitry Andric         }
3245ffd83dbSDimitry Andric       }
3250b57cec5SDimitry Andric 
3265ffd83dbSDimitry Andric       if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
3270b57cec5SDimitry Andric         continue;
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric       if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
3305ffd83dbSDimitry Andric         if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
3310b57cec5SDimitry Andric           NeedQueuePtr = true;
3320b57cec5SDimitry Andric           continue;
3330b57cec5SDimitry Andric         }
3340b57cec5SDimitry Andric       }
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric       for (const Use &U : I.operands()) {
3370b57cec5SDimitry Andric         const auto *OpC = dyn_cast<Constant>(U);
3380b57cec5SDimitry Andric         if (!OpC)
3390b57cec5SDimitry Andric           continue;
3400b57cec5SDimitry Andric 
3415ffd83dbSDimitry Andric         if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
3425ffd83dbSDimitry Andric                                           HasApertureRegs)) {
3430b57cec5SDimitry Andric           NeedQueuePtr = true;
3440b57cec5SDimitry Andric           break;
3450b57cec5SDimitry Andric         }
3460b57cec5SDimitry Andric       }
3470b57cec5SDimitry Andric     }
3480b57cec5SDimitry Andric   }
3490b57cec5SDimitry Andric 
3500b57cec5SDimitry Andric   if (NeedQueuePtr) {
3510b57cec5SDimitry Andric     F.addFnAttr("amdgpu-queue-ptr");
3520b57cec5SDimitry Andric     Changed = true;
3530b57cec5SDimitry Andric   }
3540b57cec5SDimitry Andric 
3550b57cec5SDimitry Andric   // TODO: We could refine this to captured pointers that could possibly be
3560b57cec5SDimitry Andric   // accessed by flat instructions. For now this is mostly a poor way of
3570b57cec5SDimitry Andric   // estimating whether there are calls before argument lowering.
3585ffd83dbSDimitry Andric   if (!IsFunc && HaveCall) {
3595ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-calls");
3605ffd83dbSDimitry Andric     Changed = true;
3615ffd83dbSDimitry Andric   }
3625ffd83dbSDimitry Andric 
3635ffd83dbSDimitry Andric   if (HaveStackObjects) {
3645ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-stack-objects");
3650b57cec5SDimitry Andric     Changed = true;
3660b57cec5SDimitry Andric   }
3670b57cec5SDimitry Andric 
368*fe6060f1SDimitry Andric   // This pass cannot copy attributes from callees to callers
369*fe6060f1SDimitry Andric   // if there is an indirect call and in thus such cases,
370*fe6060f1SDimitry Andric   // hasAddressTaken() would be false for kernels and functions
371*fe6060f1SDimitry Andric   // making an indirect call (if they are themselves not indirectly called).
372*fe6060f1SDimitry Andric   // We must tag all such kernels/functions with all implicits attributes
373*fe6060f1SDimitry Andric   // for correctness.
374*fe6060f1SDimitry Andric   // e.g.
375*fe6060f1SDimitry Andric   // 1. Kernel K1 makes an indirect call to function F1.
376*fe6060f1SDimitry Andric   //    Without detecting an indirect call in K1, this pass will not
377*fe6060f1SDimitry Andric   //    add all implicit args to K1 (which is incorrect).
378*fe6060f1SDimitry Andric   // 2. Kernel K1 makes direct call to F1 which makes indirect call to function
379*fe6060f1SDimitry Andric   // F2.
380*fe6060f1SDimitry Andric   //    Without detecting an indirect call in F1 (whose hasAddressTaken() is
381*fe6060f1SDimitry Andric   //    false), the pass will not add all implicit args to F1 (which is
382*fe6060f1SDimitry Andric   //    essential for correctness).
383*fe6060f1SDimitry Andric   if (CallingConvSupportsAllImplicits && HasIndirectCall) {
384*fe6060f1SDimitry Andric     for (StringRef AttrName : ImplicitAttrNames) {
385*fe6060f1SDimitry Andric       F.addFnAttr(AttrName);
386*fe6060f1SDimitry Andric     }
387*fe6060f1SDimitry Andric     Changed = true;
388*fe6060f1SDimitry Andric   }
389*fe6060f1SDimitry Andric 
3900b57cec5SDimitry Andric   return Changed;
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
3940b57cec5SDimitry Andric   bool Changed = false;
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   for (CallGraphNode *I : SCC) {
3970b57cec5SDimitry Andric     // Build a list of CallGraphNodes from most number of uses to least
3980b57cec5SDimitry Andric     if (I->getNumReferences())
3990b57cec5SDimitry Andric       NodeList.push_back(I);
4000b57cec5SDimitry Andric     else {
4010b57cec5SDimitry Andric       processUniformWorkGroupAttribute();
4020b57cec5SDimitry Andric       NodeList.clear();
4030b57cec5SDimitry Andric     }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric     Function *F = I->getFunction();
406*fe6060f1SDimitry Andric     // Ignore functions with graphics calling conventions, these are currently
407*fe6060f1SDimitry Andric     // not allowed to have kernel arguments.
408*fe6060f1SDimitry Andric     if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
4090b57cec5SDimitry Andric       continue;
410*fe6060f1SDimitry Andric     // Add feature attributes
4110b57cec5SDimitry Andric     Changed |= addFeatureAttributes(*F);
4120b57cec5SDimitry Andric   }
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   return Changed;
4150b57cec5SDimitry Andric }
4160b57cec5SDimitry Andric 
4170b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
4180b57cec5SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
4190b57cec5SDimitry Andric   if (!TPC)
4200b57cec5SDimitry Andric     report_fatal_error("TargetMachine is required");
4210b57cec5SDimitry Andric 
4220b57cec5SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
4230b57cec5SDimitry Andric   return false;
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric 
4260b57cec5SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
4270b57cec5SDimitry Andric   return new AMDGPUAnnotateKernelFeatures();
4280b57cec5SDimitry Andric }
429