xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric //
9*0b57cec5SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics
10*0b57cec5SDimitry Andric /// which will impact calling convention lowering.
11*0b57cec5SDimitry Andric //
12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
14*0b57cec5SDimitry Andric #include "AMDGPU.h"
15*0b57cec5SDimitry Andric #include "AMDGPUSubtarget.h"
16*0b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
17*0b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
18*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
19*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
20*0b57cec5SDimitry Andric #include "llvm/ADT/Triple.h"
21*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
22*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
23*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
24*0b57cec5SDimitry Andric #include "llvm/IR/CallSite.h"
25*0b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
26*0b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
27*0b57cec5SDimitry Andric #include "llvm/IR/Function.h"
28*0b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
29*0b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
30*0b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
31*0b57cec5SDimitry Andric #include "llvm/IR/Module.h"
32*0b57cec5SDimitry Andric #include "llvm/IR/Type.h"
33*0b57cec5SDimitry Andric #include "llvm/IR/Use.h"
34*0b57cec5SDimitry Andric #include "llvm/Pass.h"
35*0b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
36*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
37*0b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
40*0b57cec5SDimitry Andric 
41*0b57cec5SDimitry Andric using namespace llvm;
42*0b57cec5SDimitry Andric 
namespace {

/// CallGraphSCCPass that annotates functions with AMDGPU string attributes
/// (e.g. "amdgpu-queue-ptr", "amdgpu-work-item-id-y") recording which implicit
/// arguments / hardware features their bodies require, and that propagates the
/// "uniform-work-group-size" attribute from callers to callees.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  // Set once in doInitialization(); queried per-function in
  // addFeatureAttributes() for the GCN subtarget.
  const TargetMachine *TM = nullptr;
  // Call-graph nodes accumulated across runOnSCC() invocations until a node
  // with no references (a root) is reached; see runOnSCC().
  SmallVector<CallGraphNode*, 8> NodeList;

  bool addFeatureAttributes(Function &F);
  bool processUniformWorkGroupAttribute();
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  /// Returns true if \p CE is an addrspacecast whose source address space
  /// requires the queue pointer (local/private -> flat).
  static bool visitConstantExpr(const ConstantExpr *CE);
  /// Returns true if \p EntryC, or any constant reachable through its
  /// operands, requires the queue pointer. \p ConstantExprVisited is shared
  /// across calls to avoid re-walking constants.
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace
78*0b57cec5SDimitry Andric 
char AMDGPUAnnotateKernelFeatures::ID = 0;

// Externally visible pass identifier, referenced by the AMDGPU target's pass
// setup code.
char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)
85*0b57cec5SDimitry Andric 
86*0b57cec5SDimitry Andric 
87*0b57cec5SDimitry Andric // The queue ptr is only needed when casting to flat, not from it.
88*0b57cec5SDimitry Andric static bool castRequiresQueuePtr(unsigned SrcAS) {
89*0b57cec5SDimitry Andric   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
90*0b57cec5SDimitry Andric }
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
93*0b57cec5SDimitry Andric   return castRequiresQueuePtr(ASC->getSrcAddressSpace());
94*0b57cec5SDimitry Andric }
95*0b57cec5SDimitry Andric 
96*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
97*0b57cec5SDimitry Andric   if (CE->getOpcode() == Instruction::AddrSpaceCast) {
98*0b57cec5SDimitry Andric     unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
99*0b57cec5SDimitry Andric     return castRequiresQueuePtr(SrcAS);
100*0b57cec5SDimitry Andric   }
101*0b57cec5SDimitry Andric 
102*0b57cec5SDimitry Andric   return false;
103*0b57cec5SDimitry Andric }
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
106*0b57cec5SDimitry Andric   const Constant *EntryC,
107*0b57cec5SDimitry Andric   SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
108*0b57cec5SDimitry Andric 
109*0b57cec5SDimitry Andric   if (!ConstantExprVisited.insert(EntryC).second)
110*0b57cec5SDimitry Andric     return false;
111*0b57cec5SDimitry Andric 
112*0b57cec5SDimitry Andric   SmallVector<const Constant *, 16> Stack;
113*0b57cec5SDimitry Andric   Stack.push_back(EntryC);
114*0b57cec5SDimitry Andric 
115*0b57cec5SDimitry Andric   while (!Stack.empty()) {
116*0b57cec5SDimitry Andric     const Constant *C = Stack.pop_back_val();
117*0b57cec5SDimitry Andric 
118*0b57cec5SDimitry Andric     // Check this constant expression.
119*0b57cec5SDimitry Andric     if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
120*0b57cec5SDimitry Andric       if (visitConstantExpr(CE))
121*0b57cec5SDimitry Andric         return true;
122*0b57cec5SDimitry Andric     }
123*0b57cec5SDimitry Andric 
124*0b57cec5SDimitry Andric     // Visit all sub-expressions.
125*0b57cec5SDimitry Andric     for (const Use &U : C->operands()) {
126*0b57cec5SDimitry Andric       const auto *OpC = dyn_cast<Constant>(U);
127*0b57cec5SDimitry Andric       if (!OpC)
128*0b57cec5SDimitry Andric         continue;
129*0b57cec5SDimitry Andric 
130*0b57cec5SDimitry Andric       if (!ConstantExprVisited.insert(OpC).second)
131*0b57cec5SDimitry Andric         continue;
132*0b57cec5SDimitry Andric 
133*0b57cec5SDimitry Andric       Stack.push_back(OpC);
134*0b57cec5SDimitry Andric     }
135*0b57cec5SDimitry Andric   }
136*0b57cec5SDimitry Andric 
137*0b57cec5SDimitry Andric   return false;
138*0b57cec5SDimitry Andric }
139*0b57cec5SDimitry Andric 
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.

// Maps an intrinsic ID to the function attribute that records its use, or ""
// for intrinsics that need no annotation. Sets \p NonKernelOnly for the x
// workitem/workgroup ids, whose attribute is only meaningful on non-kernel
// functions (see comment above). Sets \p IsQueuePtr for intrinsics that
// additionally require the queue pointer (queue_ptr, trap, debugtrap).
static StringRef intrinsicToAttrName(Intrinsic::ID ID,
                                     bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
184*0b57cec5SDimitry Andric 
185*0b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee,
186*0b57cec5SDimitry Andric                        StringRef Name) {
187*0b57cec5SDimitry Andric   if (Callee.hasFnAttribute(Name)) {
188*0b57cec5SDimitry Andric     Parent.addFnAttr(Name);
189*0b57cec5SDimitry Andric     return true;
190*0b57cec5SDimitry Andric   }
191*0b57cec5SDimitry Andric   return false;
192*0b57cec5SDimitry Andric }
193*0b57cec5SDimitry Andric 
194*0b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
195*0b57cec5SDimitry Andric                                    bool &NeedQueuePtr) {
196*0b57cec5SDimitry Andric   // X ids unnecessarily propagated to kernels.
197*0b57cec5SDimitry Andric   static const StringRef AttrNames[] = {
198*0b57cec5SDimitry Andric     { "amdgpu-work-item-id-x" },
199*0b57cec5SDimitry Andric     { "amdgpu-work-item-id-y" },
200*0b57cec5SDimitry Andric     { "amdgpu-work-item-id-z" },
201*0b57cec5SDimitry Andric     { "amdgpu-work-group-id-x" },
202*0b57cec5SDimitry Andric     { "amdgpu-work-group-id-y" },
203*0b57cec5SDimitry Andric     { "amdgpu-work-group-id-z" },
204*0b57cec5SDimitry Andric     { "amdgpu-dispatch-ptr" },
205*0b57cec5SDimitry Andric     { "amdgpu-dispatch-id" },
206*0b57cec5SDimitry Andric     { "amdgpu-kernarg-segment-ptr" },
207*0b57cec5SDimitry Andric     { "amdgpu-implicitarg-ptr" }
208*0b57cec5SDimitry Andric   };
209*0b57cec5SDimitry Andric 
210*0b57cec5SDimitry Andric   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
211*0b57cec5SDimitry Andric     NeedQueuePtr = true;
212*0b57cec5SDimitry Andric 
213*0b57cec5SDimitry Andric   for (StringRef AttrName : AttrNames)
214*0b57cec5SDimitry Andric     handleAttr(Parent, Callee, AttrName);
215*0b57cec5SDimitry Andric }
216*0b57cec5SDimitry Andric 
217*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
218*0b57cec5SDimitry Andric   bool Changed = false;
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric   for (auto *Node : reverse(NodeList)) {
221*0b57cec5SDimitry Andric     Function *Caller = Node->getFunction();
222*0b57cec5SDimitry Andric 
223*0b57cec5SDimitry Andric     for (auto I : *Node) {
224*0b57cec5SDimitry Andric       Function *Callee = std::get<1>(I)->getFunction();
225*0b57cec5SDimitry Andric       if (Callee)
226*0b57cec5SDimitry Andric         Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
227*0b57cec5SDimitry Andric     }
228*0b57cec5SDimitry Andric   }
229*0b57cec5SDimitry Andric 
230*0b57cec5SDimitry Andric   return Changed;
231*0b57cec5SDimitry Andric }
232*0b57cec5SDimitry Andric 
233*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
234*0b57cec5SDimitry Andric        Function &Caller, Function &Callee) {
235*0b57cec5SDimitry Andric 
236*0b57cec5SDimitry Andric   // Check for externally defined function
237*0b57cec5SDimitry Andric   if (!Callee.hasExactDefinition()) {
238*0b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
239*0b57cec5SDimitry Andric     if (!Caller.hasFnAttribute("uniform-work-group-size"))
240*0b57cec5SDimitry Andric       Caller.addFnAttr("uniform-work-group-size", "false");
241*0b57cec5SDimitry Andric 
242*0b57cec5SDimitry Andric     return true;
243*0b57cec5SDimitry Andric   }
244*0b57cec5SDimitry Andric   // Check if the Caller has the attribute
245*0b57cec5SDimitry Andric   if (Caller.hasFnAttribute("uniform-work-group-size")) {
246*0b57cec5SDimitry Andric     // Check if the value of the attribute is true
247*0b57cec5SDimitry Andric     if (Caller.getFnAttribute("uniform-work-group-size")
248*0b57cec5SDimitry Andric         .getValueAsString().equals("true")) {
249*0b57cec5SDimitry Andric       // Propagate the attribute to the Callee, if it does not have it
250*0b57cec5SDimitry Andric       if (!Callee.hasFnAttribute("uniform-work-group-size")) {
251*0b57cec5SDimitry Andric         Callee.addFnAttr("uniform-work-group-size", "true");
252*0b57cec5SDimitry Andric         return true;
253*0b57cec5SDimitry Andric       }
254*0b57cec5SDimitry Andric     } else {
255*0b57cec5SDimitry Andric       Callee.addFnAttr("uniform-work-group-size", "false");
256*0b57cec5SDimitry Andric       return true;
257*0b57cec5SDimitry Andric     }
258*0b57cec5SDimitry Andric   } else {
259*0b57cec5SDimitry Andric     // If the attribute is absent, set it as false
260*0b57cec5SDimitry Andric     Caller.addFnAttr("uniform-work-group-size", "false");
261*0b57cec5SDimitry Andric     Callee.addFnAttr("uniform-work-group-size", "false");
262*0b57cec5SDimitry Andric     return true;
263*0b57cec5SDimitry Andric   }
264*0b57cec5SDimitry Andric   return false;
265*0b57cec5SDimitry Andric }
266*0b57cec5SDimitry Andric 
// Scans \p F's body and adds the feature attributes it implies: attributes
// named by intrinsic calls, attributes inherited from direct callees, and
// "amdgpu-queue-ptr" for addrspacecasts (in instructions or constant
// expression operands) from local/private. Returns true if any attribute
// was added.
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
  bool HasFlat = ST.hasFlatAddressSpace();
  bool HasApertureRegs = ST.hasApertureRegs();
  SmallPtrSet<const Constant *, 8> ConstantExprVisited;

  bool Changed = false;
  bool NeedQueuePtr = false;
  bool HaveCall = false;
  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());

  for (BasicBlock &BB : F) {
    for (Instruction &I : BB) {
      CallSite CS(&I);
      if (CS) {
        Function *Callee = CS.getCalledFunction();

        // TODO: Do something with indirect calls.
        if (!Callee) {
          if (!CS.isInlineAsm())
            HaveCall = true;
          continue;
        }

        Intrinsic::ID IID = Callee->getIntrinsicID();
        if (IID == Intrinsic::not_intrinsic) {
          // Ordinary call: inherit whatever features the callee needs.
          HaveCall = true;
          copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
          Changed = true;
        } else {
          // Intrinsic call: add its attribute, except for x-id attributes
          // on kernels (entry functions), where they are always initialized.
          bool NonKernelOnly = false;
          StringRef AttrName = intrinsicToAttrName(IID,
                                                   NonKernelOnly, NeedQueuePtr);
          if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
            F.addFnAttr(AttrName);
            Changed = true;
          }
        }
      }

      // Stop looking for queue-ptr-requiring casts once the answer is known,
      // or when aperture registers make the queue pointer unnecessary.
      if (NeedQueuePtr || HasApertureRegs)
        continue;

      if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
        if (castRequiresQueuePtr(ASC)) {
          NeedQueuePtr = true;
          continue;
        }
      }

      // Casts may also hide inside constant expression operands.
      for (const Use &U : I.operands()) {
        const auto *OpC = dyn_cast<Constant>(U);
        if (!OpC)
          continue;

        if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
          NeedQueuePtr = true;
          break;
        }
      }
    }
  }

  if (NeedQueuePtr) {
    F.addFnAttr("amdgpu-queue-ptr");
    Changed = true;
  }

  // TODO: We could refine this to captured pointers that could possibly be
  // accessed by flat instructions. For now this is mostly a poor way of
  // estimating whether there are calls before argument lowering.
  if (HasFlat && !IsFunc && HaveCall) {
    F.addFnAttr("amdgpu-flat-scratch");
    Changed = true;
  }

  return Changed;
}
345*0b57cec5SDimitry Andric 
346*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
347*0b57cec5SDimitry Andric   bool Changed = false;
348*0b57cec5SDimitry Andric 
349*0b57cec5SDimitry Andric   for (CallGraphNode *I : SCC) {
350*0b57cec5SDimitry Andric     // Build a list of CallGraphNodes from most number of uses to least
351*0b57cec5SDimitry Andric     if (I->getNumReferences())
352*0b57cec5SDimitry Andric       NodeList.push_back(I);
353*0b57cec5SDimitry Andric     else {
354*0b57cec5SDimitry Andric       processUniformWorkGroupAttribute();
355*0b57cec5SDimitry Andric       NodeList.clear();
356*0b57cec5SDimitry Andric     }
357*0b57cec5SDimitry Andric 
358*0b57cec5SDimitry Andric     Function *F = I->getFunction();
359*0b57cec5SDimitry Andric     // Add feature attributes
360*0b57cec5SDimitry Andric     if (!F || F->isDeclaration())
361*0b57cec5SDimitry Andric       continue;
362*0b57cec5SDimitry Andric     Changed |= addFeatureAttributes(*F);
363*0b57cec5SDimitry Andric   }
364*0b57cec5SDimitry Andric 
365*0b57cec5SDimitry Andric   return Changed;
366*0b57cec5SDimitry Andric }
367*0b57cec5SDimitry Andric 
368*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
369*0b57cec5SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
370*0b57cec5SDimitry Andric   if (!TPC)
371*0b57cec5SDimitry Andric     report_fatal_error("TargetMachine is required");
372*0b57cec5SDimitry Andric 
373*0b57cec5SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
374*0b57cec5SDimitry Andric   return false;
375*0b57cec5SDimitry Andric }
376*0b57cec5SDimitry Andric 
// Factory entry point used by the AMDGPU target to create this pass.
Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
  return new AMDGPUAnnotateKernelFeatures();
}
380