1*0b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric /// \file This pass adds target attributes to functions which use intrinsics 10*0b57cec5SDimitry Andric /// which will impact calling convention lowering. 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "AMDGPU.h" 15*0b57cec5SDimitry Andric #include "AMDGPUSubtarget.h" 16*0b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 17*0b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 18*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 19*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/Triple.h" 21*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h" 22*0b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h" 23*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 24*0b57cec5SDimitry Andric #include "llvm/IR/CallSite.h" 25*0b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 26*0b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 27*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 28*0b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 29*0b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 30*0b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 31*0b57cec5SDimitry Andric #include "llvm/IR/Module.h" 
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-annotate-kernel-features"

using namespace llvm;

namespace {

/// Legacy CallGraphSCCPass that attaches string function attributes
/// (e.g. "amdgpu-queue-ptr", "amdgpu-work-item-id-y",
/// "uniform-work-group-size") describing which implicit inputs each function
/// needs, so later calling-convention lowering can allocate them.
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
  const TargetMachine *TM = nullptr; // Set in doInitialization(); not owned.
  // Call-graph nodes accumulated across runOnSCC invocations; flushed (and
  // processed in reverse, i.e. most-referenced first) when a node with zero
  // references is reached. See processUniformWorkGroupAttribute().
  SmallVector<CallGraphNode*, 8> NodeList;

  bool addFeatureAttributes(Function &F);
  bool processUniformWorkGroupAttribute();
  bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);

public:
  static char ID;

  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}

  bool doInitialization(CallGraph &CG) override;
  bool runOnSCC(CallGraphSCC &SCC) override;

  StringRef getPassName() const override {
    return "AMDGPU Annotate Kernel Features";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Only adds attributes; invalidates no analyses.
    AU.setPreservesAll();
    CallGraphSCCPass::getAnalysisUsage(AU);
  }

  static bool visitConstantExpr(const ConstantExpr *CE);
  static bool visitConstantExprsRecursively(
    const Constant *EntryC,
    SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
};

} // end anonymous namespace

char AMDGPUAnnotateKernelFeatures::ID = 0;

char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;

INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
                "Add AMDGPU function attributes", false, false)


// The queue ptr is only needed when casting to flat, not from it.
static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
  return castRequiresQueuePtr(ASC->getSrcAddressSpace());
}

// Returns true if \p CE is an addrspacecast constant expression whose source
// address space (local/private) requires the queue pointer to reach flat.
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
    return castRequiresQueuePtr(SrcAS);
  }

  return false;
}

bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( 106*0b57cec5SDimitry Andric const Constant *EntryC, 107*0b57cec5SDimitry Andric SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric if (!ConstantExprVisited.insert(EntryC).second) 110*0b57cec5SDimitry Andric return false; 111*0b57cec5SDimitry Andric 112*0b57cec5SDimitry Andric SmallVector<const Constant *, 16> Stack; 113*0b57cec5SDimitry Andric Stack.push_back(EntryC); 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric while (!Stack.empty()) { 116*0b57cec5SDimitry Andric const Constant *C = Stack.pop_back_val(); 117*0b57cec5SDimitry Andric 118*0b57cec5SDimitry Andric // Check this constant expression. 119*0b57cec5SDimitry Andric if (const auto *CE = dyn_cast<ConstantExpr>(C)) { 120*0b57cec5SDimitry Andric if (visitConstantExpr(CE)) 121*0b57cec5SDimitry Andric return true; 122*0b57cec5SDimitry Andric } 123*0b57cec5SDimitry Andric 124*0b57cec5SDimitry Andric // Visit all sub-expressions. 125*0b57cec5SDimitry Andric for (const Use &U : C->operands()) { 126*0b57cec5SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 127*0b57cec5SDimitry Andric if (!OpC) 128*0b57cec5SDimitry Andric continue; 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric if (!ConstantExprVisited.insert(OpC).second) 131*0b57cec5SDimitry Andric continue; 132*0b57cec5SDimitry Andric 133*0b57cec5SDimitry Andric Stack.push_back(OpC); 134*0b57cec5SDimitry Andric } 135*0b57cec5SDimitry Andric } 136*0b57cec5SDimitry Andric 137*0b57cec5SDimitry Andric return false; 138*0b57cec5SDimitry Andric } 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric // We do not need to note the x workitem or workgroup id because they are always 141*0b57cec5SDimitry Andric // initialized. 142*0b57cec5SDimitry Andric // 143*0b57cec5SDimitry Andric // TODO: We should not add the attributes if the known compile time workgroup 144*0b57cec5SDimitry Andric // size is 1 for y/z. 
145*0b57cec5SDimitry Andric static StringRef intrinsicToAttrName(Intrinsic::ID ID, 146*0b57cec5SDimitry Andric bool &NonKernelOnly, 147*0b57cec5SDimitry Andric bool &IsQueuePtr) { 148*0b57cec5SDimitry Andric switch (ID) { 149*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 150*0b57cec5SDimitry Andric NonKernelOnly = true; 151*0b57cec5SDimitry Andric return "amdgpu-work-item-id-x"; 152*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_x: 153*0b57cec5SDimitry Andric NonKernelOnly = true; 154*0b57cec5SDimitry Andric return "amdgpu-work-group-id-x"; 155*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 156*0b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_y: 157*0b57cec5SDimitry Andric return "amdgpu-work-item-id-y"; 158*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 159*0b57cec5SDimitry Andric case Intrinsic::r600_read_tidig_z: 160*0b57cec5SDimitry Andric return "amdgpu-work-item-id-z"; 161*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_y: 162*0b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_y: 163*0b57cec5SDimitry Andric return "amdgpu-work-group-id-y"; 164*0b57cec5SDimitry Andric case Intrinsic::amdgcn_workgroup_id_z: 165*0b57cec5SDimitry Andric case Intrinsic::r600_read_tgid_z: 166*0b57cec5SDimitry Andric return "amdgpu-work-group-id-z"; 167*0b57cec5SDimitry Andric case Intrinsic::amdgcn_dispatch_ptr: 168*0b57cec5SDimitry Andric return "amdgpu-dispatch-ptr"; 169*0b57cec5SDimitry Andric case Intrinsic::amdgcn_dispatch_id: 170*0b57cec5SDimitry Andric return "amdgpu-dispatch-id"; 171*0b57cec5SDimitry Andric case Intrinsic::amdgcn_kernarg_segment_ptr: 172*0b57cec5SDimitry Andric return "amdgpu-kernarg-segment-ptr"; 173*0b57cec5SDimitry Andric case Intrinsic::amdgcn_implicitarg_ptr: 174*0b57cec5SDimitry Andric return "amdgpu-implicitarg-ptr"; 175*0b57cec5SDimitry Andric case Intrinsic::amdgcn_queue_ptr: 176*0b57cec5SDimitry Andric case Intrinsic::trap: 177*0b57cec5SDimitry Andric case 
Intrinsic::debugtrap: 178*0b57cec5SDimitry Andric IsQueuePtr = true; 179*0b57cec5SDimitry Andric return "amdgpu-queue-ptr"; 180*0b57cec5SDimitry Andric default: 181*0b57cec5SDimitry Andric return ""; 182*0b57cec5SDimitry Andric } 183*0b57cec5SDimitry Andric } 184*0b57cec5SDimitry Andric 185*0b57cec5SDimitry Andric static bool handleAttr(Function &Parent, const Function &Callee, 186*0b57cec5SDimitry Andric StringRef Name) { 187*0b57cec5SDimitry Andric if (Callee.hasFnAttribute(Name)) { 188*0b57cec5SDimitry Andric Parent.addFnAttr(Name); 189*0b57cec5SDimitry Andric return true; 190*0b57cec5SDimitry Andric } 191*0b57cec5SDimitry Andric return false; 192*0b57cec5SDimitry Andric } 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric static void copyFeaturesToFunction(Function &Parent, const Function &Callee, 195*0b57cec5SDimitry Andric bool &NeedQueuePtr) { 196*0b57cec5SDimitry Andric // X ids unnecessarily propagated to kernels. 197*0b57cec5SDimitry Andric static const StringRef AttrNames[] = { 198*0b57cec5SDimitry Andric { "amdgpu-work-item-id-x" }, 199*0b57cec5SDimitry Andric { "amdgpu-work-item-id-y" }, 200*0b57cec5SDimitry Andric { "amdgpu-work-item-id-z" }, 201*0b57cec5SDimitry Andric { "amdgpu-work-group-id-x" }, 202*0b57cec5SDimitry Andric { "amdgpu-work-group-id-y" }, 203*0b57cec5SDimitry Andric { "amdgpu-work-group-id-z" }, 204*0b57cec5SDimitry Andric { "amdgpu-dispatch-ptr" }, 205*0b57cec5SDimitry Andric { "amdgpu-dispatch-id" }, 206*0b57cec5SDimitry Andric { "amdgpu-kernarg-segment-ptr" }, 207*0b57cec5SDimitry Andric { "amdgpu-implicitarg-ptr" } 208*0b57cec5SDimitry Andric }; 209*0b57cec5SDimitry Andric 210*0b57cec5SDimitry Andric if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) 211*0b57cec5SDimitry Andric NeedQueuePtr = true; 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric for (StringRef AttrName : AttrNames) 214*0b57cec5SDimitry Andric handleAttr(Parent, Callee, AttrName); 215*0b57cec5SDimitry Andric } 216*0b57cec5SDimitry Andric 
217*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() { 218*0b57cec5SDimitry Andric bool Changed = false; 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric for (auto *Node : reverse(NodeList)) { 221*0b57cec5SDimitry Andric Function *Caller = Node->getFunction(); 222*0b57cec5SDimitry Andric 223*0b57cec5SDimitry Andric for (auto I : *Node) { 224*0b57cec5SDimitry Andric Function *Callee = std::get<1>(I)->getFunction(); 225*0b57cec5SDimitry Andric if (Callee) 226*0b57cec5SDimitry Andric Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee); 227*0b57cec5SDimitry Andric } 228*0b57cec5SDimitry Andric } 229*0b57cec5SDimitry Andric 230*0b57cec5SDimitry Andric return Changed; 231*0b57cec5SDimitry Andric } 232*0b57cec5SDimitry Andric 233*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( 234*0b57cec5SDimitry Andric Function &Caller, Function &Callee) { 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric // Check for externally defined function 237*0b57cec5SDimitry Andric if (!Callee.hasExactDefinition()) { 238*0b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 239*0b57cec5SDimitry Andric if (!Caller.hasFnAttribute("uniform-work-group-size")) 240*0b57cec5SDimitry Andric Caller.addFnAttr("uniform-work-group-size", "false"); 241*0b57cec5SDimitry Andric 242*0b57cec5SDimitry Andric return true; 243*0b57cec5SDimitry Andric } 244*0b57cec5SDimitry Andric // Check if the Caller has the attribute 245*0b57cec5SDimitry Andric if (Caller.hasFnAttribute("uniform-work-group-size")) { 246*0b57cec5SDimitry Andric // Check if the value of the attribute is true 247*0b57cec5SDimitry Andric if (Caller.getFnAttribute("uniform-work-group-size") 248*0b57cec5SDimitry Andric .getValueAsString().equals("true")) { 249*0b57cec5SDimitry Andric // Propagate the attribute to the Callee, if it does not have it 250*0b57cec5SDimitry Andric if 
(!Callee.hasFnAttribute("uniform-work-group-size")) { 251*0b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "true"); 252*0b57cec5SDimitry Andric return true; 253*0b57cec5SDimitry Andric } 254*0b57cec5SDimitry Andric } else { 255*0b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 256*0b57cec5SDimitry Andric return true; 257*0b57cec5SDimitry Andric } 258*0b57cec5SDimitry Andric } else { 259*0b57cec5SDimitry Andric // If the attribute is absent, set it as false 260*0b57cec5SDimitry Andric Caller.addFnAttr("uniform-work-group-size", "false"); 261*0b57cec5SDimitry Andric Callee.addFnAttr("uniform-work-group-size", "false"); 262*0b57cec5SDimitry Andric return true; 263*0b57cec5SDimitry Andric } 264*0b57cec5SDimitry Andric return false; 265*0b57cec5SDimitry Andric } 266*0b57cec5SDimitry Andric 267*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { 268*0b57cec5SDimitry Andric const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F); 269*0b57cec5SDimitry Andric bool HasFlat = ST.hasFlatAddressSpace(); 270*0b57cec5SDimitry Andric bool HasApertureRegs = ST.hasApertureRegs(); 271*0b57cec5SDimitry Andric SmallPtrSet<const Constant *, 8> ConstantExprVisited; 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric bool Changed = false; 274*0b57cec5SDimitry Andric bool NeedQueuePtr = false; 275*0b57cec5SDimitry Andric bool HaveCall = false; 276*0b57cec5SDimitry Andric bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv()); 277*0b57cec5SDimitry Andric 278*0b57cec5SDimitry Andric for (BasicBlock &BB : F) { 279*0b57cec5SDimitry Andric for (Instruction &I : BB) { 280*0b57cec5SDimitry Andric CallSite CS(&I); 281*0b57cec5SDimitry Andric if (CS) { 282*0b57cec5SDimitry Andric Function *Callee = CS.getCalledFunction(); 283*0b57cec5SDimitry Andric 284*0b57cec5SDimitry Andric // TODO: Do something with indirect calls. 
285*0b57cec5SDimitry Andric if (!Callee) { 286*0b57cec5SDimitry Andric if (!CS.isInlineAsm()) 287*0b57cec5SDimitry Andric HaveCall = true; 288*0b57cec5SDimitry Andric continue; 289*0b57cec5SDimitry Andric } 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric Intrinsic::ID IID = Callee->getIntrinsicID(); 292*0b57cec5SDimitry Andric if (IID == Intrinsic::not_intrinsic) { 293*0b57cec5SDimitry Andric HaveCall = true; 294*0b57cec5SDimitry Andric copyFeaturesToFunction(F, *Callee, NeedQueuePtr); 295*0b57cec5SDimitry Andric Changed = true; 296*0b57cec5SDimitry Andric } else { 297*0b57cec5SDimitry Andric bool NonKernelOnly = false; 298*0b57cec5SDimitry Andric StringRef AttrName = intrinsicToAttrName(IID, 299*0b57cec5SDimitry Andric NonKernelOnly, NeedQueuePtr); 300*0b57cec5SDimitry Andric if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { 301*0b57cec5SDimitry Andric F.addFnAttr(AttrName); 302*0b57cec5SDimitry Andric Changed = true; 303*0b57cec5SDimitry Andric } 304*0b57cec5SDimitry Andric } 305*0b57cec5SDimitry Andric } 306*0b57cec5SDimitry Andric 307*0b57cec5SDimitry Andric if (NeedQueuePtr || HasApertureRegs) 308*0b57cec5SDimitry Andric continue; 309*0b57cec5SDimitry Andric 310*0b57cec5SDimitry Andric if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { 311*0b57cec5SDimitry Andric if (castRequiresQueuePtr(ASC)) { 312*0b57cec5SDimitry Andric NeedQueuePtr = true; 313*0b57cec5SDimitry Andric continue; 314*0b57cec5SDimitry Andric } 315*0b57cec5SDimitry Andric } 316*0b57cec5SDimitry Andric 317*0b57cec5SDimitry Andric for (const Use &U : I.operands()) { 318*0b57cec5SDimitry Andric const auto *OpC = dyn_cast<Constant>(U); 319*0b57cec5SDimitry Andric if (!OpC) 320*0b57cec5SDimitry Andric continue; 321*0b57cec5SDimitry Andric 322*0b57cec5SDimitry Andric if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) { 323*0b57cec5SDimitry Andric NeedQueuePtr = true; 324*0b57cec5SDimitry Andric break; 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric 
} 327*0b57cec5SDimitry Andric } 328*0b57cec5SDimitry Andric } 329*0b57cec5SDimitry Andric 330*0b57cec5SDimitry Andric if (NeedQueuePtr) { 331*0b57cec5SDimitry Andric F.addFnAttr("amdgpu-queue-ptr"); 332*0b57cec5SDimitry Andric Changed = true; 333*0b57cec5SDimitry Andric } 334*0b57cec5SDimitry Andric 335*0b57cec5SDimitry Andric // TODO: We could refine this to captured pointers that could possibly be 336*0b57cec5SDimitry Andric // accessed by flat instructions. For now this is mostly a poor way of 337*0b57cec5SDimitry Andric // estimating whether there are calls before argument lowering. 338*0b57cec5SDimitry Andric if (HasFlat && !IsFunc && HaveCall) { 339*0b57cec5SDimitry Andric F.addFnAttr("amdgpu-flat-scratch"); 340*0b57cec5SDimitry Andric Changed = true; 341*0b57cec5SDimitry Andric } 342*0b57cec5SDimitry Andric 343*0b57cec5SDimitry Andric return Changed; 344*0b57cec5SDimitry Andric } 345*0b57cec5SDimitry Andric 346*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) { 347*0b57cec5SDimitry Andric bool Changed = false; 348*0b57cec5SDimitry Andric 349*0b57cec5SDimitry Andric for (CallGraphNode *I : SCC) { 350*0b57cec5SDimitry Andric // Build a list of CallGraphNodes from most number of uses to least 351*0b57cec5SDimitry Andric if (I->getNumReferences()) 352*0b57cec5SDimitry Andric NodeList.push_back(I); 353*0b57cec5SDimitry Andric else { 354*0b57cec5SDimitry Andric processUniformWorkGroupAttribute(); 355*0b57cec5SDimitry Andric NodeList.clear(); 356*0b57cec5SDimitry Andric } 357*0b57cec5SDimitry Andric 358*0b57cec5SDimitry Andric Function *F = I->getFunction(); 359*0b57cec5SDimitry Andric // Add feature attributes 360*0b57cec5SDimitry Andric if (!F || F->isDeclaration()) 361*0b57cec5SDimitry Andric continue; 362*0b57cec5SDimitry Andric Changed |= addFeatureAttributes(*F); 363*0b57cec5SDimitry Andric } 364*0b57cec5SDimitry Andric 365*0b57cec5SDimitry Andric return Changed; 366*0b57cec5SDimitry Andric } 367*0b57cec5SDimitry Andric 
368*0b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) { 369*0b57cec5SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 370*0b57cec5SDimitry Andric if (!TPC) 371*0b57cec5SDimitry Andric report_fatal_error("TargetMachine is required"); 372*0b57cec5SDimitry Andric 373*0b57cec5SDimitry Andric TM = &TPC->getTM<TargetMachine>(); 374*0b57cec5SDimitry Andric return false; 375*0b57cec5SDimitry Andric } 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { 378*0b57cec5SDimitry Andric return new AMDGPUAnnotateKernelFeatures(); 379*0b57cec5SDimitry Andric } 380