xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
10b57cec5SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
/// \file This pass annotates AMDGPU functions with the "amdgpu-calls" and
/// "amdgpu-stack-objects" attributes, which hint ABI lowering optimizations
/// later.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AMDGPU.h"
16e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
170b57cec5SDimitry Andric #include "llvm/Analysis/CallGraph.h"
180b57cec5SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
220b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric using namespace llvm;
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric namespace {
290b57cec5SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
300b57cec5SDimitry Andric private:
310b57cec5SDimitry Andric   const TargetMachine *TM = nullptr;
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   bool addFeatureAttributes(Function &F);
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric public:
360b57cec5SDimitry Andric   static char ID;
370b57cec5SDimitry Andric 
AMDGPUAnnotateKernelFeatures()380b57cec5SDimitry Andric   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric   bool doInitialization(CallGraph &CG) override;
410b57cec5SDimitry Andric   bool runOnSCC(CallGraphSCC &SCC) override;
420b57cec5SDimitry Andric 
getPassName() const430b57cec5SDimitry Andric   StringRef getPassName() const override {
440b57cec5SDimitry Andric     return "AMDGPU Annotate Kernel Features";
450b57cec5SDimitry Andric   }
460b57cec5SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const470b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
480b57cec5SDimitry Andric     AU.setPreservesAll();
490b57cec5SDimitry Andric     CallGraphSCCPass::getAnalysisUsage(AU);
500b57cec5SDimitry Andric   }
510b57cec5SDimitry Andric };
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric } // end anonymous namespace
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0;
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
600b57cec5SDimitry Andric                 "Add AMDGPU function attributes", false, false)
610b57cec5SDimitry Andric 
addFeatureAttributes(Function & F)620b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
635ffd83dbSDimitry Andric   bool HaveStackObjects = false;
640b57cec5SDimitry Andric   bool Changed = false;
650b57cec5SDimitry Andric   bool HaveCall = false;
660b57cec5SDimitry Andric   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
670b57cec5SDimitry Andric 
680b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
690b57cec5SDimitry Andric     for (Instruction &I : BB) {
705ffd83dbSDimitry Andric       if (isa<AllocaInst>(I)) {
715ffd83dbSDimitry Andric         HaveStackObjects = true;
725ffd83dbSDimitry Andric         continue;
735ffd83dbSDimitry Andric       }
745ffd83dbSDimitry Andric 
755ffd83dbSDimitry Andric       if (auto *CB = dyn_cast<CallBase>(&I)) {
765ffd83dbSDimitry Andric         const Function *Callee =
775ffd83dbSDimitry Andric             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
780b57cec5SDimitry Andric 
79*349cc55cSDimitry Andric         // Note the occurrence of indirect call.
800b57cec5SDimitry Andric         if (!Callee) {
81*349cc55cSDimitry Andric           if (!CB->isInlineAsm())
820b57cec5SDimitry Andric             HaveCall = true;
83*349cc55cSDimitry Andric 
840b57cec5SDimitry Andric           continue;
850b57cec5SDimitry Andric         }
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric         Intrinsic::ID IID = Callee->getIntrinsicID();
880b57cec5SDimitry Andric         if (IID == Intrinsic::not_intrinsic) {
890b57cec5SDimitry Andric           HaveCall = true;
900b57cec5SDimitry Andric           Changed = true;
910b57cec5SDimitry Andric         }
920b57cec5SDimitry Andric       }
930b57cec5SDimitry Andric     }
945ffd83dbSDimitry Andric   }
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   // TODO: We could refine this to captured pointers that could possibly be
970b57cec5SDimitry Andric   // accessed by flat instructions. For now this is mostly a poor way of
980b57cec5SDimitry Andric   // estimating whether there are calls before argument lowering.
995ffd83dbSDimitry Andric   if (!IsFunc && HaveCall) {
1005ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-calls");
1015ffd83dbSDimitry Andric     Changed = true;
1025ffd83dbSDimitry Andric   }
1035ffd83dbSDimitry Andric 
1045ffd83dbSDimitry Andric   if (HaveStackObjects) {
1055ffd83dbSDimitry Andric     F.addFnAttr("amdgpu-stack-objects");
1060b57cec5SDimitry Andric     Changed = true;
1070b57cec5SDimitry Andric   }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric   return Changed;
1100b57cec5SDimitry Andric }
1110b57cec5SDimitry Andric 
runOnSCC(CallGraphSCC & SCC)1120b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
1130b57cec5SDimitry Andric   bool Changed = false;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   for (CallGraphNode *I : SCC) {
1160b57cec5SDimitry Andric     Function *F = I->getFunction();
117fe6060f1SDimitry Andric     // Ignore functions with graphics calling conventions, these are currently
118fe6060f1SDimitry Andric     // not allowed to have kernel arguments.
119fe6060f1SDimitry Andric     if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
1200b57cec5SDimitry Andric       continue;
121fe6060f1SDimitry Andric     // Add feature attributes
1220b57cec5SDimitry Andric     Changed |= addFeatureAttributes(*F);
1230b57cec5SDimitry Andric   }
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   return Changed;
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric 
doInitialization(CallGraph & CG)1280b57cec5SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
1290b57cec5SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1300b57cec5SDimitry Andric   if (!TPC)
1310b57cec5SDimitry Andric     report_fatal_error("TargetMachine is required");
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
1340b57cec5SDimitry Andric   return false;
1350b57cec5SDimitry Andric }
1360b57cec5SDimitry Andric 
createAMDGPUAnnotateKernelFeaturesPass()1370b57cec5SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
1380b57cec5SDimitry Andric   return new AMDGPUAnnotateKernelFeatures();
1390b57cec5SDimitry Andric }
140