xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===- AMDGPUAttributor.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "GCNSubtarget.h"
15 #include "llvm/CodeGen/TargetPassConfig.h"
16 #include "llvm/IR/IntrinsicsAMDGPU.h"
17 #include "llvm/IR/IntrinsicsR600.h"
18 #include "llvm/Target/TargetMachine.h"
19 #include "llvm/Transforms/IPO/Attributor.h"
20 
21 #define DEBUG_TYPE "amdgpu-attributor"
22 
23 using namespace llvm;
24 
25 static constexpr StringLiteral ImplicitAttrNames[] = {
26     // X ids unnecessarily propagated to kernels.
27     "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
28     "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
29     "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
30     "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
31     "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};
32 
33 // We do not need to note the x workitem or workgroup id because they are always
34 // initialized.
35 //
36 // TODO: We should not add the attributes if the known compile time workgroup
37 // size is 1 for y/z.
38 static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
39                                      bool &IsQueuePtr) {
40   switch (ID) {
41   case Intrinsic::amdgcn_workitem_id_x:
42     NonKernelOnly = true;
43     return "amdgpu-work-item-id-x";
44   case Intrinsic::amdgcn_workgroup_id_x:
45     NonKernelOnly = true;
46     return "amdgpu-work-group-id-x";
47   case Intrinsic::amdgcn_workitem_id_y:
48   case Intrinsic::r600_read_tidig_y:
49     return "amdgpu-work-item-id-y";
50   case Intrinsic::amdgcn_workitem_id_z:
51   case Intrinsic::r600_read_tidig_z:
52     return "amdgpu-work-item-id-z";
53   case Intrinsic::amdgcn_workgroup_id_y:
54   case Intrinsic::r600_read_tgid_y:
55     return "amdgpu-work-group-id-y";
56   case Intrinsic::amdgcn_workgroup_id_z:
57   case Intrinsic::r600_read_tgid_z:
58     return "amdgpu-work-group-id-z";
59   case Intrinsic::amdgcn_dispatch_ptr:
60     return "amdgpu-dispatch-ptr";
61   case Intrinsic::amdgcn_dispatch_id:
62     return "amdgpu-dispatch-id";
63   case Intrinsic::amdgcn_kernarg_segment_ptr:
64     return "amdgpu-kernarg-segment-ptr";
65   case Intrinsic::amdgcn_implicitarg_ptr:
66     return "amdgpu-implicitarg-ptr";
67   case Intrinsic::amdgcn_queue_ptr:
68   case Intrinsic::amdgcn_is_shared:
69   case Intrinsic::amdgcn_is_private:
70     // TODO: Does not require queue ptr on gfx9+
71   case Intrinsic::trap:
72   case Intrinsic::debugtrap:
73     IsQueuePtr = true;
74     return "amdgpu-queue-ptr";
75   default:
76     return "";
77   }
78 }
79 
80 static bool castRequiresQueuePtr(unsigned SrcAS) {
81   return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
82 }
83 
84 static bool isDSAddress(const Constant *C) {
85   const GlobalValue *GV = dyn_cast<GlobalValue>(C);
86   if (!GV)
87     return false;
88   unsigned AS = GV->getAddressSpace();
89   return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
90 }
91 
92 class AMDGPUInformationCache : public InformationCache {
93 public:
94   AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
95                          BumpPtrAllocator &Allocator,
96                          SetVector<Function *> *CGSCC, TargetMachine &TM)
97       : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
98   TargetMachine &TM;
99 
100   enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
101 
102   /// Check if the subtarget has aperture regs.
103   bool hasApertureRegs(Function &F) {
104     const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
105     return ST.hasApertureRegs();
106   }
107 
108 private:
109   /// Check if the ConstantExpr \p CE requires queue ptr attribute.
110   static bool visitConstExpr(const ConstantExpr *CE) {
111     if (CE->getOpcode() == Instruction::AddrSpaceCast) {
112       unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
113       return castRequiresQueuePtr(SrcAS);
114     }
115     return false;
116   }
117 
118   /// Get the constant access bitmap for \p C.
119   uint8_t getConstantAccess(const Constant *C) {
120     auto It = ConstantStatus.find(C);
121     if (It != ConstantStatus.end())
122       return It->second;
123 
124     uint8_t Result = 0;
125     if (isDSAddress(C))
126       Result = DS_GLOBAL;
127 
128     if (const auto *CE = dyn_cast<ConstantExpr>(C))
129       if (visitConstExpr(CE))
130         Result |= ADDR_SPACE_CAST;
131 
132     for (const Use &U : C->operands()) {
133       const auto *OpC = dyn_cast<Constant>(U);
134       if (!OpC)
135         continue;
136 
137       Result |= getConstantAccess(OpC);
138     }
139     return Result;
140   }
141 
142 public:
143   /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
144   bool needsQueuePtr(const Constant *C, Function &Fn) {
145     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
146     bool HasAperture = hasApertureRegs(Fn);
147 
148     // No need to explore the constants.
149     if (!IsNonEntryFunc && HasAperture)
150       return false;
151 
152     uint8_t Access = getConstantAccess(C);
153 
154     // We need to trap on DS globals in non-entry functions.
155     if (IsNonEntryFunc && (Access & DS_GLOBAL))
156       return true;
157 
158     return !HasAperture && (Access & ADDR_SPACE_CAST);
159   }
160 
161 private:
162   /// Used to determine if the Constant needs a queue ptr attribute.
163   DenseMap<const Constant *, uint8_t> ConstantStatus;
164 };
165 
166 struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
167   using Base = StateWrapper<BooleanState, AbstractAttribute>;
168   AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
169 
170   /// Create an abstract attribute view for the position \p IRP.
171   static AAAMDAttributes &createForPosition(const IRPosition &IRP,
172                                             Attributor &A);
173 
174   /// See AbstractAttribute::getName().
175   const std::string getName() const override { return "AAAMDAttributes"; }
176 
177   /// See AbstractAttribute::getIdAddr().
178   const char *getIdAddr() const override { return &ID; }
179 
180   /// This function should return true if the type of the \p AA is
181   /// AAAMDAttributes.
182   static bool classof(const AbstractAttribute *AA) {
183     return (AA->getIdAddr() == &ID);
184   }
185 
186   virtual const DenseSet<StringRef> &getAttributes() const = 0;
187 
188   /// Unique ID (due to the unique address)
189   static const char ID;
190 };
191 const char AAAMDAttributes::ID = 0;
192 
193 struct AAAMDWorkGroupSize
194     : public StateWrapper<BooleanState, AbstractAttribute> {
195   using Base = StateWrapper<BooleanState, AbstractAttribute>;
196   AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
197 
198   /// Create an abstract attribute view for the position \p IRP.
199   static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
200                                                Attributor &A);
201 
202   /// See AbstractAttribute::getName().
203   const std::string getName() const override { return "AAAMDWorkGroupSize"; }
204 
205   /// See AbstractAttribute::getIdAddr().
206   const char *getIdAddr() const override { return &ID; }
207 
208   /// This function should return true if the type of the \p AA is
209   /// AAAMDAttributes.
210   static bool classof(const AbstractAttribute *AA) {
211     return (AA->getIdAddr() == &ID);
212   }
213 
214   /// Unique ID (due to the unique address)
215   static const char ID;
216 };
217 const char AAAMDWorkGroupSize::ID = 0;
218 
219 struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
220   AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
221       : AAAMDWorkGroupSize(IRP, A) {}
222 
223   void initialize(Attributor &A) override {
224     Function *F = getAssociatedFunction();
225     CallingConv::ID CC = F->getCallingConv();
226 
227     if (CC != CallingConv::AMDGPU_KERNEL)
228       return;
229 
230     bool InitialValue = false;
231     if (F->hasFnAttribute("uniform-work-group-size"))
232       InitialValue = F->getFnAttribute("uniform-work-group-size")
233                          .getValueAsString()
234                          .equals("true");
235 
236     if (InitialValue)
237       indicateOptimisticFixpoint();
238     else
239       indicatePessimisticFixpoint();
240   }
241 
242   ChangeStatus updateImpl(Attributor &A) override {
243     ChangeStatus Change = ChangeStatus::UNCHANGED;
244 
245     auto CheckCallSite = [&](AbstractCallSite CS) {
246       Function *Caller = CS.getInstruction()->getFunction();
247       LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
248                         << "->" << getAssociatedFunction()->getName() << "\n");
249 
250       const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
251           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
252 
253       Change = Change | clampStateAndIndicateChange(this->getState(),
254                                                     CallerInfo.getState());
255 
256       return true;
257     };
258 
259     bool AllCallSitesKnown = true;
260     if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
261       indicatePessimisticFixpoint();
262 
263     return Change;
264   }
265 
266   ChangeStatus manifest(Attributor &A) override {
267     SmallVector<Attribute, 8> AttrList;
268     LLVMContext &Ctx = getAssociatedFunction()->getContext();
269 
270     AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
271                                       getAssumed() ? "true" : "false"));
272     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
273                                               /* ForceReplace */ true);
274   }
275 
276   bool isValidState() const override {
277     // This state is always valid, even when the state is false.
278     return true;
279   }
280 
281   const std::string getAsStr() const override {
282     return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
283   }
284 
285   /// See AbstractAttribute::trackStatistics()
286   void trackStatistics() const override {}
287 };
288 
289 AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
290                                                           Attributor &A) {
291   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
292     return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
293   llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
294 }
295 
296 struct AAAMDAttributesFunction : public AAAMDAttributes {
297   AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
298       : AAAMDAttributes(IRP, A) {}
299 
300   void initialize(Attributor &A) override {
301     Function *F = getAssociatedFunction();
302     CallingConv::ID CC = F->getCallingConv();
303     bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
304 
305     // Don't add attributes to instrinsics
306     if (F->isIntrinsic()) {
307       indicatePessimisticFixpoint();
308       return;
309     }
310 
311     // Ignore functions with graphics calling conventions, these are currently
312     // not allowed to have kernel arguments.
313     if (AMDGPU::isGraphics(F->getCallingConv())) {
314       indicatePessimisticFixpoint();
315       return;
316     }
317 
318     for (StringRef Attr : ImplicitAttrNames) {
319       if (F->hasFnAttribute(Attr))
320         Attributes.insert(Attr);
321     }
322 
323     // TODO: We shouldn't need this in the future.
324     if (CallingConvSupportsAllImplicits &&
325         F->hasAddressTaken(nullptr, true, true, true)) {
326       for (StringRef AttrName : ImplicitAttrNames) {
327         Attributes.insert(AttrName);
328       }
329     }
330   }
331 
332   ChangeStatus updateImpl(Attributor &A) override {
333     Function *F = getAssociatedFunction();
334     ChangeStatus Change = ChangeStatus::UNCHANGED;
335     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
336     CallingConv::ID CC = F->getCallingConv();
337     bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
338     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
339 
340     auto AddAttribute = [&](StringRef AttrName) {
341       if (Attributes.insert(AttrName).second)
342         Change = ChangeStatus::CHANGED;
343     };
344 
345     // Check for Intrinsics and propagate attributes.
346     const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
347         *this, this->getIRPosition(), DepClassTy::REQUIRED);
348 
349     // We have to assume that we can reach a function with these attributes.
350     // We do not consider inline assembly as a unknown callee.
351     if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
352       for (StringRef AttrName : ImplicitAttrNames) {
353         AddAttribute(AttrName);
354       }
355     }
356 
357     bool NeedsQueuePtr = false;
358     bool HasCall = false;
359     for (Function *Callee : AAEdges.getOptimisticEdges()) {
360       Intrinsic::ID IID = Callee->getIntrinsicID();
361       if (IID != Intrinsic::not_intrinsic) {
362         if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
363           AddAttribute("amdgpu-kernarg-segment-ptr");
364           continue;
365         }
366 
367         bool NonKernelOnly = false;
368         StringRef AttrName =
369             intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
370 
371         if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
372           AddAttribute(AttrName);
373 
374         continue;
375       }
376 
377       HasCall = true;
378       const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
379           *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
380       const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
381       // Propagate implicit attributes from called function.
382       for (StringRef AttrName : ImplicitAttrNames)
383         if (CalleeAttributes.count(AttrName))
384           AddAttribute(AttrName);
385     }
386 
387     HasCall |= AAEdges.hasUnknownCallee();
388     if (!IsNonEntryFunc && HasCall)
389       AddAttribute("amdgpu-calls");
390 
391     // Check the function body.
392     auto CheckAlloca = [&](Instruction &I) {
393       AddAttribute("amdgpu-stack-objects");
394       return false;
395     };
396 
397     bool UsedAssumedInformation = false;
398     A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
399                               UsedAssumedInformation);
400 
401     // If we found that we need amdgpu-queue-ptr, nothing else to do.
402     if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
403       AddAttribute("amdgpu-queue-ptr");
404       return Change;
405     }
406 
407     auto CheckAddrSpaceCasts = [&](Instruction &I) {
408       unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
409       if (castRequiresQueuePtr(SrcAS)) {
410         NeedsQueuePtr = true;
411         return false;
412       }
413       return true;
414     };
415 
416     bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
417 
418     // `checkForAllInstructions` is much more cheaper than going through all
419     // instructions, try it first.
420 
421     // amdgpu-queue-ptr is not needed if aperture regs is present.
422     if (!HasApertureRegs)
423       A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
424                                 {Instruction::AddrSpaceCast},
425                                 UsedAssumedInformation);
426 
427     // If we found  that we need amdgpu-queue-ptr, nothing else to do.
428     if (NeedsQueuePtr) {
429       AddAttribute("amdgpu-queue-ptr");
430       return Change;
431     }
432 
433     if (!IsNonEntryFunc && HasApertureRegs)
434       return Change;
435 
436     for (BasicBlock &BB : *F) {
437       for (Instruction &I : BB) {
438         for (const Use &U : I.operands()) {
439           if (const auto *C = dyn_cast<Constant>(U)) {
440             if (InfoCache.needsQueuePtr(C, *F)) {
441               AddAttribute("amdgpu-queue-ptr");
442               return Change;
443             }
444           }
445         }
446       }
447     }
448 
449     return Change;
450   }
451 
452   ChangeStatus manifest(Attributor &A) override {
453     SmallVector<Attribute, 8> AttrList;
454     LLVMContext &Ctx = getAssociatedFunction()->getContext();
455 
456     for (StringRef AttrName : Attributes)
457       AttrList.push_back(Attribute::get(Ctx, AttrName));
458 
459     return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
460                                               /* ForceReplace */ true);
461   }
462 
463   const std::string getAsStr() const override {
464     return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
465   }
466 
467   const DenseSet<StringRef> &getAttributes() const override {
468     return Attributes;
469   }
470 
471   /// See AbstractAttribute::trackStatistics()
472   void trackStatistics() const override {}
473 
474 private:
475   DenseSet<StringRef> Attributes;
476 };
477 
478 AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
479                                                     Attributor &A) {
480   if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
481     return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
482   llvm_unreachable("AAAMDAttributes is only valid for function position");
483 }
484 
485 class AMDGPUAttributor : public ModulePass {
486 public:
487   AMDGPUAttributor() : ModulePass(ID) {}
488 
489   /// doInitialization - Virtual method overridden by subclasses to do
490   /// any necessary initialization before any pass is run.
491   bool doInitialization(Module &) override {
492     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493     if (!TPC)
494       report_fatal_error("TargetMachine is required");
495 
496     TM = &TPC->getTM<TargetMachine>();
497     return false;
498   }
499 
500   bool runOnModule(Module &M) override {
501     SetVector<Function *> Functions;
502     AnalysisGetter AG;
503     for (Function &F : M)
504       Functions.insert(&F);
505 
506     CallGraphUpdater CGUpdater;
507     BumpPtrAllocator Allocator;
508     AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
509     Attributor A(Functions, InfoCache, CGUpdater);
510 
511     for (Function &F : M) {
512       A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
513       A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
514     }
515 
516     ChangeStatus Change = A.run();
517     return Change == ChangeStatus::CHANGED;
518   }
519 
520   StringRef getPassName() const override { return "AMDGPU Attributor"; }
521   TargetMachine *TM;
522   static char ID;
523 };
524 
525 char AMDGPUAttributor::ID = 0;
526 
527 Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
528 INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
529