//===-- AMDGPUMachineFunction.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

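// Dynamic LDS variables are looked up by a fixed naming convention. For a
// kernel @foo, the LDS lowering pass emits a marker global along the lines of
// (illustrative):
//   @llvm.amdgcn.foo.dynlds = external addrspace(3) global [0 x i8]
// Returns null if no such global exists in the module.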
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  SmallString<64> KernelDynLDSName("llvm.amdgcn.");
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

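// A pointer-to-LDS kernel argument implies the kernel may access dynamic LDS
// allocated by the caller, e.g. (illustrative):
//   define amdgpu_kernel void @k(ptr addrspace(3) %dyn.lds)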
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
      if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
        return true;
    }
  }
  return false;
}

AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;
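  // For example (illustrative), "amdgpu-gds-size"="256" reserves 256 bytes of
  // GDS ahead of any GDS globals allocated later.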

  // The second value, if present, is the maximum value that can be assigned.
  // This is useful in PromoteAlloca or for LDS spills, and could be used for
  // diagnostics during codegen.
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // Keeping two separate size variables is only profitable while the LDS
  // module lowering pass is disabled; if graphics does not use dynamic LDS,
  // it is never profitable. Cleanup is left for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;
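  // Attribute format (illustrative): "amdgpu-lds-size"="1024" sets the
  // statically known size to 1024 bytes, while "amdgpu-lds-size"="1024,65536"
  // additionally records 65536 as the maximum assignable value.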

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";

  const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
  if (DynLdsGlobal || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}

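// Allocate a static offset for GV within the kernel's LDS or GDS frame and
// return it. Allocation is memoized: a repeated query for the same global
// returns the previously assigned offset.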
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we
        // would have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
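    // Worked example (illustrative): with StaticLDSSize == 10, allocating an
    // i32 with Alignment == 8 and Trailing == 16 yields Offset == 16,
    // StaticLDSSize == 20, and LDSSize == alignTo(20, 16) == 32.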
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

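// The kernel id is attached to the function as metadata, e.g. (illustrative):
//   define amdgpu_kernel void @k() !llvm.amdgcn.lds.kernel.id !0 { ... }
//   !0 = !{i32 42}
// Values that do not fit in 32 bits are ignored.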
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

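// An absolute LDS address is encoded as a single-element !absolute_symbol
// range on the variable, e.g. (illustrative) for a variable pinned at 8:
//   @v = addrspace(3) global i32 poison, !absolute_symbol !0
//   !0 = !{i32 8, i32 9}
// The range is half-open, so [8, 9) fixes the address to exactly 8.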
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

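// Record the alignment required by this kernel's dynamic LDS and pad the
// static frame so that dynamic LDS begins at a suitably aligned offset.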
void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;
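  // Worked example (illustrative): with StaticLDSSize == 20 and a dynamic LDS
  // variable requiring 16-byte alignment, LDSSize becomes alignTo(20, 16) ==
  // 32, so dynamic LDS starts at offset 32.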

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}

void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
  UsesDynamicLDS = DynLDS;
}

bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
233