//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUMemoryUtils.h"
12 #include "AMDGPUSubtarget.h"
13 #include "Utils/AMDGPUBaseInfo.h"
14 #include "llvm/CodeGen/MachineModuleInfo.h"
15 #include "llvm/IR/ConstantRange.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/Metadata.h"
18 #include "llvm/Target/TargetMachine.h"
19
20 using namespace llvm;
21
22 static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function & F)23 getKernelDynLDSGlobalFromFunction(const Function &F) {
24 const Module *M = F.getParent();
25 SmallString<64> KernelDynLDSName("llvm.amdgcn.");
26 KernelDynLDSName += F.getName();
27 KernelDynLDSName += ".dynlds";
28 return M->getNamedGlobal(KernelDynLDSName);
29 }
30
hasLDSKernelArgument(const Function & F)31 static bool hasLDSKernelArgument(const Function &F) {
32 for (const Argument &Arg : F.args()) {
33 Type *ArgTy = Arg.getType();
34 if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
35 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
36 return true;
37 }
38 }
39 return false;
40 }
41
AMDGPUMachineFunction(const Function & F,const AMDGPUSubtarget & ST)42 AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
43 const AMDGPUSubtarget &ST)
44 : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
45 IsModuleEntryFunction(
46 AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
47 IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {
48
49 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
50 // except reserved size is not correctly aligned.
51
52 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
53 MemoryBound = MemBoundAttr.getValueAsBool();
54
55 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
56 WaveLimiter = WaveLimitAttr.getValueAsBool();
57
58 // FIXME: How is this attribute supposed to interact with statically known
59 // global sizes?
60 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
61 if (!S.empty())
62 S.consumeInteger(0, GDSSize);
63
64 // Assume the attribute allocates before any known GDS globals.
65 StaticGDSSize = GDSSize;
66
67 // Second value, if present, is the maximum value that can be assigned.
68 // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
69 // during codegen.
70 std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
71 F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
72
73 // The two separate variables are only profitable when the LDS module lowering
74 // pass is disabled. If graphics does not use dynamic LDS, this is never
75 // profitable. Leaving cleanup for a later change.
76 LDSSize = LDSSizeRange.first;
77 StaticLDSSize = LDSSize;
78
79 CallingConv::ID CC = F.getCallingConv();
80 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
81 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
82
83 // FIXME: Shouldn't be target specific
84 Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
85 NoSignedZerosFPMath =
86 NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
87
88 const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
89 if (DynLdsGlobal || hasLDSKernelArgument(F))
90 UsesDynamicLDS = true;
91 }
92
/// Assign (or look up) the frame offset of LDS/GDS global \p GV within this
/// function's static local-memory frame.
///
/// Offsets are memoized in LocalMemoryObjects, so repeated queries for the
/// same global return the first-assigned offset. \p Trailing is an extra
/// alignment applied to the running LDS frame size after the allocation,
/// e.g. so that dynamically sized shared memory placed after the static
/// frame starts suitably aligned.
///
/// \returns the byte offset of \p GV in its address space's frame.
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  // Memoized lookup: if the global was already allocated, reuse its offset.
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    // Named barriers are pre-assigned absolute addresses rather than being
    // packed into the frame; just record and return that address.
    if (AMDGPU::isNamedBarrier(GV)) {
      std::optional<unsigned> BarAddr = getLDSAbsoluteAddress(GV);
      if (!BarAddr)
        llvm_unreachable("named barrier should have an assigned address");
      Entry.first->second = BarAddr.value();
      return BarAddr.value();
    }

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      // The assigned address must itself satisfy the variable's alignment.
      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we would
        // have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    /// TODO: We should sort these to minimize wasted space due to alignment
    /// padding. Currently the padding is decided by the first encountered use
    /// during lowering.
    // Bump-allocate: round the static frame up to this variable's alignment,
    // place the variable there, then grow the frame by its size.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    // Everything that is not LDS must be GDS (region address space); the
    // same bump-allocation scheme applies to the GDS frame.
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  // Record the assigned offset for future lookups of this global.
  Entry.first->second = Offset;
  return Offset;
}
171
172 std::optional<uint32_t>
getLDSKernelIdMetadata(const Function & F)173 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
174 // TODO: Would be more consistent with the abs symbols to use a range
175 MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
176 if (MD && MD->getNumOperands() == 1) {
177 if (ConstantInt *KnownSize =
178 mdconst::extract<ConstantInt>(MD->getOperand(0))) {
179 uint64_t ZExt = KnownSize->getZExtValue();
180 if (ZExt <= UINT32_MAX) {
181 return ZExt;
182 }
183 }
184 }
185 return {};
186 }
187
188 std::optional<uint32_t>
getLDSAbsoluteAddress(const GlobalValue & GV)189 AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
190 if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
191 return {};
192
193 std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
194 if (!AbsSymRange)
195 return {};
196
197 if (const APInt *V = AbsSymRange->getSingleElement()) {
198 std::optional<uint64_t> ZExt = V->tryZExtValue();
199 if (ZExt && (*ZExt <= UINT32_MAX)) {
200 return *ZExt;
201 }
202 }
203
204 return {};
205 }
206
setDynLDSAlign(const Function & F,const GlobalVariable & GV)207 void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
208 const GlobalVariable &GV) {
209 const Module *M = F.getParent();
210 const DataLayout &DL = M->getDataLayout();
211 assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
212
213 Align Alignment =
214 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
215 if (Alignment <= DynLDSAlign)
216 return;
217
218 LDSSize = alignTo(StaticLDSSize, Alignment);
219 DynLDSAlign = Alignment;
220
221 // If there is a dynamic LDS variable associated with this function F, every
222 // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
223 // map to the same address. This holds because no LDS is allocated after the
224 // lowering pass if there are dynamic LDS variables present.
225 const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
226 if (Dyn) {
227 unsigned Offset = LDSSize; // return this?
228 std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
229 if (!Expect || (Offset != *Expect)) {
230 report_fatal_error("Inconsistent metadata on dynamic LDS variable");
231 }
232 }
233 }
234
setUsesDynamicLDS(bool DynLDS)235 void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
236 UsesDynamicLDS = DynLDS;
237 }
238
isDynamicLDSUsed() const239 bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
240