xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
10b57cec5SDimitry Andric //===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
13*06c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
15*06c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h"
16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
17*06c3fb27SDimitry Andric #include "llvm/IR/Metadata.h"
18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
22bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
23bdd1243dSDimitry Andric                                              const AMDGPUSubtarget &ST)
24bdd1243dSDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
25e8d8bef9SDimitry Andric       IsModuleEntryFunction(
26bdd1243dSDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
27bdd1243dSDimitry Andric       NoSignedZerosFPMath(false) {
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
300b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
33fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
36fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
370b57cec5SDimitry Andric 
3881ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
3981ad6265SDimitry Andric   // global sizes?
4081ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
4181ad6265SDimitry Andric   if (!S.empty())
4281ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
4381ad6265SDimitry Andric 
4481ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
4581ad6265SDimitry Andric   StaticGDSSize = GDSSize;
4681ad6265SDimitry Andric 
47*06c3fb27SDimitry Andric   // Second value, if present, is the maximum value that can be assigned.
48*06c3fb27SDimitry Andric   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
49*06c3fb27SDimitry Andric   // during codegen.
50*06c3fb27SDimitry Andric   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
51*06c3fb27SDimitry Andric       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
52*06c3fb27SDimitry Andric 
53*06c3fb27SDimitry Andric   // The two separate variables are only profitable when the LDS module lowering
54*06c3fb27SDimitry Andric   // pass is disabled. If graphics does not use dynamic LDS, this is never
55*06c3fb27SDimitry Andric   // profitable. Leaving cleanup for a later change.
56*06c3fb27SDimitry Andric   LDSSize = LDSSizeRange.first;
57*06c3fb27SDimitry Andric   StaticLDSSize = LDSSize;
58*06c3fb27SDimitry Andric 
590b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
600b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
610b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
62bdd1243dSDimitry Andric 
63bdd1243dSDimitry Andric   // FIXME: Shouldn't be target specific
64bdd1243dSDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
65bdd1243dSDimitry Andric   NoSignedZerosFPMath =
66bdd1243dSDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
70bdd1243dSDimitry Andric                                                   const GlobalVariable &GV,
71bdd1243dSDimitry Andric                                                   Align Trailing) {
72bdd1243dSDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
730b57cec5SDimitry Andric   if (!Entry.second)
740b57cec5SDimitry Andric     return Entry.first->second;
750b57cec5SDimitry Andric 
765ffd83dbSDimitry Andric   Align Alignment =
775ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
780b57cec5SDimitry Andric 
7981ad6265SDimitry Andric   unsigned Offset;
8081ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
81*06c3fb27SDimitry Andric 
82*06c3fb27SDimitry Andric     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
83*06c3fb27SDimitry Andric     if (MaybeAbs) {
84*06c3fb27SDimitry Andric       // Absolute address LDS variables that exist prior to the LDS lowering
85*06c3fb27SDimitry Andric       // pass raise a fatal error in that pass. These failure modes are only
86*06c3fb27SDimitry Andric       // reachable if that lowering pass is disabled or broken. If/when adding
87*06c3fb27SDimitry Andric       // support for absolute addresses on user specified variables, the
88*06c3fb27SDimitry Andric       // alignment check moves to the lowering pass and the frame calculation
89*06c3fb27SDimitry Andric       // needs to take the user variables into consideration.
90*06c3fb27SDimitry Andric 
91*06c3fb27SDimitry Andric       uint32_t ObjectStart = *MaybeAbs;
92*06c3fb27SDimitry Andric 
93*06c3fb27SDimitry Andric       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
94*06c3fb27SDimitry Andric         report_fatal_error("Absolute address LDS variable inconsistent with "
95*06c3fb27SDimitry Andric                            "variable alignment");
96*06c3fb27SDimitry Andric       }
97*06c3fb27SDimitry Andric 
98*06c3fb27SDimitry Andric       if (isModuleEntryFunction()) {
99*06c3fb27SDimitry Andric         // If this is a module entry function, we can also sanity check against
100*06c3fb27SDimitry Andric         // the static frame. Strictly it would be better to check against the
101*06c3fb27SDimitry Andric         // attribute, i.e. that the variable is within the always-allocated
102*06c3fb27SDimitry Andric         // section, and not within some other non-absolute-address object
103*06c3fb27SDimitry Andric         // allocated here, but the extra error detection is minimal and we would
104*06c3fb27SDimitry Andric         // have to pass the Function around or cache the attribute value.
105*06c3fb27SDimitry Andric         uint32_t ObjectEnd =
106*06c3fb27SDimitry Andric             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
107*06c3fb27SDimitry Andric         if (ObjectEnd > StaticLDSSize) {
108*06c3fb27SDimitry Andric           report_fatal_error(
109*06c3fb27SDimitry Andric               "Absolute address LDS variable outside of static frame");
110*06c3fb27SDimitry Andric         }
111*06c3fb27SDimitry Andric       }
112*06c3fb27SDimitry Andric 
113*06c3fb27SDimitry Andric       Entry.first->second = ObjectStart;
114*06c3fb27SDimitry Andric       return ObjectStart;
115*06c3fb27SDimitry Andric     }
116*06c3fb27SDimitry Andric 
1170b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
1180b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
1190b57cec5SDimitry Andric     /// during lowering.
12081ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
1210b57cec5SDimitry Andric 
122e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
123e8d8bef9SDimitry Andric 
124bdd1243dSDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
125bdd1243dSDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
12681ad6265SDimitry Andric   } else {
12781ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
12881ad6265SDimitry Andric            "expected region address space");
1290b57cec5SDimitry Andric 
13081ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
13181ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
13281ad6265SDimitry Andric 
13381ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
13481ad6265SDimitry Andric     GDSSize = StaticGDSSize;
13581ad6265SDimitry Andric   }
13681ad6265SDimitry Andric 
13781ad6265SDimitry Andric   Entry.first->second = Offset;
1380b57cec5SDimitry Andric   return Offset;
1390b57cec5SDimitry Andric }
140e8d8bef9SDimitry Andric 
141*06c3fb27SDimitry Andric static const GlobalVariable *
142*06c3fb27SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
143bdd1243dSDimitry Andric   const Module *M = F.getParent();
144*06c3fb27SDimitry Andric   std::string KernelDynLDSName = "llvm.amdgcn.";
145*06c3fb27SDimitry Andric   KernelDynLDSName += F.getName();
146*06c3fb27SDimitry Andric   KernelDynLDSName += ".dynlds";
147*06c3fb27SDimitry Andric   return M->getNamedGlobal(KernelDynLDSName);
148fe6060f1SDimitry Andric }
149fe6060f1SDimitry Andric 
150bdd1243dSDimitry Andric std::optional<uint32_t>
151fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
152*06c3fb27SDimitry Andric   // TODO: Would be more consistent with the abs symbols to use a range
153*06c3fb27SDimitry Andric   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
154fcaf7f86SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
155*06c3fb27SDimitry Andric     if (ConstantInt *KnownSize =
156*06c3fb27SDimitry Andric             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
157*06c3fb27SDimitry Andric       uint64_t ZExt = KnownSize->getZExtValue();
158*06c3fb27SDimitry Andric       if (ZExt <= UINT32_MAX) {
159*06c3fb27SDimitry Andric         return ZExt;
160fcaf7f86SDimitry Andric       }
161fcaf7f86SDimitry Andric     }
162fcaf7f86SDimitry Andric   }
163fcaf7f86SDimitry Andric   return {};
164fcaf7f86SDimitry Andric }
165fcaf7f86SDimitry Andric 
166*06c3fb27SDimitry Andric std::optional<uint32_t>
167*06c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
168*06c3fb27SDimitry Andric   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
169*06c3fb27SDimitry Andric     return {};
170*06c3fb27SDimitry Andric 
171*06c3fb27SDimitry Andric   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
172*06c3fb27SDimitry Andric   if (!AbsSymRange)
173*06c3fb27SDimitry Andric     return {};
174*06c3fb27SDimitry Andric 
175*06c3fb27SDimitry Andric   if (const APInt *V = AbsSymRange->getSingleElement()) {
176*06c3fb27SDimitry Andric     std::optional<uint64_t> ZExt = V->tryZExtValue();
177*06c3fb27SDimitry Andric     if (ZExt && (*ZExt <= UINT32_MAX)) {
178*06c3fb27SDimitry Andric       return *ZExt;
179*06c3fb27SDimitry Andric     }
180*06c3fb27SDimitry Andric   }
181*06c3fb27SDimitry Andric 
182*06c3fb27SDimitry Andric   return {};
183*06c3fb27SDimitry Andric }
184*06c3fb27SDimitry Andric 
185*06c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
186e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
187*06c3fb27SDimitry Andric   const Module *M = F.getParent();
188*06c3fb27SDimitry Andric   const DataLayout &DL = M->getDataLayout();
189e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric   Align Alignment =
192e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
193e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
194e8d8bef9SDimitry Andric     return;
195e8d8bef9SDimitry Andric 
196e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
197e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
198*06c3fb27SDimitry Andric 
199*06c3fb27SDimitry Andric   // If there is a dynamic LDS variable associated with this function F, every
200*06c3fb27SDimitry Andric   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
201*06c3fb27SDimitry Andric   // map to the same address. This holds because no LDS is allocated after the
202*06c3fb27SDimitry Andric   // lowering pass if there are dynamic LDS variables present.
203*06c3fb27SDimitry Andric   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
204*06c3fb27SDimitry Andric   if (Dyn) {
205*06c3fb27SDimitry Andric     unsigned Offset = LDSSize; // return this?
206*06c3fb27SDimitry Andric     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
207*06c3fb27SDimitry Andric     if (!Expect || (Offset != *Expect)) {
208*06c3fb27SDimitry Andric       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
209*06c3fb27SDimitry Andric     }
210*06c3fb27SDimitry Andric   }
211e8d8bef9SDimitry Andric }
212