xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
//===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
130b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
14fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
15e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric using namespace llvm;
180b57cec5SDimitry Andric 
19*bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
20*bdd1243dSDimitry Andric                                              const AMDGPUSubtarget &ST)
21*bdd1243dSDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
22e8d8bef9SDimitry Andric       IsModuleEntryFunction(
23*bdd1243dSDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
24*bdd1243dSDimitry Andric       NoSignedZerosFPMath(false) {
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
270b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
30fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
33fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
340b57cec5SDimitry Andric 
3581ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
3681ad6265SDimitry Andric   // global sizes?
3781ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
3881ad6265SDimitry Andric   if (!S.empty())
3981ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
4081ad6265SDimitry Andric 
4181ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
4281ad6265SDimitry Andric   StaticGDSSize = GDSSize;
4381ad6265SDimitry Andric 
440b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
450b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
460b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
47*bdd1243dSDimitry Andric 
48*bdd1243dSDimitry Andric   // FIXME: Shouldn't be target specific
49*bdd1243dSDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
50*bdd1243dSDimitry Andric   NoSignedZerosFPMath =
51*bdd1243dSDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
520b57cec5SDimitry Andric }
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
55*bdd1243dSDimitry Andric                                                   const GlobalVariable &GV,
56*bdd1243dSDimitry Andric                                                   Align Trailing) {
57*bdd1243dSDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
580b57cec5SDimitry Andric   if (!Entry.second)
590b57cec5SDimitry Andric     return Entry.first->second;
600b57cec5SDimitry Andric 
615ffd83dbSDimitry Andric   Align Alignment =
625ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
630b57cec5SDimitry Andric 
6481ad6265SDimitry Andric   unsigned Offset;
6581ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
660b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
670b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
680b57cec5SDimitry Andric     /// during lowering.
6981ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
700b57cec5SDimitry Andric 
71e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
72e8d8bef9SDimitry Andric 
73*bdd1243dSDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
74*bdd1243dSDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
7581ad6265SDimitry Andric   } else {
7681ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
7781ad6265SDimitry Andric            "expected region address space");
780b57cec5SDimitry Andric 
7981ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
8081ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
8181ad6265SDimitry Andric 
8281ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
8381ad6265SDimitry Andric     GDSSize = StaticGDSSize;
8481ad6265SDimitry Andric   }
8581ad6265SDimitry Andric 
8681ad6265SDimitry Andric   Entry.first->second = Offset;
870b57cec5SDimitry Andric   return Offset;
880b57cec5SDimitry Andric }
89e8d8bef9SDimitry Andric 
90*bdd1243dSDimitry Andric static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
91*bdd1243dSDimitry Andric 
92*bdd1243dSDimitry Andric bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
93*bdd1243dSDimitry Andric   auto name = GV.getName();
94*bdd1243dSDimitry Andric   return (name == ModuleLDSName) ||
95*bdd1243dSDimitry Andric          (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
96*bdd1243dSDimitry Andric }
97*bdd1243dSDimitry Andric 
98*bdd1243dSDimitry Andric const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
99*bdd1243dSDimitry Andric     const GlobalVariable &GV) {
100*bdd1243dSDimitry Andric   const Module &M = *GV.getParent();
101*bdd1243dSDimitry Andric   StringRef N(GV.getName());
102*bdd1243dSDimitry Andric   if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
103*bdd1243dSDimitry Andric     return M.getFunction(N);
104*bdd1243dSDimitry Andric   }
105*bdd1243dSDimitry Andric   return nullptr;
106*bdd1243dSDimitry Andric }
107*bdd1243dSDimitry Andric 
108*bdd1243dSDimitry Andric const GlobalVariable *
109*bdd1243dSDimitry Andric AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
110*bdd1243dSDimitry Andric   const Module *M = F.getParent();
111*bdd1243dSDimitry Andric   std::string KernelLDSName = "llvm.amdgcn.kernel.";
112*bdd1243dSDimitry Andric   KernelLDSName += F.getName();
113*bdd1243dSDimitry Andric   KernelLDSName += ".lds";
114*bdd1243dSDimitry Andric   return M->getNamedGlobal(KernelLDSName);
115*bdd1243dSDimitry Andric }
116*bdd1243dSDimitry Andric 
11781ad6265SDimitry Andric // This kernel calls no functions that require the module lds struct
11881ad6265SDimitry Andric static bool canElideModuleLDS(const Function &F) {
11981ad6265SDimitry Andric   return F.hasFnAttribute("amdgpu-elide-module-lds");
12081ad6265SDimitry Andric }
12181ad6265SDimitry Andric 
122*bdd1243dSDimitry Andric unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
123*bdd1243dSDimitry Andric     const GlobalVariable &GV) {
124*bdd1243dSDimitry Andric   // module.lds, then alignment padding, then kernel.lds, then other variables
125*bdd1243dSDimitry Andric   // if any
126*bdd1243dSDimitry Andric 
127*bdd1243dSDimitry Andric   assert(isKnownAddressLDSGlobal(GV));
128*bdd1243dSDimitry Andric   unsigned Offset = 0;
129*bdd1243dSDimitry Andric 
130*bdd1243dSDimitry Andric   if (GV.getName() == ModuleLDSName) {
131*bdd1243dSDimitry Andric     return 0;
132*bdd1243dSDimitry Andric   }
133*bdd1243dSDimitry Andric 
134*bdd1243dSDimitry Andric   const Module *M = GV.getParent();
135*bdd1243dSDimitry Andric   const DataLayout &DL = M->getDataLayout();
136*bdd1243dSDimitry Andric 
137*bdd1243dSDimitry Andric   const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
138*bdd1243dSDimitry Andric   const Function *f = getKernelLDSFunctionFromGlobal(GV);
139*bdd1243dSDimitry Andric 
140*bdd1243dSDimitry Andric   // Account for module.lds if allocated for this function
141*bdd1243dSDimitry Andric   if (GVM && f && !canElideModuleLDS(*f)) {
142*bdd1243dSDimitry Andric     // allocator aligns this to var align, but it's zero to begin with
143*bdd1243dSDimitry Andric     Offset += DL.getTypeAllocSize(GVM->getValueType());
144*bdd1243dSDimitry Andric   }
145*bdd1243dSDimitry Andric 
146*bdd1243dSDimitry Andric   // No dynamic LDS alignment done by allocateModuleLDSGlobal
147*bdd1243dSDimitry Andric   Offset = alignTo(
148*bdd1243dSDimitry Andric       Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
149*bdd1243dSDimitry Andric 
150*bdd1243dSDimitry Andric   return Offset;
151*bdd1243dSDimitry Andric }
152*bdd1243dSDimitry Andric 
153*bdd1243dSDimitry Andric void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
15481ad6265SDimitry Andric   const Module *M = F.getParent();
155*bdd1243dSDimitry Andric 
156*bdd1243dSDimitry Andric   // This function is called before allocating any other LDS so that it can
157*bdd1243dSDimitry Andric   // reliably put values at known addresses. Consequently, dynamic LDS, if
158*bdd1243dSDimitry Andric   // present, will not yet have been allocated
159*bdd1243dSDimitry Andric 
160*bdd1243dSDimitry Andric   assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
161*bdd1243dSDimitry Andric 
162fe6060f1SDimitry Andric   if (isModuleEntryFunction()) {
163*bdd1243dSDimitry Andric 
164*bdd1243dSDimitry Andric     // Pointer values start from zero, memory allocated per-kernel-launch
165*bdd1243dSDimitry Andric     // Variables can be grouped into a module level struct and a struct per
166*bdd1243dSDimitry Andric     // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
167*bdd1243dSDimitry Andric     // are allocated at statically computable addresses here.
168*bdd1243dSDimitry Andric     //
169*bdd1243dSDimitry Andric     // Address 0
170*bdd1243dSDimitry Andric     // {
171*bdd1243dSDimitry Andric     //   llvm.amdgcn.module.lds
172*bdd1243dSDimitry Andric     // }
173*bdd1243dSDimitry Andric     // alignment padding
174*bdd1243dSDimitry Andric     // {
175*bdd1243dSDimitry Andric     //   llvm.amdgcn.kernel.some-name.lds
176*bdd1243dSDimitry Andric     // }
177*bdd1243dSDimitry Andric     // other variables, e.g. dynamic lds, allocated after this call
178*bdd1243dSDimitry Andric 
179*bdd1243dSDimitry Andric     const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
180*bdd1243dSDimitry Andric     const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
181*bdd1243dSDimitry Andric 
18281ad6265SDimitry Andric     if (GV && !canElideModuleLDS(F)) {
183*bdd1243dSDimitry Andric       assert(isKnownAddressLDSGlobal(*GV));
184*bdd1243dSDimitry Andric       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
185fe6060f1SDimitry Andric       (void)Offset;
186*bdd1243dSDimitry Andric       assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
187fe6060f1SDimitry Andric              "Module LDS expected to be allocated before other LDS");
188fe6060f1SDimitry Andric     }
189*bdd1243dSDimitry Andric 
190*bdd1243dSDimitry Andric     if (KV) {
191*bdd1243dSDimitry Andric       // The per-kernel offset is deterministic because it is allocated
192*bdd1243dSDimitry Andric       // before any other non-module LDS variables.
193*bdd1243dSDimitry Andric       assert(isKnownAddressLDSGlobal(*KV));
194*bdd1243dSDimitry Andric       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
195*bdd1243dSDimitry Andric       (void)Offset;
196*bdd1243dSDimitry Andric       assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
197*bdd1243dSDimitry Andric              "Kernel LDS expected to be immediately after module LDS");
198*bdd1243dSDimitry Andric     }
199fe6060f1SDimitry Andric   }
200fe6060f1SDimitry Andric }
201fe6060f1SDimitry Andric 
202*bdd1243dSDimitry Andric std::optional<uint32_t>
203fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
204fcaf7f86SDimitry Andric   auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
205fcaf7f86SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
206fcaf7f86SDimitry Andric     ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
207fcaf7f86SDimitry Andric     if (KnownSize) {
208fcaf7f86SDimitry Andric       uint64_t V = KnownSize->getZExtValue();
209fcaf7f86SDimitry Andric       if (V <= UINT32_MAX) {
210fcaf7f86SDimitry Andric         return V;
211fcaf7f86SDimitry Andric       }
212fcaf7f86SDimitry Andric     }
213fcaf7f86SDimitry Andric   }
214fcaf7f86SDimitry Andric   return {};
215fcaf7f86SDimitry Andric }
216fcaf7f86SDimitry Andric 
217e8d8bef9SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
218e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
219e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
220e8d8bef9SDimitry Andric 
221e8d8bef9SDimitry Andric   Align Alignment =
222e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
223e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
224e8d8bef9SDimitry Andric     return;
225e8d8bef9SDimitry Andric 
226e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
227e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
228e8d8bef9SDimitry Andric }
229