xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
//===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
1306c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
1506c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h"
16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
1706c3fb27SDimitry Andric #include "llvm/IR/Metadata.h"
18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
22bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
23bdd1243dSDimitry Andric                                              const AMDGPUSubtarget &ST)
24bdd1243dSDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
25e8d8bef9SDimitry Andric       IsModuleEntryFunction(
26bdd1243dSDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
27*5f757f3fSDimitry Andric       IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
28bdd1243dSDimitry Andric       NoSignedZerosFPMath(false) {
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
310b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
34fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
37fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
380b57cec5SDimitry Andric 
3981ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
4081ad6265SDimitry Andric   // global sizes?
4181ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
4281ad6265SDimitry Andric   if (!S.empty())
4381ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
4481ad6265SDimitry Andric 
4581ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
4681ad6265SDimitry Andric   StaticGDSSize = GDSSize;
4781ad6265SDimitry Andric 
4806c3fb27SDimitry Andric   // Second value, if present, is the maximum value that can be assigned.
4906c3fb27SDimitry Andric   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
5006c3fb27SDimitry Andric   // during codegen.
5106c3fb27SDimitry Andric   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
5206c3fb27SDimitry Andric       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
5306c3fb27SDimitry Andric 
5406c3fb27SDimitry Andric   // The two separate variables are only profitable when the LDS module lowering
5506c3fb27SDimitry Andric   // pass is disabled. If graphics does not use dynamic LDS, this is never
5606c3fb27SDimitry Andric   // profitable. Leaving cleanup for a later change.
5706c3fb27SDimitry Andric   LDSSize = LDSSizeRange.first;
5806c3fb27SDimitry Andric   StaticLDSSize = LDSSize;
5906c3fb27SDimitry Andric 
600b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
610b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
620b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
63bdd1243dSDimitry Andric 
64bdd1243dSDimitry Andric   // FIXME: Shouldn't be target specific
65bdd1243dSDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
66bdd1243dSDimitry Andric   NoSignedZerosFPMath =
67bdd1243dSDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
71bdd1243dSDimitry Andric                                                   const GlobalVariable &GV,
72bdd1243dSDimitry Andric                                                   Align Trailing) {
73bdd1243dSDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
740b57cec5SDimitry Andric   if (!Entry.second)
750b57cec5SDimitry Andric     return Entry.first->second;
760b57cec5SDimitry Andric 
775ffd83dbSDimitry Andric   Align Alignment =
785ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
790b57cec5SDimitry Andric 
8081ad6265SDimitry Andric   unsigned Offset;
8181ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
8406c3fb27SDimitry Andric     if (MaybeAbs) {
8506c3fb27SDimitry Andric       // Absolute address LDS variables that exist prior to the LDS lowering
8606c3fb27SDimitry Andric       // pass raise a fatal error in that pass. These failure modes are only
8706c3fb27SDimitry Andric       // reachable if that lowering pass is disabled or broken. If/when adding
8806c3fb27SDimitry Andric       // support for absolute addresses on user specified variables, the
8906c3fb27SDimitry Andric       // alignment check moves to the lowering pass and the frame calculation
9006c3fb27SDimitry Andric       // needs to take the user variables into consideration.
9106c3fb27SDimitry Andric 
9206c3fb27SDimitry Andric       uint32_t ObjectStart = *MaybeAbs;
9306c3fb27SDimitry Andric 
9406c3fb27SDimitry Andric       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
9506c3fb27SDimitry Andric         report_fatal_error("Absolute address LDS variable inconsistent with "
9606c3fb27SDimitry Andric                            "variable alignment");
9706c3fb27SDimitry Andric       }
9806c3fb27SDimitry Andric 
9906c3fb27SDimitry Andric       if (isModuleEntryFunction()) {
10006c3fb27SDimitry Andric         // If this is a module entry function, we can also sanity check against
10106c3fb27SDimitry Andric         // the static frame. Strictly it would be better to check against the
10206c3fb27SDimitry Andric         // attribute, i.e. that the variable is within the always-allocated
10306c3fb27SDimitry Andric         // section, and not within some other non-absolute-address object
10406c3fb27SDimitry Andric         // allocated here, but the extra error detection is minimal and we would
10506c3fb27SDimitry Andric         // have to pass the Function around or cache the attribute value.
10606c3fb27SDimitry Andric         uint32_t ObjectEnd =
10706c3fb27SDimitry Andric             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
10806c3fb27SDimitry Andric         if (ObjectEnd > StaticLDSSize) {
10906c3fb27SDimitry Andric           report_fatal_error(
11006c3fb27SDimitry Andric               "Absolute address LDS variable outside of static frame");
11106c3fb27SDimitry Andric         }
11206c3fb27SDimitry Andric       }
11306c3fb27SDimitry Andric 
11406c3fb27SDimitry Andric       Entry.first->second = ObjectStart;
11506c3fb27SDimitry Andric       return ObjectStart;
11606c3fb27SDimitry Andric     }
11706c3fb27SDimitry Andric 
1180b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
1190b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
1200b57cec5SDimitry Andric     /// during lowering.
12181ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
1220b57cec5SDimitry Andric 
123e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
124e8d8bef9SDimitry Andric 
125bdd1243dSDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
126bdd1243dSDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
12781ad6265SDimitry Andric   } else {
12881ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
12981ad6265SDimitry Andric            "expected region address space");
1300b57cec5SDimitry Andric 
13181ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
13281ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
13381ad6265SDimitry Andric 
13481ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
13581ad6265SDimitry Andric     GDSSize = StaticGDSSize;
13681ad6265SDimitry Andric   }
13781ad6265SDimitry Andric 
13881ad6265SDimitry Andric   Entry.first->second = Offset;
1390b57cec5SDimitry Andric   return Offset;
1400b57cec5SDimitry Andric }
141e8d8bef9SDimitry Andric 
14206c3fb27SDimitry Andric static const GlobalVariable *
14306c3fb27SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
144bdd1243dSDimitry Andric   const Module *M = F.getParent();
14506c3fb27SDimitry Andric   std::string KernelDynLDSName = "llvm.amdgcn.";
14606c3fb27SDimitry Andric   KernelDynLDSName += F.getName();
14706c3fb27SDimitry Andric   KernelDynLDSName += ".dynlds";
14806c3fb27SDimitry Andric   return M->getNamedGlobal(KernelDynLDSName);
149fe6060f1SDimitry Andric }
150fe6060f1SDimitry Andric 
151bdd1243dSDimitry Andric std::optional<uint32_t>
152fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
15306c3fb27SDimitry Andric   // TODO: Would be more consistent with the abs symbols to use a range
15406c3fb27SDimitry Andric   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
155fcaf7f86SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
15606c3fb27SDimitry Andric     if (ConstantInt *KnownSize =
15706c3fb27SDimitry Andric             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
15806c3fb27SDimitry Andric       uint64_t ZExt = KnownSize->getZExtValue();
15906c3fb27SDimitry Andric       if (ZExt <= UINT32_MAX) {
16006c3fb27SDimitry Andric         return ZExt;
161fcaf7f86SDimitry Andric       }
162fcaf7f86SDimitry Andric     }
163fcaf7f86SDimitry Andric   }
164fcaf7f86SDimitry Andric   return {};
165fcaf7f86SDimitry Andric }
166fcaf7f86SDimitry Andric 
16706c3fb27SDimitry Andric std::optional<uint32_t>
16806c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
16906c3fb27SDimitry Andric   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
17006c3fb27SDimitry Andric     return {};
17106c3fb27SDimitry Andric 
17206c3fb27SDimitry Andric   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
17306c3fb27SDimitry Andric   if (!AbsSymRange)
17406c3fb27SDimitry Andric     return {};
17506c3fb27SDimitry Andric 
17606c3fb27SDimitry Andric   if (const APInt *V = AbsSymRange->getSingleElement()) {
17706c3fb27SDimitry Andric     std::optional<uint64_t> ZExt = V->tryZExtValue();
17806c3fb27SDimitry Andric     if (ZExt && (*ZExt <= UINT32_MAX)) {
17906c3fb27SDimitry Andric       return *ZExt;
18006c3fb27SDimitry Andric     }
18106c3fb27SDimitry Andric   }
18206c3fb27SDimitry Andric 
18306c3fb27SDimitry Andric   return {};
18406c3fb27SDimitry Andric }
18506c3fb27SDimitry Andric 
18606c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
187e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
18806c3fb27SDimitry Andric   const Module *M = F.getParent();
18906c3fb27SDimitry Andric   const DataLayout &DL = M->getDataLayout();
190e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
191e8d8bef9SDimitry Andric 
192e8d8bef9SDimitry Andric   Align Alignment =
193e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
194e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
195e8d8bef9SDimitry Andric     return;
196e8d8bef9SDimitry Andric 
197e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
198e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
19906c3fb27SDimitry Andric 
20006c3fb27SDimitry Andric   // If there is a dynamic LDS variable associated with this function F, every
20106c3fb27SDimitry Andric   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
20206c3fb27SDimitry Andric   // map to the same address. This holds because no LDS is allocated after the
20306c3fb27SDimitry Andric   // lowering pass if there are dynamic LDS variables present.
20406c3fb27SDimitry Andric   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
20506c3fb27SDimitry Andric   if (Dyn) {
20606c3fb27SDimitry Andric     unsigned Offset = LDSSize; // return this?
20706c3fb27SDimitry Andric     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
20806c3fb27SDimitry Andric     if (!Expect || (Offset != *Expect)) {
20906c3fb27SDimitry Andric       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
21006c3fb27SDimitry Andric     }
21106c3fb27SDimitry Andric   }
212e8d8bef9SDimitry Andric }
213