xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
//===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
1306c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
1506c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h"
16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
1706c3fb27SDimitry Andric #include "llvm/IR/Metadata.h"
18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
22*1db9f3b2SDimitry Andric static const GlobalVariable *
23*1db9f3b2SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
24*1db9f3b2SDimitry Andric   const Module *M = F.getParent();
25*1db9f3b2SDimitry Andric   SmallString<64> KernelDynLDSName("llvm.amdgcn.");
26*1db9f3b2SDimitry Andric   KernelDynLDSName += F.getName();
27*1db9f3b2SDimitry Andric   KernelDynLDSName += ".dynlds";
28*1db9f3b2SDimitry Andric   return M->getNamedGlobal(KernelDynLDSName);
29*1db9f3b2SDimitry Andric }
30*1db9f3b2SDimitry Andric 
31*1db9f3b2SDimitry Andric static bool hasLDSKernelArgument(const Function &F) {
32*1db9f3b2SDimitry Andric   for (const Argument &Arg : F.args()) {
33*1db9f3b2SDimitry Andric     Type *ArgTy = Arg.getType();
34*1db9f3b2SDimitry Andric     if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) {
35*1db9f3b2SDimitry Andric       if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
36*1db9f3b2SDimitry Andric         return true;
37*1db9f3b2SDimitry Andric     }
38*1db9f3b2SDimitry Andric   }
39*1db9f3b2SDimitry Andric   return false;
40*1db9f3b2SDimitry Andric }
41*1db9f3b2SDimitry Andric 
42bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
43bdd1243dSDimitry Andric                                              const AMDGPUSubtarget &ST)
44bdd1243dSDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
45e8d8bef9SDimitry Andric       IsModuleEntryFunction(
46bdd1243dSDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
475f757f3fSDimitry Andric       IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
48bdd1243dSDimitry Andric       NoSignedZerosFPMath(false) {
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
510b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
54fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
550b57cec5SDimitry Andric 
560b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
57fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
580b57cec5SDimitry Andric 
5981ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
6081ad6265SDimitry Andric   // global sizes?
6181ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
6281ad6265SDimitry Andric   if (!S.empty())
6381ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
6481ad6265SDimitry Andric 
6581ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
6681ad6265SDimitry Andric   StaticGDSSize = GDSSize;
6781ad6265SDimitry Andric 
6806c3fb27SDimitry Andric   // Second value, if present, is the maximum value that can be assigned.
6906c3fb27SDimitry Andric   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
7006c3fb27SDimitry Andric   // during codegen.
7106c3fb27SDimitry Andric   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
7206c3fb27SDimitry Andric       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
7306c3fb27SDimitry Andric 
7406c3fb27SDimitry Andric   // The two separate variables are only profitable when the LDS module lowering
7506c3fb27SDimitry Andric   // pass is disabled. If graphics does not use dynamic LDS, this is never
7606c3fb27SDimitry Andric   // profitable. Leaving cleanup for a later change.
7706c3fb27SDimitry Andric   LDSSize = LDSSizeRange.first;
7806c3fb27SDimitry Andric   StaticLDSSize = LDSSize;
7906c3fb27SDimitry Andric 
800b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
810b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
820b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
83bdd1243dSDimitry Andric 
84bdd1243dSDimitry Andric   // FIXME: Shouldn't be target specific
85bdd1243dSDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
86bdd1243dSDimitry Andric   NoSignedZerosFPMath =
87bdd1243dSDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
88*1db9f3b2SDimitry Andric 
89*1db9f3b2SDimitry Andric   const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
90*1db9f3b2SDimitry Andric   if (DynLdsGlobal || hasLDSKernelArgument(F))
91*1db9f3b2SDimitry Andric     UsesDynamicLDS = true;
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
95bdd1243dSDimitry Andric                                                   const GlobalVariable &GV,
96bdd1243dSDimitry Andric                                                   Align Trailing) {
97bdd1243dSDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
980b57cec5SDimitry Andric   if (!Entry.second)
990b57cec5SDimitry Andric     return Entry.first->second;
1000b57cec5SDimitry Andric 
1015ffd83dbSDimitry Andric   Align Alignment =
1025ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
1030b57cec5SDimitry Andric 
10481ad6265SDimitry Andric   unsigned Offset;
10581ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
10606c3fb27SDimitry Andric 
10706c3fb27SDimitry Andric     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
10806c3fb27SDimitry Andric     if (MaybeAbs) {
10906c3fb27SDimitry Andric       // Absolute address LDS variables that exist prior to the LDS lowering
11006c3fb27SDimitry Andric       // pass raise a fatal error in that pass. These failure modes are only
11106c3fb27SDimitry Andric       // reachable if that lowering pass is disabled or broken. If/when adding
11206c3fb27SDimitry Andric       // support for absolute addresses on user specified variables, the
11306c3fb27SDimitry Andric       // alignment check moves to the lowering pass and the frame calculation
11406c3fb27SDimitry Andric       // needs to take the user variables into consideration.
11506c3fb27SDimitry Andric 
11606c3fb27SDimitry Andric       uint32_t ObjectStart = *MaybeAbs;
11706c3fb27SDimitry Andric 
11806c3fb27SDimitry Andric       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
11906c3fb27SDimitry Andric         report_fatal_error("Absolute address LDS variable inconsistent with "
12006c3fb27SDimitry Andric                            "variable alignment");
12106c3fb27SDimitry Andric       }
12206c3fb27SDimitry Andric 
12306c3fb27SDimitry Andric       if (isModuleEntryFunction()) {
12406c3fb27SDimitry Andric         // If this is a module entry function, we can also sanity check against
12506c3fb27SDimitry Andric         // the static frame. Strictly it would be better to check against the
12606c3fb27SDimitry Andric         // attribute, i.e. that the variable is within the always-allocated
12706c3fb27SDimitry Andric         // section, and not within some other non-absolute-address object
12806c3fb27SDimitry Andric         // allocated here, but the extra error detection is minimal and we would
12906c3fb27SDimitry Andric         // have to pass the Function around or cache the attribute value.
13006c3fb27SDimitry Andric         uint32_t ObjectEnd =
13106c3fb27SDimitry Andric             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
13206c3fb27SDimitry Andric         if (ObjectEnd > StaticLDSSize) {
13306c3fb27SDimitry Andric           report_fatal_error(
13406c3fb27SDimitry Andric               "Absolute address LDS variable outside of static frame");
13506c3fb27SDimitry Andric         }
13606c3fb27SDimitry Andric       }
13706c3fb27SDimitry Andric 
13806c3fb27SDimitry Andric       Entry.first->second = ObjectStart;
13906c3fb27SDimitry Andric       return ObjectStart;
14006c3fb27SDimitry Andric     }
14106c3fb27SDimitry Andric 
1420b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
1430b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
1440b57cec5SDimitry Andric     /// during lowering.
14581ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
1460b57cec5SDimitry Andric 
147e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
148e8d8bef9SDimitry Andric 
149bdd1243dSDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
150bdd1243dSDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
15181ad6265SDimitry Andric   } else {
15281ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
15381ad6265SDimitry Andric            "expected region address space");
1540b57cec5SDimitry Andric 
15581ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
15681ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
15781ad6265SDimitry Andric 
15881ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
15981ad6265SDimitry Andric     GDSSize = StaticGDSSize;
16081ad6265SDimitry Andric   }
16181ad6265SDimitry Andric 
16281ad6265SDimitry Andric   Entry.first->second = Offset;
1630b57cec5SDimitry Andric   return Offset;
1640b57cec5SDimitry Andric }
165e8d8bef9SDimitry Andric 
166bdd1243dSDimitry Andric std::optional<uint32_t>
167fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
16806c3fb27SDimitry Andric   // TODO: Would be more consistent with the abs symbols to use a range
16906c3fb27SDimitry Andric   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
170fcaf7f86SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
17106c3fb27SDimitry Andric     if (ConstantInt *KnownSize =
17206c3fb27SDimitry Andric             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
17306c3fb27SDimitry Andric       uint64_t ZExt = KnownSize->getZExtValue();
17406c3fb27SDimitry Andric       if (ZExt <= UINT32_MAX) {
17506c3fb27SDimitry Andric         return ZExt;
176fcaf7f86SDimitry Andric       }
177fcaf7f86SDimitry Andric     }
178fcaf7f86SDimitry Andric   }
179fcaf7f86SDimitry Andric   return {};
180fcaf7f86SDimitry Andric }
181fcaf7f86SDimitry Andric 
18206c3fb27SDimitry Andric std::optional<uint32_t>
18306c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
18406c3fb27SDimitry Andric   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
18506c3fb27SDimitry Andric     return {};
18606c3fb27SDimitry Andric 
18706c3fb27SDimitry Andric   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
18806c3fb27SDimitry Andric   if (!AbsSymRange)
18906c3fb27SDimitry Andric     return {};
19006c3fb27SDimitry Andric 
19106c3fb27SDimitry Andric   if (const APInt *V = AbsSymRange->getSingleElement()) {
19206c3fb27SDimitry Andric     std::optional<uint64_t> ZExt = V->tryZExtValue();
19306c3fb27SDimitry Andric     if (ZExt && (*ZExt <= UINT32_MAX)) {
19406c3fb27SDimitry Andric       return *ZExt;
19506c3fb27SDimitry Andric     }
19606c3fb27SDimitry Andric   }
19706c3fb27SDimitry Andric 
19806c3fb27SDimitry Andric   return {};
19906c3fb27SDimitry Andric }
20006c3fb27SDimitry Andric 
20106c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
202e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
20306c3fb27SDimitry Andric   const Module *M = F.getParent();
20406c3fb27SDimitry Andric   const DataLayout &DL = M->getDataLayout();
205e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
206e8d8bef9SDimitry Andric 
207e8d8bef9SDimitry Andric   Align Alignment =
208e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
209e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
210e8d8bef9SDimitry Andric     return;
211e8d8bef9SDimitry Andric 
212e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
213e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
21406c3fb27SDimitry Andric 
21506c3fb27SDimitry Andric   // If there is a dynamic LDS variable associated with this function F, every
21606c3fb27SDimitry Andric   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
21706c3fb27SDimitry Andric   // map to the same address. This holds because no LDS is allocated after the
21806c3fb27SDimitry Andric   // lowering pass if there are dynamic LDS variables present.
21906c3fb27SDimitry Andric   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
22006c3fb27SDimitry Andric   if (Dyn) {
22106c3fb27SDimitry Andric     unsigned Offset = LDSSize; // return this?
22206c3fb27SDimitry Andric     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
22306c3fb27SDimitry Andric     if (!Expect || (Offset != *Expect)) {
22406c3fb27SDimitry Andric       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
22506c3fb27SDimitry Andric     }
22606c3fb27SDimitry Andric   }
227e8d8bef9SDimitry Andric }
228*1db9f3b2SDimitry Andric 
229*1db9f3b2SDimitry Andric void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
230*1db9f3b2SDimitry Andric   UsesDynamicLDS = DynLDS;
231*1db9f3b2SDimitry Andric }
232*1db9f3b2SDimitry Andric 
233*1db9f3b2SDimitry Andric bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
234