//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
1081ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
1306c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
1506c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h"
16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h"
1706c3fb27SDimitry Andric #include "llvm/IR/Metadata.h"
18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric
221db9f3b2SDimitry Andric static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function & F)231db9f3b2SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
241db9f3b2SDimitry Andric const Module *M = F.getParent();
251db9f3b2SDimitry Andric SmallString<64> KernelDynLDSName("llvm.amdgcn.");
261db9f3b2SDimitry Andric KernelDynLDSName += F.getName();
271db9f3b2SDimitry Andric KernelDynLDSName += ".dynlds";
281db9f3b2SDimitry Andric return M->getNamedGlobal(KernelDynLDSName);
291db9f3b2SDimitry Andric }
301db9f3b2SDimitry Andric
hasLDSKernelArgument(const Function & F)311db9f3b2SDimitry Andric static bool hasLDSKernelArgument(const Function &F) {
321db9f3b2SDimitry Andric for (const Argument &Arg : F.args()) {
331db9f3b2SDimitry Andric Type *ArgTy = Arg.getType();
341db9f3b2SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) {
351db9f3b2SDimitry Andric if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
361db9f3b2SDimitry Andric return true;
371db9f3b2SDimitry Andric }
381db9f3b2SDimitry Andric }
391db9f3b2SDimitry Andric return false;
401db9f3b2SDimitry Andric }
411db9f3b2SDimitry Andric
AMDGPUMachineFunction(const Function & F,const AMDGPUSubtarget & ST)42bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
43bdd1243dSDimitry Andric const AMDGPUSubtarget &ST)
44bdd1243dSDimitry Andric : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
45e8d8bef9SDimitry Andric IsModuleEntryFunction(
46bdd1243dSDimitry Andric AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
47*0fca6ea1SDimitry Andric IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
500b57cec5SDimitry Andric // except reserved size is not correctly aligned.
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
53fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool();
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
56fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool();
570b57cec5SDimitry Andric
5881ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known
5981ad6265SDimitry Andric // global sizes?
6081ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
6181ad6265SDimitry Andric if (!S.empty())
6281ad6265SDimitry Andric S.consumeInteger(0, GDSSize);
6381ad6265SDimitry Andric
6481ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals.
6581ad6265SDimitry Andric StaticGDSSize = GDSSize;
6681ad6265SDimitry Andric
6706c3fb27SDimitry Andric // Second value, if present, is the maximum value that can be assigned.
6806c3fb27SDimitry Andric // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
6906c3fb27SDimitry Andric // during codegen.
7006c3fb27SDimitry Andric std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
7106c3fb27SDimitry Andric F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
7206c3fb27SDimitry Andric
7306c3fb27SDimitry Andric // The two separate variables are only profitable when the LDS module lowering
7406c3fb27SDimitry Andric // pass is disabled. If graphics does not use dynamic LDS, this is never
7506c3fb27SDimitry Andric // profitable. Leaving cleanup for a later change.
7606c3fb27SDimitry Andric LDSSize = LDSSizeRange.first;
7706c3fb27SDimitry Andric StaticLDSSize = LDSSize;
7806c3fb27SDimitry Andric
790b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv();
800b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
810b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
82bdd1243dSDimitry Andric
83bdd1243dSDimitry Andric // FIXME: Shouldn't be target specific
84bdd1243dSDimitry Andric Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
85bdd1243dSDimitry Andric NoSignedZerosFPMath =
86bdd1243dSDimitry Andric NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
871db9f3b2SDimitry Andric
881db9f3b2SDimitry Andric const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
891db9f3b2SDimitry Andric if (DynLdsGlobal || hasLDSKernelArgument(F))
901db9f3b2SDimitry Andric UsesDynamicLDS = true;
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric
allocateLDSGlobal(const DataLayout & DL,const GlobalVariable & GV,Align Trailing)930b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
94bdd1243dSDimitry Andric const GlobalVariable &GV,
95bdd1243dSDimitry Andric Align Trailing) {
96bdd1243dSDimitry Andric auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
970b57cec5SDimitry Andric if (!Entry.second)
980b57cec5SDimitry Andric return Entry.first->second;
990b57cec5SDimitry Andric
1005ffd83dbSDimitry Andric Align Alignment =
1015ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
1020b57cec5SDimitry Andric
10381ad6265SDimitry Andric unsigned Offset;
10481ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
10506c3fb27SDimitry Andric
10606c3fb27SDimitry Andric std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
10706c3fb27SDimitry Andric if (MaybeAbs) {
10806c3fb27SDimitry Andric // Absolute address LDS variables that exist prior to the LDS lowering
10906c3fb27SDimitry Andric // pass raise a fatal error in that pass. These failure modes are only
11006c3fb27SDimitry Andric // reachable if that lowering pass is disabled or broken. If/when adding
11106c3fb27SDimitry Andric // support for absolute addresses on user specified variables, the
11206c3fb27SDimitry Andric // alignment check moves to the lowering pass and the frame calculation
11306c3fb27SDimitry Andric // needs to take the user variables into consideration.
11406c3fb27SDimitry Andric
11506c3fb27SDimitry Andric uint32_t ObjectStart = *MaybeAbs;
11606c3fb27SDimitry Andric
11706c3fb27SDimitry Andric if (ObjectStart != alignTo(ObjectStart, Alignment)) {
11806c3fb27SDimitry Andric report_fatal_error("Absolute address LDS variable inconsistent with "
11906c3fb27SDimitry Andric "variable alignment");
12006c3fb27SDimitry Andric }
12106c3fb27SDimitry Andric
12206c3fb27SDimitry Andric if (isModuleEntryFunction()) {
12306c3fb27SDimitry Andric // If this is a module entry function, we can also sanity check against
12406c3fb27SDimitry Andric // the static frame. Strictly it would be better to check against the
12506c3fb27SDimitry Andric // attribute, i.e. that the variable is within the always-allocated
12606c3fb27SDimitry Andric // section, and not within some other non-absolute-address object
12706c3fb27SDimitry Andric // allocated here, but the extra error detection is minimal and we would
12806c3fb27SDimitry Andric // have to pass the Function around or cache the attribute value.
12906c3fb27SDimitry Andric uint32_t ObjectEnd =
13006c3fb27SDimitry Andric ObjectStart + DL.getTypeAllocSize(GV.getValueType());
13106c3fb27SDimitry Andric if (ObjectEnd > StaticLDSSize) {
13206c3fb27SDimitry Andric report_fatal_error(
13306c3fb27SDimitry Andric "Absolute address LDS variable outside of static frame");
13406c3fb27SDimitry Andric }
13506c3fb27SDimitry Andric }
13606c3fb27SDimitry Andric
13706c3fb27SDimitry Andric Entry.first->second = ObjectStart;
13806c3fb27SDimitry Andric return ObjectStart;
13906c3fb27SDimitry Andric }
14006c3fb27SDimitry Andric
1410b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment
1420b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use
1430b57cec5SDimitry Andric /// during lowering.
14481ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
1450b57cec5SDimitry Andric
146e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
147e8d8bef9SDimitry Andric
148bdd1243dSDimitry Andric // Align LDS size to trailing, e.g. for aligning dynamic shared memory
149bdd1243dSDimitry Andric LDSSize = alignTo(StaticLDSSize, Trailing);
15081ad6265SDimitry Andric } else {
15181ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
15281ad6265SDimitry Andric "expected region address space");
1530b57cec5SDimitry Andric
15481ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
15581ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
15681ad6265SDimitry Andric
15781ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS
15881ad6265SDimitry Andric GDSSize = StaticGDSSize;
15981ad6265SDimitry Andric }
16081ad6265SDimitry Andric
16181ad6265SDimitry Andric Entry.first->second = Offset;
1620b57cec5SDimitry Andric return Offset;
1630b57cec5SDimitry Andric }
164e8d8bef9SDimitry Andric
165bdd1243dSDimitry Andric std::optional<uint32_t>
getLDSKernelIdMetadata(const Function & F)166fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
16706c3fb27SDimitry Andric // TODO: Would be more consistent with the abs symbols to use a range
16806c3fb27SDimitry Andric MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
169fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) {
17006c3fb27SDimitry Andric if (ConstantInt *KnownSize =
17106c3fb27SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(0))) {
17206c3fb27SDimitry Andric uint64_t ZExt = KnownSize->getZExtValue();
17306c3fb27SDimitry Andric if (ZExt <= UINT32_MAX) {
17406c3fb27SDimitry Andric return ZExt;
175fcaf7f86SDimitry Andric }
176fcaf7f86SDimitry Andric }
177fcaf7f86SDimitry Andric }
178fcaf7f86SDimitry Andric return {};
179fcaf7f86SDimitry Andric }
180fcaf7f86SDimitry Andric
18106c3fb27SDimitry Andric std::optional<uint32_t>
getLDSAbsoluteAddress(const GlobalValue & GV)18206c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
18306c3fb27SDimitry Andric if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
18406c3fb27SDimitry Andric return {};
18506c3fb27SDimitry Andric
18606c3fb27SDimitry Andric std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
18706c3fb27SDimitry Andric if (!AbsSymRange)
18806c3fb27SDimitry Andric return {};
18906c3fb27SDimitry Andric
19006c3fb27SDimitry Andric if (const APInt *V = AbsSymRange->getSingleElement()) {
19106c3fb27SDimitry Andric std::optional<uint64_t> ZExt = V->tryZExtValue();
19206c3fb27SDimitry Andric if (ZExt && (*ZExt <= UINT32_MAX)) {
19306c3fb27SDimitry Andric return *ZExt;
19406c3fb27SDimitry Andric }
19506c3fb27SDimitry Andric }
19606c3fb27SDimitry Andric
19706c3fb27SDimitry Andric return {};
19806c3fb27SDimitry Andric }
19906c3fb27SDimitry Andric
setDynLDSAlign(const Function & F,const GlobalVariable & GV)20006c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
201e8d8bef9SDimitry Andric const GlobalVariable &GV) {
20206c3fb27SDimitry Andric const Module *M = F.getParent();
20306c3fb27SDimitry Andric const DataLayout &DL = M->getDataLayout();
204e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
205e8d8bef9SDimitry Andric
206e8d8bef9SDimitry Andric Align Alignment =
207e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
208e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign)
209e8d8bef9SDimitry Andric return;
210e8d8bef9SDimitry Andric
211e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment);
212e8d8bef9SDimitry Andric DynLDSAlign = Alignment;
21306c3fb27SDimitry Andric
21406c3fb27SDimitry Andric // If there is a dynamic LDS variable associated with this function F, every
21506c3fb27SDimitry Andric // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
21606c3fb27SDimitry Andric // map to the same address. This holds because no LDS is allocated after the
21706c3fb27SDimitry Andric // lowering pass if there are dynamic LDS variables present.
21806c3fb27SDimitry Andric const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
21906c3fb27SDimitry Andric if (Dyn) {
22006c3fb27SDimitry Andric unsigned Offset = LDSSize; // return this?
22106c3fb27SDimitry Andric std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
22206c3fb27SDimitry Andric if (!Expect || (Offset != *Expect)) {
22306c3fb27SDimitry Andric report_fatal_error("Inconsistent metadata on dynamic LDS variable");
22406c3fb27SDimitry Andric }
22506c3fb27SDimitry Andric }
226e8d8bef9SDimitry Andric }
2271db9f3b2SDimitry Andric
setUsesDynamicLDS(bool DynLDS)2281db9f3b2SDimitry Andric void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
2291db9f3b2SDimitry Andric UsesDynamicLDS = DynLDS;
2301db9f3b2SDimitry Andric }
2311db9f3b2SDimitry Andric
isDynamicLDSUsed() const2321db9f3b2SDimitry Andric bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
233