10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 1081ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 1306c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 1506c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h" 16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h" 1706c3fb27SDimitry Andric #include "llvm/IR/Metadata.h" 18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 22*1db9f3b2SDimitry Andric static const GlobalVariable * 23*1db9f3b2SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) { 24*1db9f3b2SDimitry Andric const Module *M = F.getParent(); 25*1db9f3b2SDimitry Andric SmallString<64> KernelDynLDSName("llvm.amdgcn."); 26*1db9f3b2SDimitry Andric KernelDynLDSName += F.getName(); 27*1db9f3b2SDimitry Andric KernelDynLDSName += ".dynlds"; 28*1db9f3b2SDimitry Andric return M->getNamedGlobal(KernelDynLDSName); 29*1db9f3b2SDimitry Andric } 30*1db9f3b2SDimitry Andric 31*1db9f3b2SDimitry Andric static bool hasLDSKernelArgument(const Function &F) { 32*1db9f3b2SDimitry Andric for (const Argument &Arg : F.args()) { 33*1db9f3b2SDimitry Andric Type *ArgTy = Arg.getType(); 34*1db9f3b2SDimitry Andric if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) { 35*1db9f3b2SDimitry Andric if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) 36*1db9f3b2SDimitry Andric return true; 37*1db9f3b2SDimitry Andric } 38*1db9f3b2SDimitry Andric } 39*1db9f3b2SDimitry Andric return false; 40*1db9f3b2SDimitry Andric } 41*1db9f3b2SDimitry Andric 42bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 43bdd1243dSDimitry Andric const AMDGPUSubtarget &ST) 44bdd1243dSDimitry Andric : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 45e8d8bef9SDimitry Andric IsModuleEntryFunction( 46bdd1243dSDimitry Andric AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 475f757f3fSDimitry Andric IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())), 48bdd1243dSDimitry Andric NoSignedZerosFPMath(false) { 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 510b57cec5SDimitry Andric // except reserved size is not correctly aligned. 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 54fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 57fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 580b57cec5SDimitry Andric 5981ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 6081ad6265SDimitry Andric // global sizes? 6181ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 6281ad6265SDimitry Andric if (!S.empty()) 6381ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 6481ad6265SDimitry Andric 6581ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 6681ad6265SDimitry Andric StaticGDSSize = GDSSize; 6781ad6265SDimitry Andric 6806c3fb27SDimitry Andric // Second value, if present, is the maximum value that can be assigned. 6906c3fb27SDimitry Andric // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics 7006c3fb27SDimitry Andric // during codegen. 7106c3fb27SDimitry Andric std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( 7206c3fb27SDimitry Andric F, "amdgpu-lds-size", {0, UINT32_MAX}, true); 7306c3fb27SDimitry Andric 7406c3fb27SDimitry Andric // The two separate variables are only profitable when the LDS module lowering 7506c3fb27SDimitry Andric // pass is disabled. If graphics does not use dynamic LDS, this is never 7606c3fb27SDimitry Andric // profitable. Leaving cleanup for a later change. 7706c3fb27SDimitry Andric LDSSize = LDSSizeRange.first; 7806c3fb27SDimitry Andric StaticLDSSize = LDSSize; 7906c3fb27SDimitry Andric 800b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 810b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 820b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 83bdd1243dSDimitry Andric 84bdd1243dSDimitry Andric // FIXME: Shouldn't be target specific 85bdd1243dSDimitry Andric Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 86bdd1243dSDimitry Andric NoSignedZerosFPMath = 87bdd1243dSDimitry Andric NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 88*1db9f3b2SDimitry Andric 89*1db9f3b2SDimitry Andric const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); 90*1db9f3b2SDimitry Andric if (DynLdsGlobal || hasLDSKernelArgument(F)) 91*1db9f3b2SDimitry Andric UsesDynamicLDS = true; 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 95bdd1243dSDimitry Andric const GlobalVariable &GV, 96bdd1243dSDimitry Andric Align Trailing) { 97bdd1243dSDimitry Andric auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 980b57cec5SDimitry Andric if (!Entry.second) 990b57cec5SDimitry Andric return Entry.first->second; 1000b57cec5SDimitry Andric 1015ffd83dbSDimitry Andric Align Alignment = 1025ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 1030b57cec5SDimitry Andric 10481ad6265SDimitry Andric unsigned Offset; 10581ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 10606c3fb27SDimitry Andric 10706c3fb27SDimitry Andric std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); 10806c3fb27SDimitry Andric if (MaybeAbs) { 10906c3fb27SDimitry Andric // Absolute address LDS variables that exist prior to the LDS lowering 11006c3fb27SDimitry Andric // pass raise a fatal error in that pass. These failure modes are only 11106c3fb27SDimitry Andric // reachable if that lowering pass is disabled or broken. If/when adding 11206c3fb27SDimitry Andric // support for absolute addresses on user specified variables, the 11306c3fb27SDimitry Andric // alignment check moves to the lowering pass and the frame calculation 11406c3fb27SDimitry Andric // needs to take the user variables into consideration. 11506c3fb27SDimitry Andric 11606c3fb27SDimitry Andric uint32_t ObjectStart = *MaybeAbs; 11706c3fb27SDimitry Andric 11806c3fb27SDimitry Andric if (ObjectStart != alignTo(ObjectStart, Alignment)) { 11906c3fb27SDimitry Andric report_fatal_error("Absolute address LDS variable inconsistent with " 12006c3fb27SDimitry Andric "variable alignment"); 12106c3fb27SDimitry Andric } 12206c3fb27SDimitry Andric 12306c3fb27SDimitry Andric if (isModuleEntryFunction()) { 12406c3fb27SDimitry Andric // If this is a module entry function, we can also sanity check against 12506c3fb27SDimitry Andric // the static frame. Strictly it would be better to check against the 12606c3fb27SDimitry Andric // attribute, i.e. that the variable is within the always-allocated 12706c3fb27SDimitry Andric // section, and not within some other non-absolute-address object 12806c3fb27SDimitry Andric // allocated here, but the extra error detection is minimal and we would 12906c3fb27SDimitry Andric // have to pass the Function around or cache the attribute value. 13006c3fb27SDimitry Andric uint32_t ObjectEnd = 13106c3fb27SDimitry Andric ObjectStart + DL.getTypeAllocSize(GV.getValueType()); 13206c3fb27SDimitry Andric if (ObjectEnd > StaticLDSSize) { 13306c3fb27SDimitry Andric report_fatal_error( 13406c3fb27SDimitry Andric "Absolute address LDS variable outside of static frame"); 13506c3fb27SDimitry Andric } 13606c3fb27SDimitry Andric } 13706c3fb27SDimitry Andric 13806c3fb27SDimitry Andric Entry.first->second = ObjectStart; 13906c3fb27SDimitry Andric return ObjectStart; 14006c3fb27SDimitry Andric } 14106c3fb27SDimitry Andric 1420b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 1430b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 1440b57cec5SDimitry Andric /// during lowering. 14581ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 1460b57cec5SDimitry Andric 147e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 148e8d8bef9SDimitry Andric 149bdd1243dSDimitry Andric // Align LDS size to trailing, e.g. for aligning dynamic shared memory 150bdd1243dSDimitry Andric LDSSize = alignTo(StaticLDSSize, Trailing); 15181ad6265SDimitry Andric } else { 15281ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 15381ad6265SDimitry Andric "expected region address space"); 1540b57cec5SDimitry Andric 15581ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 15681ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 15781ad6265SDimitry Andric 15881ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 15981ad6265SDimitry Andric GDSSize = StaticGDSSize; 16081ad6265SDimitry Andric } 16181ad6265SDimitry Andric 16281ad6265SDimitry Andric Entry.first->second = Offset; 1630b57cec5SDimitry Andric return Offset; 1640b57cec5SDimitry Andric } 165e8d8bef9SDimitry Andric 166bdd1243dSDimitry Andric std::optional<uint32_t> 167fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 16806c3fb27SDimitry Andric // TODO: Would be more consistent with the abs symbols to use a range 16906c3fb27SDimitry Andric MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 170fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) { 17106c3fb27SDimitry Andric if (ConstantInt *KnownSize = 17206c3fb27SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(0))) { 17306c3fb27SDimitry Andric uint64_t ZExt = KnownSize->getZExtValue(); 17406c3fb27SDimitry Andric if (ZExt <= UINT32_MAX) { 17506c3fb27SDimitry Andric return ZExt; 176fcaf7f86SDimitry Andric } 177fcaf7f86SDimitry Andric } 178fcaf7f86SDimitry Andric } 179fcaf7f86SDimitry Andric return {}; 180fcaf7f86SDimitry Andric } 181fcaf7f86SDimitry Andric 18206c3fb27SDimitry Andric std::optional<uint32_t> 18306c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { 18406c3fb27SDimitry Andric if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 18506c3fb27SDimitry Andric return {}; 18606c3fb27SDimitry Andric 18706c3fb27SDimitry Andric std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); 18806c3fb27SDimitry Andric if (!AbsSymRange) 18906c3fb27SDimitry Andric return {}; 19006c3fb27SDimitry Andric 19106c3fb27SDimitry Andric if (const APInt *V = AbsSymRange->getSingleElement()) { 19206c3fb27SDimitry Andric std::optional<uint64_t> ZExt = V->tryZExtValue(); 19306c3fb27SDimitry Andric if (ZExt && (*ZExt <= UINT32_MAX)) { 19406c3fb27SDimitry Andric return *ZExt; 19506c3fb27SDimitry Andric } 19606c3fb27SDimitry Andric } 19706c3fb27SDimitry Andric 19806c3fb27SDimitry Andric return {}; 19906c3fb27SDimitry Andric } 20006c3fb27SDimitry Andric 20106c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, 202e8d8bef9SDimitry Andric const GlobalVariable &GV) { 20306c3fb27SDimitry Andric const Module *M = F.getParent(); 20406c3fb27SDimitry Andric const DataLayout &DL = M->getDataLayout(); 205e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 206e8d8bef9SDimitry Andric 207e8d8bef9SDimitry Andric Align Alignment = 208e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 209e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 210e8d8bef9SDimitry Andric return; 211e8d8bef9SDimitry Andric 212e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 213e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 21406c3fb27SDimitry Andric 21506c3fb27SDimitry Andric // If there is a dynamic LDS variable associated with this function F, every 21606c3fb27SDimitry Andric // further dynamic LDS instance (allocated by calling setDynLDSAlign) must 21706c3fb27SDimitry Andric // map to the same address. This holds because no LDS is allocated after the 21806c3fb27SDimitry Andric // lowering pass if there are dynamic LDS variables present. 21906c3fb27SDimitry Andric const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 22006c3fb27SDimitry Andric if (Dyn) { 22106c3fb27SDimitry Andric unsigned Offset = LDSSize; // return this? 22206c3fb27SDimitry Andric std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 22306c3fb27SDimitry Andric if (!Expect || (Offset != *Expect)) { 22406c3fb27SDimitry Andric report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 22506c3fb27SDimitry Andric } 22606c3fb27SDimitry Andric } 227e8d8bef9SDimitry Andric } 228*1db9f3b2SDimitry Andric 229*1db9f3b2SDimitry Andric void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { 230*1db9f3b2SDimitry Andric UsesDynamicLDS = DynLDS; 231*1db9f3b2SDimitry Andric } 232*1db9f3b2SDimitry Andric 233*1db9f3b2SDimitry Andric bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } 234