10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 1081ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 13*06c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 140b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 15*06c3fb27SDimitry Andric #include "llvm/IR/ConstantRange.h" 16fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h" 17*06c3fb27SDimitry Andric #include "llvm/IR/Metadata.h" 18e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric using namespace llvm; 210b57cec5SDimitry Andric 22bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 23bdd1243dSDimitry Andric const AMDGPUSubtarget &ST) 24bdd1243dSDimitry Andric : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 25e8d8bef9SDimitry Andric IsModuleEntryFunction( 26bdd1243dSDimitry Andric AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 27bdd1243dSDimitry Andric NoSignedZerosFPMath(false) { 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 300b57cec5SDimitry Andric // except reserved size is not correctly aligned. 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 33fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 36fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 370b57cec5SDimitry Andric 3881ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 3981ad6265SDimitry Andric // global sizes? 4081ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 4181ad6265SDimitry Andric if (!S.empty()) 4281ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 4381ad6265SDimitry Andric 4481ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 4581ad6265SDimitry Andric StaticGDSSize = GDSSize; 4681ad6265SDimitry Andric 47*06c3fb27SDimitry Andric // Second value, if present, is the maximum value that can be assigned. 48*06c3fb27SDimitry Andric // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics 49*06c3fb27SDimitry Andric // during codegen. 50*06c3fb27SDimitry Andric std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( 51*06c3fb27SDimitry Andric F, "amdgpu-lds-size", {0, UINT32_MAX}, true); 52*06c3fb27SDimitry Andric 53*06c3fb27SDimitry Andric // The two separate variables are only profitable when the LDS module lowering 54*06c3fb27SDimitry Andric // pass is disabled. If graphics does not use dynamic LDS, this is never 55*06c3fb27SDimitry Andric // profitable. Leaving cleanup for a later change. 56*06c3fb27SDimitry Andric LDSSize = LDSSizeRange.first; 57*06c3fb27SDimitry Andric StaticLDSSize = LDSSize; 58*06c3fb27SDimitry Andric 590b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 600b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 610b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 62bdd1243dSDimitry Andric 63bdd1243dSDimitry Andric // FIXME: Shouldn't be target specific 64bdd1243dSDimitry Andric Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 65bdd1243dSDimitry Andric NoSignedZerosFPMath = 66bdd1243dSDimitry Andric NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 70bdd1243dSDimitry Andric const GlobalVariable &GV, 71bdd1243dSDimitry Andric Align Trailing) { 72bdd1243dSDimitry Andric auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 730b57cec5SDimitry Andric if (!Entry.second) 740b57cec5SDimitry Andric return Entry.first->second; 750b57cec5SDimitry Andric 765ffd83dbSDimitry Andric Align Alignment = 775ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 780b57cec5SDimitry Andric 7981ad6265SDimitry Andric unsigned Offset; 8081ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 81*06c3fb27SDimitry Andric 82*06c3fb27SDimitry Andric std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); 83*06c3fb27SDimitry Andric if (MaybeAbs) { 84*06c3fb27SDimitry Andric // Absolute address LDS variables that exist prior to the LDS lowering 85*06c3fb27SDimitry Andric // pass raise a fatal error in that pass. These failure modes are only 86*06c3fb27SDimitry Andric // reachable if that lowering pass is disabled or broken. If/when adding 87*06c3fb27SDimitry Andric // support for absolute addresses on user specified variables, the 88*06c3fb27SDimitry Andric // alignment check moves to the lowering pass and the frame calculation 89*06c3fb27SDimitry Andric // needs to take the user variables into consideration. 90*06c3fb27SDimitry Andric 91*06c3fb27SDimitry Andric uint32_t ObjectStart = *MaybeAbs; 92*06c3fb27SDimitry Andric 93*06c3fb27SDimitry Andric if (ObjectStart != alignTo(ObjectStart, Alignment)) { 94*06c3fb27SDimitry Andric report_fatal_error("Absolute address LDS variable inconsistent with " 95*06c3fb27SDimitry Andric "variable alignment"); 96*06c3fb27SDimitry Andric } 97*06c3fb27SDimitry Andric 98*06c3fb27SDimitry Andric if (isModuleEntryFunction()) { 99*06c3fb27SDimitry Andric // If this is a module entry function, we can also sanity check against 100*06c3fb27SDimitry Andric // the static frame. Strictly it would be better to check against the 101*06c3fb27SDimitry Andric // attribute, i.e. that the variable is within the always-allocated 102*06c3fb27SDimitry Andric // section, and not within some other non-absolute-address object 103*06c3fb27SDimitry Andric // allocated here, but the extra error detection is minimal and we would 104*06c3fb27SDimitry Andric // have to pass the Function around or cache the attribute value. 105*06c3fb27SDimitry Andric uint32_t ObjectEnd = 106*06c3fb27SDimitry Andric ObjectStart + DL.getTypeAllocSize(GV.getValueType()); 107*06c3fb27SDimitry Andric if (ObjectEnd > StaticLDSSize) { 108*06c3fb27SDimitry Andric report_fatal_error( 109*06c3fb27SDimitry Andric "Absolute address LDS variable outside of static frame"); 110*06c3fb27SDimitry Andric } 111*06c3fb27SDimitry Andric } 112*06c3fb27SDimitry Andric 113*06c3fb27SDimitry Andric Entry.first->second = ObjectStart; 114*06c3fb27SDimitry Andric return ObjectStart; 115*06c3fb27SDimitry Andric } 116*06c3fb27SDimitry Andric 1170b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 1180b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 1190b57cec5SDimitry Andric /// during lowering. 12081ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 1210b57cec5SDimitry Andric 122e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 123e8d8bef9SDimitry Andric 124bdd1243dSDimitry Andric // Align LDS size to trailing, e.g. for aligning dynamic shared memory 125bdd1243dSDimitry Andric LDSSize = alignTo(StaticLDSSize, Trailing); 12681ad6265SDimitry Andric } else { 12781ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 12881ad6265SDimitry Andric "expected region address space"); 1290b57cec5SDimitry Andric 13081ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 13181ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 13281ad6265SDimitry Andric 13381ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 13481ad6265SDimitry Andric GDSSize = StaticGDSSize; 13581ad6265SDimitry Andric } 13681ad6265SDimitry Andric 13781ad6265SDimitry Andric Entry.first->second = Offset; 1380b57cec5SDimitry Andric return Offset; 1390b57cec5SDimitry Andric } 140e8d8bef9SDimitry Andric 141*06c3fb27SDimitry Andric static const GlobalVariable * 142*06c3fb27SDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) { 143bdd1243dSDimitry Andric const Module *M = F.getParent(); 144*06c3fb27SDimitry Andric std::string KernelDynLDSName = "llvm.amdgcn."; 145*06c3fb27SDimitry Andric KernelDynLDSName += F.getName(); 146*06c3fb27SDimitry Andric KernelDynLDSName += ".dynlds"; 147*06c3fb27SDimitry Andric return M->getNamedGlobal(KernelDynLDSName); 148fe6060f1SDimitry Andric } 149fe6060f1SDimitry Andric 150bdd1243dSDimitry Andric std::optional<uint32_t> 151fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 152*06c3fb27SDimitry Andric // TODO: Would be more consistent with the abs symbols to use a range 153*06c3fb27SDimitry Andric MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 154fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) { 155*06c3fb27SDimitry Andric if (ConstantInt *KnownSize = 156*06c3fb27SDimitry Andric mdconst::extract<ConstantInt>(MD->getOperand(0))) { 157*06c3fb27SDimitry Andric uint64_t ZExt = KnownSize->getZExtValue(); 158*06c3fb27SDimitry Andric if (ZExt <= UINT32_MAX) { 159*06c3fb27SDimitry Andric return ZExt; 160fcaf7f86SDimitry Andric } 161fcaf7f86SDimitry Andric } 162fcaf7f86SDimitry Andric } 163fcaf7f86SDimitry Andric return {}; 164fcaf7f86SDimitry Andric } 165fcaf7f86SDimitry Andric 166*06c3fb27SDimitry Andric std::optional<uint32_t> 167*06c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { 168*06c3fb27SDimitry Andric if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) 169*06c3fb27SDimitry Andric return {}; 170*06c3fb27SDimitry Andric 171*06c3fb27SDimitry Andric std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); 172*06c3fb27SDimitry Andric if (!AbsSymRange) 173*06c3fb27SDimitry Andric return {}; 174*06c3fb27SDimitry Andric 175*06c3fb27SDimitry Andric if (const APInt *V = AbsSymRange->getSingleElement()) { 176*06c3fb27SDimitry Andric std::optional<uint64_t> ZExt = V->tryZExtValue(); 177*06c3fb27SDimitry Andric if (ZExt && (*ZExt <= UINT32_MAX)) { 178*06c3fb27SDimitry Andric return *ZExt; 179*06c3fb27SDimitry Andric } 180*06c3fb27SDimitry Andric } 181*06c3fb27SDimitry Andric 182*06c3fb27SDimitry Andric return {}; 183*06c3fb27SDimitry Andric } 184*06c3fb27SDimitry Andric 185*06c3fb27SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, 186e8d8bef9SDimitry Andric const GlobalVariable &GV) { 187*06c3fb27SDimitry Andric const Module *M = F.getParent(); 188*06c3fb27SDimitry Andric const DataLayout &DL = M->getDataLayout(); 189e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric Align Alignment = 192e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 193e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 194e8d8bef9SDimitry Andric return; 195e8d8bef9SDimitry Andric 196e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 197e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 198*06c3fb27SDimitry Andric 199*06c3fb27SDimitry Andric // If there is a dynamic LDS variable associated with this function F, every 200*06c3fb27SDimitry Andric // further dynamic LDS instance (allocated by calling setDynLDSAlign) must 201*06c3fb27SDimitry Andric // map to the same address. This holds because no LDS is allocated after the 202*06c3fb27SDimitry Andric // lowering pass if there are dynamic LDS variables present. 203*06c3fb27SDimitry Andric const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); 204*06c3fb27SDimitry Andric if (Dyn) { 205*06c3fb27SDimitry Andric unsigned Offset = LDSSize; // return this? 206*06c3fb27SDimitry Andric std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn); 207*06c3fb27SDimitry Andric if (!Expect || (Offset != *Expect)) { 208*06c3fb27SDimitry Andric report_fatal_error("Inconsistent metadata on dynamic LDS variable"); 209*06c3fb27SDimitry Andric } 210*06c3fb27SDimitry Andric } 211e8d8bef9SDimitry Andric } 212