10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 1081ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 130b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 14fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h" 15e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric using namespace llvm; 180b57cec5SDimitry Andric 19*bdd1243dSDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, 20*bdd1243dSDimitry Andric const AMDGPUSubtarget &ST) 21*bdd1243dSDimitry Andric : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())), 22e8d8bef9SDimitry Andric IsModuleEntryFunction( 23*bdd1243dSDimitry Andric AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())), 24*bdd1243dSDimitry Andric NoSignedZerosFPMath(false) { 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 270b57cec5SDimitry Andric // except reserved size is not correctly aligned. 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 30fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 33fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 340b57cec5SDimitry Andric 3581ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 3681ad6265SDimitry Andric // global sizes? 3781ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 3881ad6265SDimitry Andric if (!S.empty()) 3981ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 4081ad6265SDimitry Andric 4181ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 4281ad6265SDimitry Andric StaticGDSSize = GDSSize; 4381ad6265SDimitry Andric 440b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 450b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 460b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 47*bdd1243dSDimitry Andric 48*bdd1243dSDimitry Andric // FIXME: Shouldn't be target specific 49*bdd1243dSDimitry Andric Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math"); 50*bdd1243dSDimitry Andric NoSignedZerosFPMath = 51*bdd1243dSDimitry Andric NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true"; 520b57cec5SDimitry Andric } 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 55*bdd1243dSDimitry Andric const GlobalVariable &GV, 56*bdd1243dSDimitry Andric Align Trailing) { 57*bdd1243dSDimitry Andric auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0)); 580b57cec5SDimitry Andric if (!Entry.second) 590b57cec5SDimitry Andric return Entry.first->second; 600b57cec5SDimitry Andric 615ffd83dbSDimitry Andric Align Alignment = 625ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 630b57cec5SDimitry Andric 6481ad6265SDimitry Andric unsigned Offset; 6581ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 660b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 670b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 680b57cec5SDimitry Andric /// during lowering. 6981ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 700b57cec5SDimitry Andric 71e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 72e8d8bef9SDimitry Andric 73*bdd1243dSDimitry Andric // Align LDS size to trailing, e.g. for aligning dynamic shared memory 74*bdd1243dSDimitry Andric LDSSize = alignTo(StaticLDSSize, Trailing); 7581ad6265SDimitry Andric } else { 7681ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 7781ad6265SDimitry Andric "expected region address space"); 780b57cec5SDimitry Andric 7981ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 8081ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 8181ad6265SDimitry Andric 8281ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 8381ad6265SDimitry Andric GDSSize = StaticGDSSize; 8481ad6265SDimitry Andric } 8581ad6265SDimitry Andric 8681ad6265SDimitry Andric Entry.first->second = Offset; 870b57cec5SDimitry Andric return Offset; 880b57cec5SDimitry Andric } 89e8d8bef9SDimitry Andric 90*bdd1243dSDimitry Andric static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds"; 91*bdd1243dSDimitry Andric 92*bdd1243dSDimitry Andric bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) { 93*bdd1243dSDimitry Andric auto name = GV.getName(); 94*bdd1243dSDimitry Andric return (name == ModuleLDSName) || 95*bdd1243dSDimitry Andric (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds")); 96*bdd1243dSDimitry Andric } 97*bdd1243dSDimitry Andric 98*bdd1243dSDimitry Andric const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal( 99*bdd1243dSDimitry Andric const GlobalVariable &GV) { 100*bdd1243dSDimitry Andric const Module &M = *GV.getParent(); 101*bdd1243dSDimitry Andric StringRef N(GV.getName()); 102*bdd1243dSDimitry Andric if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) { 103*bdd1243dSDimitry Andric return M.getFunction(N); 104*bdd1243dSDimitry Andric } 105*bdd1243dSDimitry Andric return nullptr; 106*bdd1243dSDimitry Andric } 107*bdd1243dSDimitry Andric 108*bdd1243dSDimitry Andric const GlobalVariable * 109*bdd1243dSDimitry Andric AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) { 110*bdd1243dSDimitry Andric const Module *M = F.getParent(); 111*bdd1243dSDimitry Andric std::string KernelLDSName = "llvm.amdgcn.kernel."; 112*bdd1243dSDimitry Andric KernelLDSName += F.getName(); 113*bdd1243dSDimitry Andric KernelLDSName += ".lds"; 114*bdd1243dSDimitry Andric return M->getNamedGlobal(KernelLDSName); 115*bdd1243dSDimitry Andric } 116*bdd1243dSDimitry Andric 11781ad6265SDimitry Andric // This kernel calls no functions that require the module lds struct 11881ad6265SDimitry Andric static bool canElideModuleLDS(const Function &F) { 11981ad6265SDimitry Andric return F.hasFnAttribute("amdgpu-elide-module-lds"); 12081ad6265SDimitry Andric } 12181ad6265SDimitry Andric 122*bdd1243dSDimitry Andric unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal( 123*bdd1243dSDimitry Andric const GlobalVariable &GV) { 124*bdd1243dSDimitry Andric // module.lds, then alignment padding, then kernel.lds, then other variables 125*bdd1243dSDimitry Andric // if any 126*bdd1243dSDimitry Andric 127*bdd1243dSDimitry Andric assert(isKnownAddressLDSGlobal(GV)); 128*bdd1243dSDimitry Andric unsigned Offset = 0; 129*bdd1243dSDimitry Andric 130*bdd1243dSDimitry Andric if (GV.getName() == ModuleLDSName) { 131*bdd1243dSDimitry Andric return 0; 132*bdd1243dSDimitry Andric } 133*bdd1243dSDimitry Andric 134*bdd1243dSDimitry Andric const Module *M = GV.getParent(); 135*bdd1243dSDimitry Andric const DataLayout &DL = M->getDataLayout(); 136*bdd1243dSDimitry Andric 137*bdd1243dSDimitry Andric const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName); 138*bdd1243dSDimitry Andric const Function *f = getKernelLDSFunctionFromGlobal(GV); 139*bdd1243dSDimitry Andric 140*bdd1243dSDimitry Andric // Account for module.lds if allocated for this function 141*bdd1243dSDimitry Andric if (GVM && f && !canElideModuleLDS(*f)) { 142*bdd1243dSDimitry Andric // allocator aligns this to var align, but it's zero to begin with 143*bdd1243dSDimitry Andric Offset += DL.getTypeAllocSize(GVM->getValueType()); 144*bdd1243dSDimitry Andric } 145*bdd1243dSDimitry Andric 146*bdd1243dSDimitry Andric // No dynamic LDS alignment done by allocateModuleLDSGlobal 147*bdd1243dSDimitry Andric Offset = alignTo( 148*bdd1243dSDimitry Andric Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType())); 149*bdd1243dSDimitry Andric 150*bdd1243dSDimitry Andric return Offset; 151*bdd1243dSDimitry Andric } 152*bdd1243dSDimitry Andric 153*bdd1243dSDimitry Andric void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) { 15481ad6265SDimitry Andric const Module *M = F.getParent(); 155*bdd1243dSDimitry Andric 156*bdd1243dSDimitry Andric // This function is called before allocating any other LDS so that it can 157*bdd1243dSDimitry Andric // reliably put values at known addresses. Consequently, dynamic LDS, if 158*bdd1243dSDimitry Andric // present, will not yet have been allocated 159*bdd1243dSDimitry Andric 160*bdd1243dSDimitry Andric assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated"); 161*bdd1243dSDimitry Andric 162fe6060f1SDimitry Andric if (isModuleEntryFunction()) { 163*bdd1243dSDimitry Andric 164*bdd1243dSDimitry Andric // Pointer values start from zero, memory allocated per-kernel-launch 165*bdd1243dSDimitry Andric // Variables can be grouped into a module level struct and a struct per 166*bdd1243dSDimitry Andric // kernel function by AMDGPULowerModuleLDSPass. If that is done, they 167*bdd1243dSDimitry Andric // are allocated at statically computable addresses here. 168*bdd1243dSDimitry Andric // 169*bdd1243dSDimitry Andric // Address 0 170*bdd1243dSDimitry Andric // { 171*bdd1243dSDimitry Andric // llvm.amdgcn.module.lds 172*bdd1243dSDimitry Andric // } 173*bdd1243dSDimitry Andric // alignment padding 174*bdd1243dSDimitry Andric // { 175*bdd1243dSDimitry Andric // llvm.amdgcn.kernel.some-name.lds 176*bdd1243dSDimitry Andric // } 177*bdd1243dSDimitry Andric // other variables, e.g. dynamic lds, allocated after this call 178*bdd1243dSDimitry Andric 179*bdd1243dSDimitry Andric const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName); 180*bdd1243dSDimitry Andric const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F); 181*bdd1243dSDimitry Andric 18281ad6265SDimitry Andric if (GV && !canElideModuleLDS(F)) { 183*bdd1243dSDimitry Andric assert(isKnownAddressLDSGlobal(*GV)); 184*bdd1243dSDimitry Andric unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align()); 185fe6060f1SDimitry Andric (void)Offset; 186*bdd1243dSDimitry Andric assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) && 187fe6060f1SDimitry Andric "Module LDS expected to be allocated before other LDS"); 188fe6060f1SDimitry Andric } 189*bdd1243dSDimitry Andric 190*bdd1243dSDimitry Andric if (KV) { 191*bdd1243dSDimitry Andric // The per-kernel offset is deterministic because it is allocated 192*bdd1243dSDimitry Andric // before any other non-module LDS variables. 193*bdd1243dSDimitry Andric assert(isKnownAddressLDSGlobal(*KV)); 194*bdd1243dSDimitry Andric unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align()); 195*bdd1243dSDimitry Andric (void)Offset; 196*bdd1243dSDimitry Andric assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) && 197*bdd1243dSDimitry Andric "Kernel LDS expected to be immediately after module LDS"); 198*bdd1243dSDimitry Andric } 199fe6060f1SDimitry Andric } 200fe6060f1SDimitry Andric } 201fe6060f1SDimitry Andric 202*bdd1243dSDimitry Andric std::optional<uint32_t> 203fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 204fcaf7f86SDimitry Andric auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 205fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) { 206fcaf7f86SDimitry Andric ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); 207fcaf7f86SDimitry Andric if (KnownSize) { 208fcaf7f86SDimitry Andric uint64_t V = KnownSize->getZExtValue(); 209fcaf7f86SDimitry Andric if (V <= UINT32_MAX) { 210fcaf7f86SDimitry Andric return V; 211fcaf7f86SDimitry Andric } 212fcaf7f86SDimitry Andric } 213fcaf7f86SDimitry Andric } 214fcaf7f86SDimitry Andric return {}; 215fcaf7f86SDimitry Andric } 216fcaf7f86SDimitry Andric 217e8d8bef9SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, 218e8d8bef9SDimitry Andric const GlobalVariable &GV) { 219e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 220e8d8bef9SDimitry Andric 221e8d8bef9SDimitry Andric Align Alignment = 222e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 223e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 224e8d8bef9SDimitry Andric return; 225e8d8bef9SDimitry Andric 226e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 227e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 228e8d8bef9SDimitry Andric } 229