//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
}

unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
    /// TODO: We should sort these to minimize wasted space due to alignment
    /// padding. Currently the padding is decided by the first encountered use
    /// during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";

bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
  auto name = GV.getName();
  return (name == ModuleLDSName) ||
         (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
}

const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
    const GlobalVariable &GV) {
  const Module &M = *GV.getParent();
  StringRef N(GV.getName());
  if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
    return M.getFunction(N);
  }
  return nullptr;
}

const GlobalVariable *
AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  std::string KernelLDSName = "llvm.amdgcn.kernel.";
  KernelLDSName += F.getName();
  KernelLDSName += ".lds";
  return M->getNamedGlobal(KernelLDSName);
}

// This kernel calls no functions that require the module lds struct
static bool canElideModuleLDS(const Function &F) {
  return F.hasFnAttribute("amdgpu-elide-module-lds");
}

unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
    const GlobalVariable &GV) {
  // module.lds, then alignment padding, then kernel.lds, then other variables
  // if any

  assert(isKnownAddressLDSGlobal(GV));
  unsigned Offset = 0;

  if (GV.getName() == ModuleLDSName) {
    return 0;
  }

  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();

  const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
  const Function *f = getKernelLDSFunctionFromGlobal(GV);

  // Account for module.lds if allocated for this function
  if (GVM && f && !canElideModuleLDS(*f)) {
    // allocator aligns this to var align, but it's zero to begin with
    Offset += DL.getTypeAllocSize(GVM->getValueType());
  }

  // No dynamic LDS alignment done by allocateModuleLDSGlobal
  Offset = alignTo(
      Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));

  return Offset;
}

void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
  const Module *M = F.getParent();

  // This function is called before allocating any other LDS so that it can
  // reliably put values at known addresses. Consequently, dynamic LDS, if
  // present, will not yet have been allocated

  assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");

  if (isModuleEntryFunction()) {

    // Pointer values start from zero, memory allocated per-kernel-launch
    // Variables can be grouped into a module level struct and a struct per
    // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
    // are allocated at statically computable addresses here.
    //
    // Address 0
    // {
    //   llvm.amdgcn.module.lds
    // }
    // alignment padding
    // {
    //   llvm.amdgcn.kernel.some-name.lds
    // }
    // other variables, e.g. dynamic lds, allocated after this call

    const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
    const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);

    if (GV && !canElideModuleLDS(F)) {
      assert(isKnownAddressLDSGlobal(*GV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
             "Module LDS expected to be allocated before other LDS");
    }

    if (KV) {
      // The per-kernel offset is deterministic because it is allocated
      // before any other non-module LDS variables.
      assert(isKnownAddressLDSGlobal(*KV));
      unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
      (void)Offset;
      assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
             "Kernel LDS expected to be immediately after module LDS");
    }
  }
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
    if (KnownSize) {
      uint64_t V = KnownSize->getZExtValue();
      if (V <= UINT32_MAX) {
        return V;
      }
    }
  }
  return {};
}

void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
                                           const GlobalVariable &GV) {
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;
}