//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Second value, if present, is the maximum value that can be assigned.
  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
  // during codegen.
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // The two separate variables are only profitable when the LDS module lowering
  // pass is disabled. If graphics does not use dynamic LDS, this is never
  // profitable. Leaving cleanup for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
}

unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we would
        // have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    /// TODO: We should sort these to minimize wasted space due to alignment
    /// padding. Currently the padding is decided by the first encountered use
    /// during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  std::string KernelDynLDSName = "llvm.amdgcn.";
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}