1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPUMachineFunction.h" 10 #include "AMDGPU.h" 11 #include "AMDGPUPerfHintAnalysis.h" 12 #include "AMDGPUSubtarget.h" 13 #include "llvm/CodeGen/MachineModuleInfo.h" 14 #include "llvm/IR/Constants.h" 15 #include "llvm/Target/TargetMachine.h" 16 17 using namespace llvm; 18 19 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) 20 : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC( 21 MF.getFunction().getCallingConv())), 22 IsModuleEntryFunction( 23 AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())), 24 NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { 25 const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 26 27 // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 28 // except reserved size is not correctly aligned. 29 const Function &F = MF.getFunction(); 30 31 Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 32 MemoryBound = MemBoundAttr.getValueAsBool(); 33 34 Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 35 WaveLimiter = WaveLimitAttr.getValueAsBool(); 36 37 // FIXME: How is this attribute supposed to interact with statically known 38 // global sizes? 39 StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 40 if (!S.empty()) 41 S.consumeInteger(0, GDSSize); 42 43 // Assume the attribute allocates before any known GDS globals. 44 StaticGDSSize = GDSSize; 45 46 CallingConv::ID CC = F.getCallingConv(); 47 if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 48 ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 49 } 50 51 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 52 const GlobalVariable &GV) { 53 auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0)); 54 if (!Entry.second) 55 return Entry.first->second; 56 57 Align Alignment = 58 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 59 60 unsigned Offset; 61 if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 62 /// TODO: We should sort these to minimize wasted space due to alignment 63 /// padding. Currently the padding is decided by the first encountered use 64 /// during lowering. 65 Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 66 67 StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 68 69 // Update the LDS size considering the padding to align the dynamic shared 70 // memory. 71 LDSSize = alignTo(StaticLDSSize, DynLDSAlign); 72 } else { 73 assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 74 "expected region address space"); 75 76 Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 77 StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 78 79 // FIXME: Apply alignment of dynamic GDS 80 GDSSize = StaticGDSSize; 81 } 82 83 Entry.first->second = Offset; 84 return Offset; 85 } 86 87 // This kernel calls no functions that require the module lds struct 88 static bool canElideModuleLDS(const Function &F) { 89 return F.hasFnAttribute("amdgpu-elide-module-lds"); 90 } 91 92 void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { 93 const Module *M = F.getParent(); 94 if (isModuleEntryFunction()) { 95 const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); 96 if (GV && !canElideModuleLDS(F)) { 97 unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); 98 (void)Offset; 99 assert(Offset == 0 && 100 "Module LDS expected to be allocated before other LDS"); 101 } 102 } 103 } 104 105 Optional<uint32_t> 106 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 107 auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 108 if (MD && MD->getNumOperands() == 1) { 109 ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); 110 if (KnownSize) { 111 uint64_t V = KnownSize->getZExtValue(); 112 if (V <= UINT32_MAX) { 113 return V; 114 } 115 } 116 } 117 return {}; 118 } 119 120 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, 121 const GlobalVariable &GV) { 122 assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 123 124 Align Alignment = 125 DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 126 if (Alignment <= DynLDSAlign) 127 return; 128 129 LDSSize = alignTo(StaticLDSSize, Alignment); 130 DynLDSAlign = Alignment; 131 } 132