10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 1081ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 130b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 14*fcaf7f86SDimitry Andric #include "llvm/IR/Constants.h" 15e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric using namespace llvm; 180b57cec5SDimitry Andric 19e8d8bef9SDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) 2004eeddc0SDimitry Andric : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC( 2104eeddc0SDimitry Andric MF.getFunction().getCallingConv())), 22e8d8bef9SDimitry Andric IsModuleEntryFunction( 23e8d8bef9SDimitry Andric AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())), 245ffd83dbSDimitry Andric NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { 250b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 280b57cec5SDimitry Andric // except reserved size is not correctly aligned. 290b57cec5SDimitry Andric const Function &F = MF.getFunction(); 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 32fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 35fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 360b57cec5SDimitry Andric 3781ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 3881ad6265SDimitry Andric // global sizes? 3981ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 4081ad6265SDimitry Andric if (!S.empty()) 4181ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 4281ad6265SDimitry Andric 4381ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 4481ad6265SDimitry Andric StaticGDSSize = GDSSize; 4581ad6265SDimitry Andric 460b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 470b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 480b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 525ffd83dbSDimitry Andric const GlobalVariable &GV) { 530b57cec5SDimitry Andric auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0)); 540b57cec5SDimitry Andric if (!Entry.second) 550b57cec5SDimitry Andric return Entry.first->second; 560b57cec5SDimitry Andric 575ffd83dbSDimitry Andric Align Alignment = 585ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 590b57cec5SDimitry Andric 6081ad6265SDimitry Andric unsigned Offset; 6181ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 620b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 630b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 640b57cec5SDimitry Andric /// during lowering. 6581ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 660b57cec5SDimitry Andric 67e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 68e8d8bef9SDimitry Andric 69e8d8bef9SDimitry Andric // Update the LDS size considering the padding to align the dynamic shared 70e8d8bef9SDimitry Andric // memory. 71e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, DynLDSAlign); 7281ad6265SDimitry Andric } else { 7381ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 7481ad6265SDimitry Andric "expected region address space"); 750b57cec5SDimitry Andric 7681ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 7781ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 7881ad6265SDimitry Andric 7981ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 8081ad6265SDimitry Andric GDSSize = StaticGDSSize; 8181ad6265SDimitry Andric } 8281ad6265SDimitry Andric 8381ad6265SDimitry Andric Entry.first->second = Offset; 840b57cec5SDimitry Andric return Offset; 850b57cec5SDimitry Andric } 86e8d8bef9SDimitry Andric 8781ad6265SDimitry Andric // This kernel calls no functions that require the module lds struct 8881ad6265SDimitry Andric static bool canElideModuleLDS(const Function &F) { 8981ad6265SDimitry Andric return F.hasFnAttribute("amdgpu-elide-module-lds"); 9081ad6265SDimitry Andric } 9181ad6265SDimitry Andric 9281ad6265SDimitry Andric void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { 9381ad6265SDimitry Andric const Module *M = F.getParent(); 94fe6060f1SDimitry Andric if (isModuleEntryFunction()) { 95fe6060f1SDimitry Andric const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); 9681ad6265SDimitry Andric if (GV && !canElideModuleLDS(F)) { 97fe6060f1SDimitry Andric unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); 98fe6060f1SDimitry Andric (void)Offset; 99fe6060f1SDimitry Andric assert(Offset == 0 && 100fe6060f1SDimitry Andric "Module LDS expected to be allocated before other LDS"); 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric } 103fe6060f1SDimitry Andric } 104fe6060f1SDimitry Andric 105*fcaf7f86SDimitry Andric Optional<uint32_t> 106*fcaf7f86SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { 107*fcaf7f86SDimitry Andric auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"); 108*fcaf7f86SDimitry Andric if (MD && MD->getNumOperands() == 1) { 109*fcaf7f86SDimitry Andric ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0)); 110*fcaf7f86SDimitry Andric if (KnownSize) { 111*fcaf7f86SDimitry Andric uint64_t V = KnownSize->getZExtValue(); 112*fcaf7f86SDimitry Andric if (V <= UINT32_MAX) { 113*fcaf7f86SDimitry Andric return V; 114*fcaf7f86SDimitry Andric } 115*fcaf7f86SDimitry Andric } 116*fcaf7f86SDimitry Andric } 117*fcaf7f86SDimitry Andric return {}; 118*fcaf7f86SDimitry Andric } 119*fcaf7f86SDimitry Andric 120e8d8bef9SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, 121e8d8bef9SDimitry Andric const GlobalVariable &GV) { 122e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 123e8d8bef9SDimitry Andric 124e8d8bef9SDimitry Andric Align Alignment = 125e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 126e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 127e8d8bef9SDimitry Andric return; 128e8d8bef9SDimitry Andric 129e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 130e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 131e8d8bef9SDimitry Andric } 132