10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h" 10*81ad6265SDimitry Andric #include "AMDGPU.h" 110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h" 12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h" 130b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 14e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric using namespace llvm; 170b57cec5SDimitry Andric 18e8d8bef9SDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) 1904eeddc0SDimitry Andric : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC( 2004eeddc0SDimitry Andric MF.getFunction().getCallingConv())), 21e8d8bef9SDimitry Andric IsModuleEntryFunction( 22e8d8bef9SDimitry Andric AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())), 235ffd83dbSDimitry Andric NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { 240b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, 270b57cec5SDimitry Andric // except reserved size is not correctly aligned. 280b57cec5SDimitry Andric const Function &F = MF.getFunction(); 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound"); 31fe6060f1SDimitry Andric MemoryBound = MemBoundAttr.getValueAsBool(); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter"); 34fe6060f1SDimitry Andric WaveLimiter = WaveLimitAttr.getValueAsBool(); 350b57cec5SDimitry Andric 36*81ad6265SDimitry Andric // FIXME: How is this attribute supposed to interact with statically known 37*81ad6265SDimitry Andric // global sizes? 38*81ad6265SDimitry Andric StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 39*81ad6265SDimitry Andric if (!S.empty()) 40*81ad6265SDimitry Andric S.consumeInteger(0, GDSSize); 41*81ad6265SDimitry Andric 42*81ad6265SDimitry Andric // Assume the attribute allocates before any known GDS globals. 43*81ad6265SDimitry Andric StaticGDSSize = GDSSize; 44*81ad6265SDimitry Andric 450b57cec5SDimitry Andric CallingConv::ID CC = F.getCallingConv(); 460b57cec5SDimitry Andric if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) 470b57cec5SDimitry Andric ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign); 480b57cec5SDimitry Andric } 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, 515ffd83dbSDimitry Andric const GlobalVariable &GV) { 520b57cec5SDimitry Andric auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0)); 530b57cec5SDimitry Andric if (!Entry.second) 540b57cec5SDimitry Andric return Entry.first->second; 550b57cec5SDimitry Andric 565ffd83dbSDimitry Andric Align Alignment = 575ffd83dbSDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 580b57cec5SDimitry Andric 59*81ad6265SDimitry Andric unsigned Offset; 60*81ad6265SDimitry Andric if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { 610b57cec5SDimitry Andric /// TODO: We should sort these to minimize wasted space due to alignment 620b57cec5SDimitry Andric /// padding. Currently the padding is decided by the first encountered use 630b57cec5SDimitry Andric /// during lowering. 64*81ad6265SDimitry Andric Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment); 650b57cec5SDimitry Andric 66e8d8bef9SDimitry Andric StaticLDSSize += DL.getTypeAllocSize(GV.getValueType()); 67e8d8bef9SDimitry Andric 68e8d8bef9SDimitry Andric // Update the LDS size considering the padding to align the dynamic shared 69e8d8bef9SDimitry Andric // memory. 70e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, DynLDSAlign); 71*81ad6265SDimitry Andric } else { 72*81ad6265SDimitry Andric assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && 73*81ad6265SDimitry Andric "expected region address space"); 740b57cec5SDimitry Andric 75*81ad6265SDimitry Andric Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment); 76*81ad6265SDimitry Andric StaticGDSSize += DL.getTypeAllocSize(GV.getValueType()); 77*81ad6265SDimitry Andric 78*81ad6265SDimitry Andric // FIXME: Apply alignment of dynamic GDS 79*81ad6265SDimitry Andric GDSSize = StaticGDSSize; 80*81ad6265SDimitry Andric } 81*81ad6265SDimitry Andric 82*81ad6265SDimitry Andric Entry.first->second = Offset; 830b57cec5SDimitry Andric return Offset; 840b57cec5SDimitry Andric } 85e8d8bef9SDimitry Andric 86*81ad6265SDimitry Andric // This kernel calls no functions that require the module lds struct 87*81ad6265SDimitry Andric static bool canElideModuleLDS(const Function &F) { 88*81ad6265SDimitry Andric return F.hasFnAttribute("amdgpu-elide-module-lds"); 89*81ad6265SDimitry Andric } 90*81ad6265SDimitry Andric 91*81ad6265SDimitry Andric void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) { 92*81ad6265SDimitry Andric const Module *M = F.getParent(); 93fe6060f1SDimitry Andric if (isModuleEntryFunction()) { 94fe6060f1SDimitry Andric const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds"); 95*81ad6265SDimitry Andric if (GV && !canElideModuleLDS(F)) { 96fe6060f1SDimitry Andric unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV); 97fe6060f1SDimitry Andric (void)Offset; 98fe6060f1SDimitry Andric assert(Offset == 0 && 99fe6060f1SDimitry Andric "Module LDS expected to be allocated before other LDS"); 100fe6060f1SDimitry Andric } 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric } 103fe6060f1SDimitry Andric 104e8d8bef9SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL, 105e8d8bef9SDimitry Andric const GlobalVariable &GV) { 106e8d8bef9SDimitry Andric assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); 107e8d8bef9SDimitry Andric 108e8d8bef9SDimitry Andric Align Alignment = 109e8d8bef9SDimitry Andric DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()); 110e8d8bef9SDimitry Andric if (Alignment <= DynLDSAlign) 111e8d8bef9SDimitry Andric return; 112e8d8bef9SDimitry Andric 113e8d8bef9SDimitry Andric LDSSize = alignTo(StaticLDSSize, Alignment); 114e8d8bef9SDimitry Andric DynLDSAlign = Alignment; 115e8d8bef9SDimitry Andric } 116