xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
10b57cec5SDimitry Andric //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AMDGPUMachineFunction.h"
10*81ad6265SDimitry Andric #include "AMDGPU.h"
110b57cec5SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12e8d8bef9SDimitry Andric #include "AMDGPUSubtarget.h"
130b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
14e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric using namespace llvm;
170b57cec5SDimitry Andric 
18e8d8bef9SDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
1904eeddc0SDimitry Andric     : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC(
2004eeddc0SDimitry Andric                                   MF.getFunction().getCallingConv())),
21e8d8bef9SDimitry Andric       IsModuleEntryFunction(
22e8d8bef9SDimitry Andric           AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
235ffd83dbSDimitry Andric       NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
240b57cec5SDimitry Andric   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
270b57cec5SDimitry Andric   // except reserved size is not correctly aligned.
280b57cec5SDimitry Andric   const Function &F = MF.getFunction();
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
31fe6060f1SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
34fe6060f1SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
350b57cec5SDimitry Andric 
36*81ad6265SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
37*81ad6265SDimitry Andric   // global sizes?
38*81ad6265SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
39*81ad6265SDimitry Andric   if (!S.empty())
40*81ad6265SDimitry Andric     S.consumeInteger(0, GDSSize);
41*81ad6265SDimitry Andric 
42*81ad6265SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
43*81ad6265SDimitry Andric   StaticGDSSize = GDSSize;
44*81ad6265SDimitry Andric 
450b57cec5SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
460b57cec5SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
470b57cec5SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
480b57cec5SDimitry Andric }
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
515ffd83dbSDimitry Andric                                                   const GlobalVariable &GV) {
520b57cec5SDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
530b57cec5SDimitry Andric   if (!Entry.second)
540b57cec5SDimitry Andric     return Entry.first->second;
550b57cec5SDimitry Andric 
565ffd83dbSDimitry Andric   Align Alignment =
575ffd83dbSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
580b57cec5SDimitry Andric 
59*81ad6265SDimitry Andric   unsigned Offset;
60*81ad6265SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
610b57cec5SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
620b57cec5SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
630b57cec5SDimitry Andric     /// during lowering.
64*81ad6265SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
650b57cec5SDimitry Andric 
66e8d8bef9SDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
67e8d8bef9SDimitry Andric 
68e8d8bef9SDimitry Andric     // Update the LDS size considering the padding to align the dynamic shared
69e8d8bef9SDimitry Andric     // memory.
70e8d8bef9SDimitry Andric     LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
71*81ad6265SDimitry Andric   } else {
72*81ad6265SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
73*81ad6265SDimitry Andric            "expected region address space");
740b57cec5SDimitry Andric 
75*81ad6265SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
76*81ad6265SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
77*81ad6265SDimitry Andric 
78*81ad6265SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
79*81ad6265SDimitry Andric     GDSSize = StaticGDSSize;
80*81ad6265SDimitry Andric   }
81*81ad6265SDimitry Andric 
82*81ad6265SDimitry Andric   Entry.first->second = Offset;
830b57cec5SDimitry Andric   return Offset;
840b57cec5SDimitry Andric }
85e8d8bef9SDimitry Andric 
86*81ad6265SDimitry Andric // This kernel calls no functions that require the module lds struct
87*81ad6265SDimitry Andric static bool canElideModuleLDS(const Function &F) {
88*81ad6265SDimitry Andric   return F.hasFnAttribute("amdgpu-elide-module-lds");
89*81ad6265SDimitry Andric }
90*81ad6265SDimitry Andric 
91*81ad6265SDimitry Andric void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
92*81ad6265SDimitry Andric   const Module *M = F.getParent();
93fe6060f1SDimitry Andric   if (isModuleEntryFunction()) {
94fe6060f1SDimitry Andric     const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
95*81ad6265SDimitry Andric     if (GV && !canElideModuleLDS(F)) {
96fe6060f1SDimitry Andric       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
97fe6060f1SDimitry Andric       (void)Offset;
98fe6060f1SDimitry Andric       assert(Offset == 0 &&
99fe6060f1SDimitry Andric              "Module LDS expected to be allocated before other LDS");
100fe6060f1SDimitry Andric     }
101fe6060f1SDimitry Andric   }
102fe6060f1SDimitry Andric }
103fe6060f1SDimitry Andric 
104e8d8bef9SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
105e8d8bef9SDimitry Andric                                            const GlobalVariable &GV) {
106e8d8bef9SDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
107e8d8bef9SDimitry Andric 
108e8d8bef9SDimitry Andric   Align Alignment =
109e8d8bef9SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
110e8d8bef9SDimitry Andric   if (Alignment <= DynLDSAlign)
111e8d8bef9SDimitry Andric     return;
112e8d8bef9SDimitry Andric 
113e8d8bef9SDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
114e8d8bef9SDimitry Andric   DynLDSAlign = Alignment;
115e8d8bef9SDimitry Andric }
116