xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision fcaf7f8644a9988098ac6be2165bce3ea4786e91)
1 //===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUMachineFunction.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
13 #include "llvm/CodeGen/MachineModuleInfo.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/Target/TargetMachine.h"
16 
17 using namespace llvm;
18 
19 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
20     : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC(
21                                   MF.getFunction().getCallingConv())),
22       IsModuleEntryFunction(
23           AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
24       NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
25   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
26 
27   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
28   // except reserved size is not correctly aligned.
29   const Function &F = MF.getFunction();
30 
31   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
32   MemoryBound = MemBoundAttr.getValueAsBool();
33 
34   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
35   WaveLimiter = WaveLimitAttr.getValueAsBool();
36 
37   // FIXME: How is this attribute supposed to interact with statically known
38   // global sizes?
39   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
40   if (!S.empty())
41     S.consumeInteger(0, GDSSize);
42 
43   // Assume the attribute allocates before any known GDS globals.
44   StaticGDSSize = GDSSize;
45 
46   CallingConv::ID CC = F.getCallingConv();
47   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
48     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
49 }
50 
51 unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
52                                                   const GlobalVariable &GV) {
53   auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
54   if (!Entry.second)
55     return Entry.first->second;
56 
57   Align Alignment =
58       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
59 
60   unsigned Offset;
61   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
62     /// TODO: We should sort these to minimize wasted space due to alignment
63     /// padding. Currently the padding is decided by the first encountered use
64     /// during lowering.
65     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
66 
67     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
68 
69     // Update the LDS size considering the padding to align the dynamic shared
70     // memory.
71     LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
72   } else {
73     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
74            "expected region address space");
75 
76     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
77     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
78 
79     // FIXME: Apply alignment of dynamic GDS
80     GDSSize = StaticGDSSize;
81   }
82 
83   Entry.first->second = Offset;
84   return Offset;
85 }
86 
87 // This kernel calls no functions that require the module lds struct
88 static bool canElideModuleLDS(const Function &F) {
89   return F.hasFnAttribute("amdgpu-elide-module-lds");
90 }
91 
92 void AMDGPUMachineFunction::allocateModuleLDSGlobal(const Function &F) {
93   const Module *M = F.getParent();
94   if (isModuleEntryFunction()) {
95     const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
96     if (GV && !canElideModuleLDS(F)) {
97       unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
98       (void)Offset;
99       assert(Offset == 0 &&
100              "Module LDS expected to be allocated before other LDS");
101     }
102   }
103 }
104 
105 Optional<uint32_t>
106 AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
107   auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
108   if (MD && MD->getNumOperands() == 1) {
109     ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
110     if (KnownSize) {
111       uint64_t V = KnownSize->getZExtValue();
112       if (V <= UINT32_MAX) {
113         return V;
114       }
115     }
116   }
117   return {};
118 }
119 
120 void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
121                                            const GlobalVariable &GV) {
122   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
123 
124   Align Alignment =
125       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
126   if (Alignment <= DynLDSAlign)
127     return;
128 
129   LDSSize = alignTo(StaticLDSSize, Alignment);
130   DynLDSAlign = Alignment;
131 }
132