//===-- SIProgramInfo.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The SIProgramInfo tracks resource usage and hardware flags for kernels and
/// entry functions.
//
//===----------------------------------------------------------------------===//
//

#include "SIProgramInfo.h"
#include "GCNSubtarget.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"

using namespace llvm;

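// Reset all tracked values: MCExpr-valued fields are reinitialized to a zero
// constant expression and plain integer fields to 0.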
void SIProgramInfo::reset(const MachineFunction &MF) {
  MCContext &Ctx = MF.getContext();

  const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);

  CodeSizeInBytes.reset();

  VGPRBlocks = ZeroExpr;
  SGPRBlocks = ZeroExpr;
  Priority = 0;
  FloatMode = 0;
  Priv = 0;
  DX10Clamp = 0;
  DebugMode = 0;
  IEEEMode = 0;
  WgpMode = 0;
  MemOrdered = 0;
  FwdProgress = 0;
  RrWgMode = 0;
  ScratchSize = ZeroExpr;

  LDSBlocks = 0;
  ScratchBlocks = ZeroExpr;

  ScratchEnable = ZeroExpr;
  UserSGPR = 0;
  TrapHandlerEnable = 0;
  TGIdXEnable = 0;
  TGIdYEnable = 0;
  TGIdZEnable = 0;
  TGSizeEnable = 0;
  TIdIGCompCount = 0;
  EXCPEnMSB = 0;
  LdsSize = 0;
  EXCPEnable = 0;

  ComputePGMRSrc3 = ZeroExpr;

  NumVGPR = ZeroExpr;
  NumArchVGPR = ZeroExpr;
  NumAccVGPR = ZeroExpr;
  AccumOffset = ZeroExpr;
  TgSplit = 0;
  NumSGPR = ZeroExpr;
  SGPRSpill = 0;
  VGPRSpill = 0;
  LDSSize = 0;
  FlatUsed = ZeroExpr;

  NumSGPRsForWavesPerEU = ZeroExpr;
  NumVGPRsForWavesPerEU = ZeroExpr;
  Occupancy = ZeroExpr;
  DynamicCallStack = ZeroExpr;
  VCCUsed = ZeroExpr;
}

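// Pack the scalar fields into a COMPUTE_PGM_RSRC1 register value. Fields that
// are not supported everywhere (DX10_CLAMP, IEEE_MODE, RR_WG_MODE, and
// FWD_PROGRESS on AMDHSA) are ORed in conditionally.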
static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                                      const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
                 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
                 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  // TODO: in the long run we will want to enable this unconditionally.
  if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
    Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  return Reg;
}

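// Equivalent packing for non-compute (graphics) calling conventions. WGP_MODE
// and MEM_ORDERED live at stage-specific register offsets, so they are added
// per calling convention below.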
static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
                               CallingConv::ID CC, const GCNSubtarget &ST) {
  uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
                 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
                 S_00B848_PRIV(ProgInfo.Priv) |
                 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);

  if (ST.hasDX10ClampMode())
    Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);

  if (ST.hasIEEEMode())
    Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);

  if (ST.hasRrWGMode())
    Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);

  switch (CC) {
  case CallingConv::AMDGPU_PS:
    Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_VS:
    Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_GS:
    Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
           S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  case CallingConv::AMDGPU_HS:
    Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
           S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
    break;
  default:
    break;
  }
  return Reg;
}

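// Pack the scalar fields into a COMPUTE_PGM_RSRC2 register value. The
// scratch-enable bit is an MCExpr and is ORed in separately by
// getComputePGMRSrc2().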
static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
  uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
                 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
                 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
                 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
                 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
                 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
                 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
                 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
                 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
                 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);

  return Reg;
}

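// Build (Val & Mask) << Shift as an MCExpr; a zero Mask or Shift skips the
// corresponding operation.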
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  if (Mask) {
    const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
    Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
  }
  if (Shift) {
    const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
    Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
  }
  return Val;
}

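// Combine the constant RSRC1 bits with the (possibly symbolic) VGPR and SGPR
// block counts: VGPR blocks occupy bits [5:0], SGPR blocks bits [9:6].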
const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
                                                MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
                                         const GCNSubtarget &ST,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC)) {
    return getComputePGMRSrc1(ST, Ctx);
  }

  uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  const MCExpr *Res = MCBinaryExpr::createOr(
      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
  return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}

const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
  uint64_t Reg = getComputePGMRSrc2Reg(*this);
  const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
  return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
}

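// Only compute calling conventions have an RSRC2 encoding here; other shaders
// get a zero constant.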
const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
                                         MCContext &Ctx) const {
  if (AMDGPU::isCompute(CC))
    return getComputePGMRSrc2(Ctx);

  return MCConstantExpr::create(0, Ctx);
}

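// Compute the byte size of the function's machine code, caching the result.
// When IsLowerBound is set, alignment padding and inline asm are skipped so
// the returned value is a conservative lower bound.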
uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF,
                                            bool IsLowerBound) {
  if (!IsLowerBound && CodeSizeInBytes.has_value())
    return *CodeSizeInBytes;

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = STM.getInstrInfo();

  uint64_t CodeSize = 0;

  for (const MachineBasicBlock &MBB : MF) {
    // The amount of padding needed to align code can be both underestimated
    // and overestimated. For inline asm, getInstSizeInBytes() returns the
    // maximum size of a single instruction, while the real size may differ,
    // so CodeSize may already be off at this point.
    if (!IsLowerBound)
      CodeSize = alignTo(CodeSize, MBB.getAlignment());

    for (const MachineInstr &MI : MBB) {
      // TODO: CodeSize should account for multiple functions.

      if (MI.isMetaInstruction())
        continue;

      // We cannot properly estimate the size of inline asm; it can be as
      // small as zero if it is just a comment.
      if (IsLowerBound && MI.isInlineAsm())
        continue;

      CodeSize += TII->getInstSizeInBytes(MI);
    }
  }

  CodeSizeInBytes = CodeSize;
  return CodeSize;
}
246