1 //===-- SIProgramInfo.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12 /// entry functions.
13 //
14 //===----------------------------------------------------------------------===//
15 //
16
17 #include "SIProgramInfo.h"
18 #include "GCNSubtarget.h"
19 #include "SIDefines.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/MC/MCExpr.h"
22
23 using namespace llvm;
24
reset(const MachineFunction & MF)25 void SIProgramInfo::reset(const MachineFunction &MF) {
26 MCContext &Ctx = MF.getContext();
27
28 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29
30 CodeSizeInBytes.reset();
31
32 VGPRBlocks = ZeroExpr;
33 SGPRBlocks = ZeroExpr;
34 Priority = 0;
35 FloatMode = 0;
36 Priv = 0;
37 DX10Clamp = 0;
38 DebugMode = 0;
39 IEEEMode = 0;
40 WgpMode = 0;
41 MemOrdered = 0;
42 FwdProgress = 0;
43 RrWgMode = 0;
44 ScratchSize = ZeroExpr;
45
46 LDSBlocks = 0;
47 ScratchBlocks = ZeroExpr;
48
49 ScratchEnable = ZeroExpr;
50 UserSGPR = 0;
51 TrapHandlerEnable = 0;
52 TGIdXEnable = 0;
53 TGIdYEnable = 0;
54 TGIdZEnable = 0;
55 TGSizeEnable = 0;
56 TIdIGCompCount = 0;
57 EXCPEnMSB = 0;
58 LdsSize = 0;
59 EXCPEnable = 0;
60
61 ComputePGMRSrc3 = ZeroExpr;
62
63 NumVGPR = ZeroExpr;
64 NumArchVGPR = ZeroExpr;
65 NumAccVGPR = ZeroExpr;
66 AccumOffset = ZeroExpr;
67 TgSplit = 0;
68 NumSGPR = ZeroExpr;
69 SGPRSpill = 0;
70 VGPRSpill = 0;
71 LDSSize = 0;
72 FlatUsed = ZeroExpr;
73
74 NumSGPRsForWavesPerEU = ZeroExpr;
75 NumVGPRsForWavesPerEU = ZeroExpr;
76 Occupancy = ZeroExpr;
77 DynamicCallStack = ZeroExpr;
78 VCCUsed = ZeroExpr;
79 }
80
getComputePGMRSrc1Reg(const SIProgramInfo & ProgInfo,const GCNSubtarget & ST)81 static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
82 const GCNSubtarget &ST) {
83 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
84 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
85 S_00B848_PRIV(ProgInfo.Priv) |
86 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
87 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
88 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
89
90 if (ST.hasDX10ClampMode())
91 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
92
93 if (ST.hasIEEEMode())
94 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
95
96 // TODO: in the long run we will want to enable this unconditionally.
97 if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA)
98 Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress);
99
100 if (ST.hasRrWGMode())
101 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
102
103 return Reg;
104 }
105
getPGMRSrc1Reg(const SIProgramInfo & ProgInfo,CallingConv::ID CC,const GCNSubtarget & ST)106 static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
107 CallingConv::ID CC, const GCNSubtarget &ST) {
108 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
109 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
110 S_00B848_PRIV(ProgInfo.Priv) |
111 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
112
113 if (ST.hasDX10ClampMode())
114 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
115
116 if (ST.hasIEEEMode())
117 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
118
119 if (ST.hasRrWGMode())
120 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
121
122 switch (CC) {
123 case CallingConv::AMDGPU_PS:
124 Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
125 break;
126 case CallingConv::AMDGPU_VS:
127 Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
128 break;
129 case CallingConv::AMDGPU_GS:
130 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
131 S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
132 break;
133 case CallingConv::AMDGPU_HS:
134 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
135 S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
136 break;
137 default:
138 break;
139 }
140 return Reg;
141 }
142
getComputePGMRSrc2Reg(const SIProgramInfo & ProgInfo)143 static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
144 uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
145 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
146 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
147 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
148 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
149 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
150 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
151 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
152 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
153 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
154
155 return Reg;
156 }
157
/// Build the expression `(Val & Mask) << Shift` in \p Ctx.
///
/// A \p Mask of zero means "do not mask" (no AND node is created), and a
/// \p Shift of zero means "do not shift" (no SHL node is created). With both
/// zero, \p Val is returned unchanged.
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  const MCExpr *Result = Val;

  if (Mask != 0)
    Result = MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx),
                                     Ctx);

  if (Shift != 0)
    Result = MCBinaryExpr::createShl(Result, MCConstantExpr::create(Shift, Ctx),
                                     Ctx);

  return Result;
}
170
getComputePGMRSrc1(const GCNSubtarget & ST,MCContext & Ctx) const171 const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
172 MCContext &Ctx) const {
173 uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
174 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
175 const MCExpr *Res = MCBinaryExpr::createOr(
176 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
177 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
178 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
179 }
180
getPGMRSrc1(CallingConv::ID CC,const GCNSubtarget & ST,MCContext & Ctx) const181 const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
182 const GCNSubtarget &ST,
183 MCContext &Ctx) const {
184 if (AMDGPU::isCompute(CC)) {
185 return getComputePGMRSrc1(ST, Ctx);
186 }
187
188 uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
189 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
190 const MCExpr *Res = MCBinaryExpr::createOr(
191 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
192 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
193 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
194 }
195
getComputePGMRSrc2(MCContext & Ctx) const196 const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
197 uint64_t Reg = getComputePGMRSrc2Reg(*this);
198 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
199 return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
200 }
201
getPGMRSrc2(CallingConv::ID CC,MCContext & Ctx) const202 const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
203 MCContext &Ctx) const {
204 if (AMDGPU::isCompute(CC))
205 return getComputePGMRSrc2(Ctx);
206
207 return MCConstantExpr::create(0, Ctx);
208 }
209
getFunctionCodeSize(const MachineFunction & MF,bool IsLowerBound)210 uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF,
211 bool IsLowerBound) {
212 if (!IsLowerBound && CodeSizeInBytes.has_value())
213 return *CodeSizeInBytes;
214
215 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
216 const SIInstrInfo *TII = STM.getInstrInfo();
217
218 uint64_t CodeSize = 0;
219
220 for (const MachineBasicBlock &MBB : MF) {
221 // The amount of padding to align code can be both underestimated and
222 // overestimated. In case of inline asm used getInstSizeInBytes() will
223 // return a maximum size of a single instruction, where the real size may
224 // differ. At this point CodeSize may be already off.
225 if (!IsLowerBound)
226 CodeSize = alignTo(CodeSize, MBB.getAlignment());
227
228 for (const MachineInstr &MI : MBB) {
229 // TODO: CodeSize should account for multiple functions.
230
231 if (MI.isMetaInstruction())
232 continue;
233
234 // We cannot properly estimate inline asm size. It can be as small as zero
235 // if that is just a comment.
236 if (IsLowerBound && MI.isInlineAsm())
237 continue;
238
239 CodeSize += TII->getInstSizeInBytes(MI);
240 }
241 }
242
243 CodeSizeInBytes = CodeSize;
244 return CodeSize;
245 }
246