1 //===-- SIProgramInfo.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12 /// entry functions.
13 //
14 //===----------------------------------------------------------------------===//
15 //
16
17 #include "SIProgramInfo.h"
18 #include "GCNSubtarget.h"
19 #include "SIDefines.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/MC/MCExpr.h"
22
23 using namespace llvm;
24
reset(const MachineFunction & MF)25 void SIProgramInfo::reset(const MachineFunction &MF) {
26 MCContext &Ctx = MF.getContext();
27
28 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29
30 VGPRBlocks = ZeroExpr;
31 SGPRBlocks = ZeroExpr;
32 Priority = 0;
33 FloatMode = 0;
34 Priv = 0;
35 DX10Clamp = 0;
36 DebugMode = 0;
37 IEEEMode = 0;
38 WgpMode = 0;
39 MemOrdered = 0;
40 RrWgMode = 0;
41 ScratchSize = ZeroExpr;
42
43 LDSBlocks = 0;
44 ScratchBlocks = ZeroExpr;
45
46 ScratchEnable = ZeroExpr;
47 UserSGPR = 0;
48 TrapHandlerEnable = 0;
49 TGIdXEnable = 0;
50 TGIdYEnable = 0;
51 TGIdZEnable = 0;
52 TGSizeEnable = 0;
53 TIdIGCompCount = 0;
54 EXCPEnMSB = 0;
55 LdsSize = 0;
56 EXCPEnable = 0;
57
58 ComputePGMRSrc3GFX90A = ZeroExpr;
59
60 NumVGPR = ZeroExpr;
61 NumArchVGPR = ZeroExpr;
62 NumAccVGPR = ZeroExpr;
63 AccumOffset = ZeroExpr;
64 TgSplit = 0;
65 NumSGPR = ZeroExpr;
66 SGPRSpill = 0;
67 VGPRSpill = 0;
68 LDSSize = 0;
69 FlatUsed = ZeroExpr;
70
71 NumSGPRsForWavesPerEU = ZeroExpr;
72 NumVGPRsForWavesPerEU = ZeroExpr;
73 Occupancy = ZeroExpr;
74 DynamicCallStack = ZeroExpr;
75 VCCUsed = ZeroExpr;
76 }
77
getComputePGMRSrc1Reg(const SIProgramInfo & ProgInfo,const GCNSubtarget & ST)78 static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
79 const GCNSubtarget &ST) {
80 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
81 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
82 S_00B848_PRIV(ProgInfo.Priv) |
83 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
84 S_00B848_WGP_MODE(ProgInfo.WgpMode) |
85 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
86
87 if (ST.hasDX10ClampMode())
88 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
89
90 if (ST.hasIEEEMode())
91 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
92
93 if (ST.hasRrWGMode())
94 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
95
96 return Reg;
97 }
98
getPGMRSrc1Reg(const SIProgramInfo & ProgInfo,CallingConv::ID CC,const GCNSubtarget & ST)99 static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
100 CallingConv::ID CC, const GCNSubtarget &ST) {
101 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
102 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
103 S_00B848_PRIV(ProgInfo.Priv) |
104 S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
105
106 if (ST.hasDX10ClampMode())
107 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
108
109 if (ST.hasIEEEMode())
110 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
111
112 if (ST.hasRrWGMode())
113 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
114
115 switch (CC) {
116 case CallingConv::AMDGPU_PS:
117 Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
118 break;
119 case CallingConv::AMDGPU_VS:
120 Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
121 break;
122 case CallingConv::AMDGPU_GS:
123 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
124 S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
125 break;
126 case CallingConv::AMDGPU_HS:
127 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
128 S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
129 break;
130 default:
131 break;
132 }
133 return Reg;
134 }
135
getComputePGMRSrc2Reg(const SIProgramInfo & ProgInfo)136 static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
137 uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
138 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
139 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
140 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
141 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
142 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
143 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
144 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
145 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
146 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
147
148 return Reg;
149 }
150
/// Build the expression `(Val & Mask) << Shift`.
///
/// A \p Mask of zero means "no masking": the AND node is omitted rather than
/// masking with zero. Likewise a \p Shift of zero omits the shift node. With
/// both zero, \p Val is returned unchanged.
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  const MCExpr *Result = Val;

  if (Mask != 0)
    Result = MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx),
                                     Ctx);

  if (Shift != 0)
    Result = MCBinaryExpr::createShl(Result, MCConstantExpr::create(Shift, Ctx),
                                     Ctx);

  return Result;
}
163
getComputePGMRSrc1(const GCNSubtarget & ST,MCContext & Ctx) const164 const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
165 MCContext &Ctx) const {
166 uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
167 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
168 const MCExpr *Res = MCBinaryExpr::createOr(
169 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
170 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
171 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
172 }
173
getPGMRSrc1(CallingConv::ID CC,const GCNSubtarget & ST,MCContext & Ctx) const174 const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
175 const GCNSubtarget &ST,
176 MCContext &Ctx) const {
177 if (AMDGPU::isCompute(CC)) {
178 return getComputePGMRSrc1(ST, Ctx);
179 }
180
181 uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
182 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
183 const MCExpr *Res = MCBinaryExpr::createOr(
184 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
185 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
186 return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
187 }
188
getComputePGMRSrc2(MCContext & Ctx) const189 const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
190 uint64_t Reg = getComputePGMRSrc2Reg(*this);
191 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
192 return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
193 }
194
getPGMRSrc2(CallingConv::ID CC,MCContext & Ctx) const195 const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
196 MCContext &Ctx) const {
197 if (AMDGPU::isCompute(CC))
198 return getComputePGMRSrc2(Ctx);
199
200 return MCConstantExpr::create(0, Ctx);
201 }
202