xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===-- SIProgramInfo.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 ///
11 /// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12 /// entry functions.
13 //
14 //===----------------------------------------------------------------------===//
15 //
16 
17 #include "SIProgramInfo.h"
18 #include "GCNSubtarget.h"
19 #include "SIDefines.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/MC/MCExpr.h"
22 
23 using namespace llvm;
24 
25 void SIProgramInfo::reset(const MachineFunction &MF) {
26   MCContext &Ctx = MF.getContext();
27 
28   const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29 
30   VGPRBlocks = ZeroExpr;
31   SGPRBlocks = ZeroExpr;
32   Priority = 0;
33   FloatMode = 0;
34   Priv = 0;
35   DX10Clamp = 0;
36   DebugMode = 0;
37   IEEEMode = 0;
38   WgpMode = 0;
39   MemOrdered = 0;
40   RrWgMode = 0;
41   ScratchSize = ZeroExpr;
42 
43   LDSBlocks = 0;
44   ScratchBlocks = ZeroExpr;
45 
46   ScratchEnable = ZeroExpr;
47   UserSGPR = 0;
48   TrapHandlerEnable = 0;
49   TGIdXEnable = 0;
50   TGIdYEnable = 0;
51   TGIdZEnable = 0;
52   TGSizeEnable = 0;
53   TIdIGCompCount = 0;
54   EXCPEnMSB = 0;
55   LdsSize = 0;
56   EXCPEnable = 0;
57 
58   ComputePGMRSrc3GFX90A = ZeroExpr;
59 
60   NumVGPR = ZeroExpr;
61   NumArchVGPR = ZeroExpr;
62   NumAccVGPR = ZeroExpr;
63   AccumOffset = ZeroExpr;
64   TgSplit = 0;
65   NumSGPR = ZeroExpr;
66   SGPRSpill = 0;
67   VGPRSpill = 0;
68   LDSSize = 0;
69   FlatUsed = ZeroExpr;
70 
71   NumSGPRsForWavesPerEU = ZeroExpr;
72   NumVGPRsForWavesPerEU = ZeroExpr;
73   Occupancy = ZeroExpr;
74   DynamicCallStack = ZeroExpr;
75   VCCUsed = ZeroExpr;
76 }
77 
78 static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
79                                       const GCNSubtarget &ST) {
80   uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
81                  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
82                  S_00B848_PRIV(ProgInfo.Priv) |
83                  S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
84                  S_00B848_WGP_MODE(ProgInfo.WgpMode) |
85                  S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
86 
87   if (ST.hasDX10ClampMode())
88     Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
89 
90   if (ST.hasIEEEMode())
91     Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
92 
93   if (ST.hasRrWGMode())
94     Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
95 
96   return Reg;
97 }
98 
99 static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
100                                CallingConv::ID CC, const GCNSubtarget &ST) {
101   uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
102                  S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
103                  S_00B848_PRIV(ProgInfo.Priv) |
104                  S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
105 
106   if (ST.hasDX10ClampMode())
107     Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
108 
109   if (ST.hasIEEEMode())
110     Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
111 
112   if (ST.hasRrWGMode())
113     Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
114 
115   switch (CC) {
116   case CallingConv::AMDGPU_PS:
117     Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
118     break;
119   case CallingConv::AMDGPU_VS:
120     Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
121     break;
122   case CallingConv::AMDGPU_GS:
123     Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
124            S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
125     break;
126   case CallingConv::AMDGPU_HS:
127     Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
128            S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
129     break;
130   default:
131     break;
132   }
133   return Reg;
134 }
135 
136 static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
137   uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
138                  S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
139                  S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
140                  S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
141                  S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
142                  S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
143                  S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
144                  S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
145                  S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
146                  S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
147 
148   return Reg;
149 }
150 
151 static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
152                                MCContext &Ctx) {
153   if (Mask) {
154     const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
155     Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
156   }
157   if (Shift) {
158     const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
159     Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
160   }
161   return Val;
162 }
163 
164 const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
165                                                 MCContext &Ctx) const {
166   uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
167   const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
168   const MCExpr *Res = MCBinaryExpr::createOr(
169       MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
170       MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
171   return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
172 }
173 
174 const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
175                                          const GCNSubtarget &ST,
176                                          MCContext &Ctx) const {
177   if (AMDGPU::isCompute(CC)) {
178     return getComputePGMRSrc1(ST, Ctx);
179   }
180 
181   uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
182   const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
183   const MCExpr *Res = MCBinaryExpr::createOr(
184       MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
185       MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
186   return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
187 }
188 
189 const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
190   uint64_t Reg = getComputePGMRSrc2Reg(*this);
191   const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
192   return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
193 }
194 
195 const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
196                                          MCContext &Ctx) const {
197   if (AMDGPU::isCompute(CC))
198     return getComputePGMRSrc2(Ctx);
199 
200   return MCConstantExpr::create(0, Ctx);
201 }
202