1e8d8bef9SDimitry Andric //===-- SIProgramInfo.cpp ----------------------------------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric ///
11e8d8bef9SDimitry Andric /// The SIProgramInfo tracks resource usage and hardware flags for kernels and
12e8d8bef9SDimitry Andric /// entry functions.
13e8d8bef9SDimitry Andric //
14e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric //
16e8d8bef9SDimitry Andric
17e8d8bef9SDimitry Andric #include "SIProgramInfo.h"
185f757f3fSDimitry Andric #include "GCNSubtarget.h"
19e8d8bef9SDimitry Andric #include "SIDefines.h"
20e8d8bef9SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
21*0fca6ea1SDimitry Andric #include "llvm/MC/MCExpr.h"
22e8d8bef9SDimitry Andric
23e8d8bef9SDimitry Andric using namespace llvm;
24e8d8bef9SDimitry Andric
reset(const MachineFunction & MF)25*0fca6ea1SDimitry Andric void SIProgramInfo::reset(const MachineFunction &MF) {
26*0fca6ea1SDimitry Andric MCContext &Ctx = MF.getContext();
27*0fca6ea1SDimitry Andric
28*0fca6ea1SDimitry Andric const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
29*0fca6ea1SDimitry Andric
30*0fca6ea1SDimitry Andric VGPRBlocks = ZeroExpr;
31*0fca6ea1SDimitry Andric SGPRBlocks = ZeroExpr;
32*0fca6ea1SDimitry Andric Priority = 0;
33*0fca6ea1SDimitry Andric FloatMode = 0;
34*0fca6ea1SDimitry Andric Priv = 0;
35*0fca6ea1SDimitry Andric DX10Clamp = 0;
36*0fca6ea1SDimitry Andric DebugMode = 0;
37*0fca6ea1SDimitry Andric IEEEMode = 0;
38*0fca6ea1SDimitry Andric WgpMode = 0;
39*0fca6ea1SDimitry Andric MemOrdered = 0;
40*0fca6ea1SDimitry Andric RrWgMode = 0;
41*0fca6ea1SDimitry Andric ScratchSize = ZeroExpr;
42*0fca6ea1SDimitry Andric
43*0fca6ea1SDimitry Andric LDSBlocks = 0;
44*0fca6ea1SDimitry Andric ScratchBlocks = ZeroExpr;
45*0fca6ea1SDimitry Andric
46*0fca6ea1SDimitry Andric ScratchEnable = ZeroExpr;
47*0fca6ea1SDimitry Andric UserSGPR = 0;
48*0fca6ea1SDimitry Andric TrapHandlerEnable = 0;
49*0fca6ea1SDimitry Andric TGIdXEnable = 0;
50*0fca6ea1SDimitry Andric TGIdYEnable = 0;
51*0fca6ea1SDimitry Andric TGIdZEnable = 0;
52*0fca6ea1SDimitry Andric TGSizeEnable = 0;
53*0fca6ea1SDimitry Andric TIdIGCompCount = 0;
54*0fca6ea1SDimitry Andric EXCPEnMSB = 0;
55*0fca6ea1SDimitry Andric LdsSize = 0;
56*0fca6ea1SDimitry Andric EXCPEnable = 0;
57*0fca6ea1SDimitry Andric
58*0fca6ea1SDimitry Andric ComputePGMRSrc3GFX90A = ZeroExpr;
59*0fca6ea1SDimitry Andric
60*0fca6ea1SDimitry Andric NumVGPR = ZeroExpr;
61*0fca6ea1SDimitry Andric NumArchVGPR = ZeroExpr;
62*0fca6ea1SDimitry Andric NumAccVGPR = ZeroExpr;
63*0fca6ea1SDimitry Andric AccumOffset = ZeroExpr;
64*0fca6ea1SDimitry Andric TgSplit = 0;
65*0fca6ea1SDimitry Andric NumSGPR = ZeroExpr;
66*0fca6ea1SDimitry Andric SGPRSpill = 0;
67*0fca6ea1SDimitry Andric VGPRSpill = 0;
68*0fca6ea1SDimitry Andric LDSSize = 0;
69*0fca6ea1SDimitry Andric FlatUsed = ZeroExpr;
70*0fca6ea1SDimitry Andric
71*0fca6ea1SDimitry Andric NumSGPRsForWavesPerEU = ZeroExpr;
72*0fca6ea1SDimitry Andric NumVGPRsForWavesPerEU = ZeroExpr;
73*0fca6ea1SDimitry Andric Occupancy = ZeroExpr;
74*0fca6ea1SDimitry Andric DynamicCallStack = ZeroExpr;
75*0fca6ea1SDimitry Andric VCCUsed = ZeroExpr;
76*0fca6ea1SDimitry Andric }
77*0fca6ea1SDimitry Andric
getComputePGMRSrc1Reg(const SIProgramInfo & ProgInfo,const GCNSubtarget & ST)78*0fca6ea1SDimitry Andric static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo,
79*0fca6ea1SDimitry Andric const GCNSubtarget &ST) {
80*0fca6ea1SDimitry Andric uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
81*0fca6ea1SDimitry Andric S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
82*0fca6ea1SDimitry Andric S_00B848_PRIV(ProgInfo.Priv) |
83*0fca6ea1SDimitry Andric S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
84*0fca6ea1SDimitry Andric S_00B848_WGP_MODE(ProgInfo.WgpMode) |
85*0fca6ea1SDimitry Andric S_00B848_MEM_ORDERED(ProgInfo.MemOrdered);
865f757f3fSDimitry Andric
875f757f3fSDimitry Andric if (ST.hasDX10ClampMode())
88*0fca6ea1SDimitry Andric Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
895f757f3fSDimitry Andric
905f757f3fSDimitry Andric if (ST.hasIEEEMode())
91*0fca6ea1SDimitry Andric Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
925f757f3fSDimitry Andric
935f757f3fSDimitry Andric if (ST.hasRrWGMode())
94*0fca6ea1SDimitry Andric Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
955f757f3fSDimitry Andric
965f757f3fSDimitry Andric return Reg;
97e8d8bef9SDimitry Andric }
98e8d8bef9SDimitry Andric
getPGMRSrc1Reg(const SIProgramInfo & ProgInfo,CallingConv::ID CC,const GCNSubtarget & ST)99*0fca6ea1SDimitry Andric static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo,
100*0fca6ea1SDimitry Andric CallingConv::ID CC, const GCNSubtarget &ST) {
101*0fca6ea1SDimitry Andric uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) |
102*0fca6ea1SDimitry Andric S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
103*0fca6ea1SDimitry Andric S_00B848_PRIV(ProgInfo.Priv) |
104*0fca6ea1SDimitry Andric S_00B848_DEBUG_MODE(ProgInfo.DebugMode);
1055f757f3fSDimitry Andric
1065f757f3fSDimitry Andric if (ST.hasDX10ClampMode())
107*0fca6ea1SDimitry Andric Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp);
1085f757f3fSDimitry Andric
1095f757f3fSDimitry Andric if (ST.hasIEEEMode())
110*0fca6ea1SDimitry Andric Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
1115f757f3fSDimitry Andric
1125f757f3fSDimitry Andric if (ST.hasRrWGMode())
113*0fca6ea1SDimitry Andric Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode);
1145f757f3fSDimitry Andric
115e8d8bef9SDimitry Andric switch (CC) {
116e8d8bef9SDimitry Andric case CallingConv::AMDGPU_PS:
117*0fca6ea1SDimitry Andric Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered);
118e8d8bef9SDimitry Andric break;
119e8d8bef9SDimitry Andric case CallingConv::AMDGPU_VS:
120*0fca6ea1SDimitry Andric Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered);
121e8d8bef9SDimitry Andric break;
122e8d8bef9SDimitry Andric case CallingConv::AMDGPU_GS:
123*0fca6ea1SDimitry Andric Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) |
124*0fca6ea1SDimitry Andric S_00B228_MEM_ORDERED(ProgInfo.MemOrdered);
125e8d8bef9SDimitry Andric break;
126e8d8bef9SDimitry Andric case CallingConv::AMDGPU_HS:
127*0fca6ea1SDimitry Andric Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) |
128*0fca6ea1SDimitry Andric S_00B428_MEM_ORDERED(ProgInfo.MemOrdered);
129e8d8bef9SDimitry Andric break;
130e8d8bef9SDimitry Andric default:
131e8d8bef9SDimitry Andric break;
132e8d8bef9SDimitry Andric }
133e8d8bef9SDimitry Andric return Reg;
134e8d8bef9SDimitry Andric }
13506c3fb27SDimitry Andric
getComputePGMRSrc2Reg(const SIProgramInfo & ProgInfo)136*0fca6ea1SDimitry Andric static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) {
137*0fca6ea1SDimitry Andric uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) |
138*0fca6ea1SDimitry Andric S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) |
139*0fca6ea1SDimitry Andric S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) |
140*0fca6ea1SDimitry Andric S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) |
141*0fca6ea1SDimitry Andric S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) |
142*0fca6ea1SDimitry Andric S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) |
143*0fca6ea1SDimitry Andric S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) |
144*0fca6ea1SDimitry Andric S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) |
145*0fca6ea1SDimitry Andric S_00B84C_LDS_SIZE(ProgInfo.LdsSize) |
146*0fca6ea1SDimitry Andric S_00B84C_EXCP_EN(ProgInfo.EXCPEnable);
14706c3fb27SDimitry Andric
14806c3fb27SDimitry Andric return Reg;
14906c3fb27SDimitry Andric }
15006c3fb27SDimitry Andric
/// Build the expression `(Val & Mask) << Shift` in \p Ctx.
///
/// A \p Mask of zero skips the AND entirely (the value is left unmasked), and
/// a \p Shift of zero skips the shift, so \p Val itself is returned when both
/// are zero.
static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift,
                               MCContext &Ctx) {
  const MCExpr *Result = Val;

  if (Mask != 0)
    Result = MCBinaryExpr::createAnd(Result, MCConstantExpr::create(Mask, Ctx),
                                     Ctx);

  if (Shift != 0)
    Result = MCBinaryExpr::createShl(Result, MCConstantExpr::create(Shift, Ctx),
                                     Ctx);

  return Result;
}
16306c3fb27SDimitry Andric
getComputePGMRSrc1(const GCNSubtarget & ST,MCContext & Ctx) const164*0fca6ea1SDimitry Andric const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
165*0fca6ea1SDimitry Andric MCContext &Ctx) const {
166*0fca6ea1SDimitry Andric uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
167*0fca6ea1SDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
168*0fca6ea1SDimitry Andric const MCExpr *Res = MCBinaryExpr::createOr(
169*0fca6ea1SDimitry Andric MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
170*0fca6ea1SDimitry Andric MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
171*0fca6ea1SDimitry Andric return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
172*0fca6ea1SDimitry Andric }
173*0fca6ea1SDimitry Andric
getPGMRSrc1(CallingConv::ID CC,const GCNSubtarget & ST,MCContext & Ctx) const174*0fca6ea1SDimitry Andric const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC,
175*0fca6ea1SDimitry Andric const GCNSubtarget &ST,
176*0fca6ea1SDimitry Andric MCContext &Ctx) const {
177*0fca6ea1SDimitry Andric if (AMDGPU::isCompute(CC)) {
178*0fca6ea1SDimitry Andric return getComputePGMRSrc1(ST, Ctx);
179*0fca6ea1SDimitry Andric }
180*0fca6ea1SDimitry Andric
181*0fca6ea1SDimitry Andric uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST);
182*0fca6ea1SDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
183*0fca6ea1SDimitry Andric const MCExpr *Res = MCBinaryExpr::createOr(
184*0fca6ea1SDimitry Andric MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
185*0fca6ea1SDimitry Andric MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
186*0fca6ea1SDimitry Andric return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
187*0fca6ea1SDimitry Andric }
188*0fca6ea1SDimitry Andric
getComputePGMRSrc2(MCContext & Ctx) const189*0fca6ea1SDimitry Andric const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const {
190*0fca6ea1SDimitry Andric uint64_t Reg = getComputePGMRSrc2Reg(*this);
191*0fca6ea1SDimitry Andric const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
192*0fca6ea1SDimitry Andric return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx);
193*0fca6ea1SDimitry Andric }
194*0fca6ea1SDimitry Andric
getPGMRSrc2(CallingConv::ID CC,MCContext & Ctx) const195*0fca6ea1SDimitry Andric const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
196*0fca6ea1SDimitry Andric MCContext &Ctx) const {
197*0fca6ea1SDimitry Andric if (AMDGPU::isCompute(CC))
198*0fca6ea1SDimitry Andric return getComputePGMRSrc2(Ctx);
199*0fca6ea1SDimitry Andric
200*0fca6ea1SDimitry Andric return MCConstantExpr::create(0, Ctx);
20106c3fb27SDimitry Andric }
202