1 //===-- SIProgramInfo.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// 11 /// The SIProgramInfo tracks resource usage and hardware flags for kernels and 12 /// entry functions. 13 // 14 //===----------------------------------------------------------------------===// 15 // 16 17 #include "SIProgramInfo.h" 18 #include "GCNSubtarget.h" 19 #include "SIDefines.h" 20 #include "Utils/AMDGPUBaseInfo.h" 21 #include "llvm/MC/MCExpr.h" 22 23 using namespace llvm; 24 25 void SIProgramInfo::reset(const MachineFunction &MF) { 26 MCContext &Ctx = MF.getContext(); 27 28 const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx); 29 30 VGPRBlocks = ZeroExpr; 31 SGPRBlocks = ZeroExpr; 32 Priority = 0; 33 FloatMode = 0; 34 Priv = 0; 35 DX10Clamp = 0; 36 DebugMode = 0; 37 IEEEMode = 0; 38 WgpMode = 0; 39 MemOrdered = 0; 40 RrWgMode = 0; 41 ScratchSize = ZeroExpr; 42 43 LDSBlocks = 0; 44 ScratchBlocks = ZeroExpr; 45 46 ScratchEnable = ZeroExpr; 47 UserSGPR = 0; 48 TrapHandlerEnable = 0; 49 TGIdXEnable = 0; 50 TGIdYEnable = 0; 51 TGIdZEnable = 0; 52 TGSizeEnable = 0; 53 TIdIGCompCount = 0; 54 EXCPEnMSB = 0; 55 LdsSize = 0; 56 EXCPEnable = 0; 57 58 ComputePGMRSrc3GFX90A = ZeroExpr; 59 60 NumVGPR = ZeroExpr; 61 NumArchVGPR = ZeroExpr; 62 NumAccVGPR = ZeroExpr; 63 AccumOffset = ZeroExpr; 64 TgSplit = 0; 65 NumSGPR = ZeroExpr; 66 SGPRSpill = 0; 67 VGPRSpill = 0; 68 LDSSize = 0; 69 FlatUsed = ZeroExpr; 70 71 NumSGPRsForWavesPerEU = ZeroExpr; 72 NumVGPRsForWavesPerEU = ZeroExpr; 73 Occupancy = ZeroExpr; 74 DynamicCallStack = ZeroExpr; 75 VCCUsed = ZeroExpr; 76 } 77 78 static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, 79 const GCNSubtarget &ST) { 80 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) | 81 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | 82 S_00B848_PRIV(ProgInfo.Priv) | 83 S_00B848_DEBUG_MODE(ProgInfo.DebugMode) | 84 S_00B848_WGP_MODE(ProgInfo.WgpMode) | 85 S_00B848_MEM_ORDERED(ProgInfo.MemOrdered); 86 87 if (ST.hasDX10ClampMode()) 88 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); 89 90 if (ST.hasIEEEMode()) 91 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); 92 93 if (ST.hasRrWGMode()) 94 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); 95 96 return Reg; 97 } 98 99 static uint64_t getPGMRSrc1Reg(const SIProgramInfo &ProgInfo, 100 CallingConv::ID CC, const GCNSubtarget &ST) { 101 uint64_t Reg = S_00B848_PRIORITY(ProgInfo.Priority) | 102 S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | 103 S_00B848_PRIV(ProgInfo.Priv) | 104 S_00B848_DEBUG_MODE(ProgInfo.DebugMode); 105 106 if (ST.hasDX10ClampMode()) 107 Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); 108 109 if (ST.hasIEEEMode()) 110 Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); 111 112 if (ST.hasRrWGMode()) 113 Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); 114 115 switch (CC) { 116 case CallingConv::AMDGPU_PS: 117 Reg |= S_00B028_MEM_ORDERED(ProgInfo.MemOrdered); 118 break; 119 case CallingConv::AMDGPU_VS: 120 Reg |= S_00B128_MEM_ORDERED(ProgInfo.MemOrdered); 121 break; 122 case CallingConv::AMDGPU_GS: 123 Reg |= S_00B228_WGP_MODE(ProgInfo.WgpMode) | 124 S_00B228_MEM_ORDERED(ProgInfo.MemOrdered); 125 break; 126 case CallingConv::AMDGPU_HS: 127 Reg |= S_00B428_WGP_MODE(ProgInfo.WgpMode) | 128 S_00B428_MEM_ORDERED(ProgInfo.MemOrdered); 129 break; 130 default: 131 break; 132 } 133 return Reg; 134 } 135 136 static uint64_t getComputePGMRSrc2Reg(const SIProgramInfo &ProgInfo) { 137 uint64_t Reg = S_00B84C_USER_SGPR(ProgInfo.UserSGPR) | 138 S_00B84C_TRAP_HANDLER(ProgInfo.TrapHandlerEnable) | 139 S_00B84C_TGID_X_EN(ProgInfo.TGIdXEnable) | 140 S_00B84C_TGID_Y_EN(ProgInfo.TGIdYEnable) | 141 S_00B84C_TGID_Z_EN(ProgInfo.TGIdZEnable) | 142 S_00B84C_TG_SIZE_EN(ProgInfo.TGSizeEnable) | 143 S_00B84C_TIDIG_COMP_CNT(ProgInfo.TIdIGCompCount) | 144 S_00B84C_EXCP_EN_MSB(ProgInfo.EXCPEnMSB) | 145 S_00B84C_LDS_SIZE(ProgInfo.LdsSize) | 146 S_00B84C_EXCP_EN(ProgInfo.EXCPEnable); 147 148 return Reg; 149 } 150 151 static const MCExpr *MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, 152 MCContext &Ctx) { 153 if (Mask) { 154 const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx); 155 Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx); 156 } 157 if (Shift) { 158 const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx); 159 Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx); 160 } 161 return Val; 162 } 163 164 const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST, 165 MCContext &Ctx) const { 166 uint64_t Reg = getComputePGMRSrc1Reg(*this, ST); 167 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx); 168 const MCExpr *Res = MCBinaryExpr::createOr( 169 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx), 170 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx); 171 return MCBinaryExpr::createOr(RegExpr, Res, Ctx); 172 } 173 174 const MCExpr *SIProgramInfo::getPGMRSrc1(CallingConv::ID CC, 175 const GCNSubtarget &ST, 176 MCContext &Ctx) const { 177 if (AMDGPU::isCompute(CC)) { 178 return getComputePGMRSrc1(ST, Ctx); 179 } 180 181 uint64_t Reg = getPGMRSrc1Reg(*this, CC, ST); 182 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx); 183 const MCExpr *Res = MCBinaryExpr::createOr( 184 MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx), 185 MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx); 186 return MCBinaryExpr::createOr(RegExpr, Res, Ctx); 187 } 188 189 const MCExpr *SIProgramInfo::getComputePGMRSrc2(MCContext &Ctx) const { 190 uint64_t Reg = getComputePGMRSrc2Reg(*this); 191 const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx); 192 return MCBinaryExpr::createOr(ScratchEnable, RegExpr, Ctx); 193 } 194 195 const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC, 196 MCContext &Ctx) const { 197 if (AMDGPU::isCompute(CC)) 198 return getComputePGMRSrc2(Ctx); 199 200 return MCConstantExpr::create(0, Ctx); 201 } 202