1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/kernel.h> 26 #include <linux/firmware.h> 27 #include <linux/module.h> 28 #include <linux/pci.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_gfx.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "amdgpu_atomfirmware.h" 35 #include "amdgpu_pm.h" 36 37 #include "gc/gc_9_0_offset.h" 38 #include "gc/gc_9_0_sh_mask.h" 39 40 #include "vega10_enum.h" 41 #include "hdp/hdp_4_0_offset.h" 42 43 #include "soc15_common.h" 44 #include "clearstate_gfx9.h" 45 #include "v9_structs.h" 46 47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 48 49 #include "amdgpu_ras.h" 50 51 #define GFX9_NUM_GFX_RINGS 1 52 #define GFX9_MEC_HPD_SIZE 4096 53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 55 56 #define mmPWR_MISC_CNTL_STATUS 0x0183 57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L 61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L 62 63 #define mmGCEA_PROBE_MAP 0x070c 64 #define mmGCEA_PROBE_MAP_BASE_IDX 0 65 66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 68 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 72 73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 75 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 79 80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 82 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 86 87 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 89 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 90 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 93 94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 123 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 125 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 127 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 129 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 131 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 133 134 enum ta_ras_gfx_subblock { 135 /*CPC*/ 136 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 137 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 138 TA_RAS_BLOCK__GFX_CPC_UCODE, 139 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 140 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 141 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 142 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 143 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 144 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 145 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 146 /* CPF*/ 147 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 148 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 149 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 150 TA_RAS_BLOCK__GFX_CPF_TAG, 151 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 152 /* CPG*/ 153 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 154 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 155 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 156 TA_RAS_BLOCK__GFX_CPG_TAG, 157 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 158 /* GDS*/ 159 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 160 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 161 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 162 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 163 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 164 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 165 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 166 /* SPI*/ 167 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 168 /* SQ*/ 169 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 170 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 171 TA_RAS_BLOCK__GFX_SQ_LDS_D, 172 TA_RAS_BLOCK__GFX_SQ_LDS_I, 173 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 174 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 175 /* SQC (3 ranges)*/ 176 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 177 /* SQC range 0*/ 178 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 180 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 181 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 182 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 183 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 184 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 185 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 186 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 187 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 188 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 189 /* SQC range 1*/ 190 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 191 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 192 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 193 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 194 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 195 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 196 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 197 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 198 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 199 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 200 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 201 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 202 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 203 /* SQC range 2*/ 204 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 205 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 206 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 207 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 208 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 209 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 210 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 212 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 213 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 214 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 215 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 216 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 217 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 218 /* TA*/ 219 TA_RAS_BLOCK__GFX_TA_INDEX_START, 220 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 221 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 222 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 223 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 224 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 225 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 226 /* TCA*/ 227 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 228 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 229 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 230 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 231 /* TCC (5 sub-ranges)*/ 232 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 233 /* TCC range 0*/ 234 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 235 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 236 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 237 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 238 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 239 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 240 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 241 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 242 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 243 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 244 /* TCC range 1*/ 245 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 246 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 247 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 248 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 249 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 250 /* TCC range 2*/ 251 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 252 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 253 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 254 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 255 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 256 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 257 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 258 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 259 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 260 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 261 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 262 /* TCC range 3*/ 263 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 264 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 265 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 266 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 267 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 268 /* TCC range 4*/ 269 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 270 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 271 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 272 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 273 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 274 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 275 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 276 /* TCI*/ 277 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 278 /* TCP*/ 279 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 280 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 281 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 282 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 283 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 284 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 285 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 286 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 287 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 288 /* TD*/ 289 TA_RAS_BLOCK__GFX_TD_INDEX_START, 290 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 291 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 292 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 293 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 294 /* EA (3 sub-ranges)*/ 295 TA_RAS_BLOCK__GFX_EA_INDEX_START, 296 /* EA range 0*/ 297 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 298 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 299 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 300 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 301 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 302 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 303 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 304 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 305 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 306 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 307 /* EA range 1*/ 308 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 309 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 310 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 311 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 312 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 313 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 314 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 315 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 316 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 317 /* EA range 2*/ 318 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 319 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 320 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 321 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 322 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 323 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 324 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 325 /* UTC VM L2 bank*/ 326 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 327 /* UTC VM walker*/ 328 TA_RAS_BLOCK__UTC_VML2_WALKER, 329 /* UTC ATC L2 2MB cache*/ 330 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 331 /* UTC ATC L2 4KB cache*/ 332 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 333 TA_RAS_BLOCK__GFX_MAX 334 }; 335 336 struct ras_gfx_subblock { 337 unsigned char *name; 338 int ta_subblock; 339 int hw_supported_error_type; 340 int sw_supported_error_type; 341 }; 342 343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 344 [AMDGPU_RAS_BLOCK__##subblock] = { \ 345 #subblock, \ 346 TA_RAS_BLOCK__##subblock, \ 347 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 348 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 349 } 350 351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 352 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 353 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 354 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 355 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 356 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 357 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 358 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 359 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 360 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 361 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 362 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 363 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 364 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 365 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 366 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 367 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 368 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 369 0), 370 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 371 0), 372 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 373 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 374 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 375 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 376 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 377 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 378 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 379 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 380 0, 0), 381 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 382 0), 383 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 384 0, 0), 385 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 386 0), 387 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 388 0, 0), 389 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 390 0), 391 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 392 1), 393 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 394 0, 0, 0), 395 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 396 0), 397 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 398 0), 399 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 400 0), 401 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 402 0), 403 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 404 0), 405 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 406 0, 0), 407 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 408 0), 409 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 410 0), 411 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 412 0, 0, 0), 413 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 414 0), 415 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 416 0), 417 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 418 0), 419 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 420 0), 421 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 422 0), 423 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 424 0, 0), 425 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 426 0), 427 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 428 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 429 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 430 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 431 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 432 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 433 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 434 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 435 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 436 1), 437 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 438 1), 439 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 440 1), 441 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 442 0), 443 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 444 0), 445 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 446 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 447 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 449 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 450 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 451 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 452 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 453 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 454 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 457 0), 458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 460 0), 461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 462 0, 0), 463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 464 0), 465 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 466 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 467 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 468 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 469 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 470 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 471 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 472 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 473 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 474 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 475 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 476 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 477 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 478 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 479 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 480 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 481 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 482 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 483 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 484 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 485 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 486 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 487 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 488 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 495 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 496 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 497 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 498 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 499 }; 500 501 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 502 { 503 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 504 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 505 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 506 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 507 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 508 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 509 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 510 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 511 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 512 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 523 }; 524 525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 526 { 527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 532 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 533 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 534 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 535 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 539 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 545 }; 546 547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 548 { 549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 554 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 555 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 556 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 557 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 560 }; 561 562 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 563 { 564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 569 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 570 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 571 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 572 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 588 }; 589 590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 591 { 592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 597 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 598 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 599 }; 600 601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 602 { 603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 608 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 609 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 610 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 611 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 622 }; 623 624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 625 { 626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 631 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 632 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 633 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 634 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 638 }; 639 640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 641 { 642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 645 }; 646 647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 648 { 649 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 665 }; 666 667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 668 { 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 682 }; 683 684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 685 { 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), 693 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), 694 }; 695 696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 697 { 698 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 699 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 700 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 701 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 702 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 703 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 704 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 705 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 706 }; 707 708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 709 { 710 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 711 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 712 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 713 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 714 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 715 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 716 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 717 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 718 }; 719 720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 724 725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 730 struct amdgpu_cu_info *cu_info); 731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); 733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 736 void *ras_error_status); 737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 738 void *inject_if); 739 740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 741 { 742 switch (adev->asic_type) { 743 case CHIP_VEGA10: 744 soc15_program_register_sequence(adev, 745 golden_settings_gc_9_0, 746 ARRAY_SIZE(golden_settings_gc_9_0)); 747 soc15_program_register_sequence(adev, 748 golden_settings_gc_9_0_vg10, 749 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 750 break; 751 case CHIP_VEGA12: 752 soc15_program_register_sequence(adev, 753 golden_settings_gc_9_2_1, 754 ARRAY_SIZE(golden_settings_gc_9_2_1)); 755 soc15_program_register_sequence(adev, 756 golden_settings_gc_9_2_1_vg12, 757 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 758 break; 759 case CHIP_VEGA20: 760 soc15_program_register_sequence(adev, 761 golden_settings_gc_9_0, 762 ARRAY_SIZE(golden_settings_gc_9_0)); 763 soc15_program_register_sequence(adev, 764 golden_settings_gc_9_0_vg20, 765 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 766 break; 767 case CHIP_ARCTURUS: 768 soc15_program_register_sequence(adev, 769 golden_settings_gc_9_4_1_arct, 770 ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); 771 break; 772 case CHIP_RAVEN: 773 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 774 ARRAY_SIZE(golden_settings_gc_9_1)); 775 if (adev->rev_id >= 8) 776 soc15_program_register_sequence(adev, 777 golden_settings_gc_9_1_rv2, 778 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 779 else 780 soc15_program_register_sequence(adev, 781 golden_settings_gc_9_1_rv1, 782 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 783 break; 784 case CHIP_RENOIR: 785 soc15_program_register_sequence(adev, 786 golden_settings_gc_9_1_rn, 787 ARRAY_SIZE(golden_settings_gc_9_1_rn)); 788 return; /* for renoir, don't need common goldensetting */ 789 default: 790 break; 791 } 792 793 if (adev->asic_type != CHIP_ARCTURUS) 794 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 795 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 796 } 797 798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 799 { 800 adev->gfx.scratch.num_reg = 8; 801 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 802 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 803 } 804 805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 806 bool wc, uint32_t reg, uint32_t val) 807 { 808 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 809 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 810 WRITE_DATA_DST_SEL(0) | 811 (wc ? WR_CONFIRM : 0)); 812 amdgpu_ring_write(ring, reg); 813 amdgpu_ring_write(ring, 0); 814 amdgpu_ring_write(ring, val); 815 } 816 817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 818 int mem_space, int opt, uint32_t addr0, 819 uint32_t addr1, uint32_t ref, uint32_t mask, 820 uint32_t inv) 821 { 822 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 823 amdgpu_ring_write(ring, 824 /* memory (1) or register (0) */ 825 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 826 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 827 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 828 WAIT_REG_MEM_ENGINE(eng_sel))); 829 830 if (mem_space) 831 BUG_ON(addr0 & 0x3); /* Dword align */ 832 amdgpu_ring_write(ring, addr0); 833 amdgpu_ring_write(ring, addr1); 834 amdgpu_ring_write(ring, ref); 835 amdgpu_ring_write(ring, mask); 836 amdgpu_ring_write(ring, inv); /* poll interval */ 837 } 838 839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 840 { 841 struct amdgpu_device *adev = ring->adev; 842 uint32_t scratch; 843 uint32_t tmp = 0; 844 unsigned i; 845 int r; 846 847 r = amdgpu_gfx_scratch_get(adev, &scratch); 848 if (r) 849 return r; 850 851 WREG32(scratch, 0xCAFEDEAD); 852 r = amdgpu_ring_alloc(ring, 3); 853 if (r) 854 goto error_free_scratch; 855 856 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 857 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 858 amdgpu_ring_write(ring, 0xDEADBEEF); 859 amdgpu_ring_commit(ring); 860 861 for (i = 0; i < adev->usec_timeout; i++) { 862 tmp = RREG32(scratch); 863 if (tmp == 0xDEADBEEF) 864 break; 865 udelay(1); 866 } 867 868 if (i >= adev->usec_timeout) 869 r = -ETIMEDOUT; 870 871 error_free_scratch: 872 amdgpu_gfx_scratch_free(adev, scratch); 873 return r; 874 } 875 876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 877 { 878 struct amdgpu_device *adev = ring->adev; 879 struct amdgpu_ib ib; 880 struct dma_fence *f = NULL; 881 882 unsigned index; 883 uint64_t gpu_addr; 884 uint32_t tmp; 885 long r; 886 887 r = amdgpu_device_wb_get(adev, &index); 888 if (r) 889 return r; 890 891 gpu_addr = adev->wb.gpu_addr + (index * 4); 892 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 893 memset(&ib, 0, sizeof(ib)); 894 r = amdgpu_ib_get(adev, NULL, 16, &ib); 895 if (r) 896 goto err1; 897 898 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 899 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 900 ib.ptr[2] = lower_32_bits(gpu_addr); 901 ib.ptr[3] = upper_32_bits(gpu_addr); 902 ib.ptr[4] = 0xDEADBEEF; 903 ib.length_dw = 5; 904 905 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 906 if (r) 907 goto err2; 908 909 r = dma_fence_wait_timeout(f, false, timeout); 910 if (r == 0) { 911 r = -ETIMEDOUT; 912 goto err2; 913 } else if (r < 0) { 914 goto err2; 915 } 916 917 tmp = adev->wb.wb[index]; 918 if (tmp == 0xDEADBEEF) 919 r = 0; 920 else 921 r = -EINVAL; 922 923 err2: 924 amdgpu_ib_free(adev, &ib, NULL); 925 dma_fence_put(f); 926 err1: 927 amdgpu_device_wb_free(adev, index); 928 return r; 929 } 930 931 932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 933 { 934 release_firmware(adev->gfx.pfp_fw); 935 adev->gfx.pfp_fw = NULL; 936 release_firmware(adev->gfx.me_fw); 937 adev->gfx.me_fw = NULL; 938 release_firmware(adev->gfx.ce_fw); 939 adev->gfx.ce_fw = NULL; 940 release_firmware(adev->gfx.rlc_fw); 941 adev->gfx.rlc_fw = NULL; 942 release_firmware(adev->gfx.mec_fw); 943 adev->gfx.mec_fw = NULL; 944 release_firmware(adev->gfx.mec2_fw); 945 adev->gfx.mec2_fw = NULL; 946 947 kfree(adev->gfx.rlc.register_list_format); 948 } 949 950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 951 { 952 const struct rlc_firmware_header_v2_1 *rlc_hdr; 953 954 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 955 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 956 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 957 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 958 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 959 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 960 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 961 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 962 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 963 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 964 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 965 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 966 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 967 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 968 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 969 } 970 971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 972 { 973 adev->gfx.me_fw_write_wait = false; 974 adev->gfx.mec_fw_write_wait = false; 975 976 if ((adev->gfx.mec_fw_version < 0x000001a5) || 977 (adev->gfx.mec_feature_version < 46) || 978 (adev->gfx.pfp_fw_version < 0x000000b7) || 979 (adev->gfx.pfp_feature_version < 46)) 980 DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ 981 GRBM requires 1-cycle delay in cp firmware\n"); 982 983 switch (adev->asic_type) { 984 case CHIP_VEGA10: 985 if ((adev->gfx.me_fw_version >= 0x0000009c) && 986 (adev->gfx.me_feature_version >= 42) && 987 (adev->gfx.pfp_fw_version >= 0x000000b1) && 988 (adev->gfx.pfp_feature_version >= 42)) 989 adev->gfx.me_fw_write_wait = true; 990 991 if ((adev->gfx.mec_fw_version >= 0x00000193) && 992 (adev->gfx.mec_feature_version >= 42)) 993 adev->gfx.mec_fw_write_wait = true; 994 break; 995 case CHIP_VEGA12: 996 if ((adev->gfx.me_fw_version >= 0x0000009c) && 997 (adev->gfx.me_feature_version >= 44) && 998 (adev->gfx.pfp_fw_version >= 0x000000b2) && 999 (adev->gfx.pfp_feature_version >= 44)) 1000 adev->gfx.me_fw_write_wait = true; 1001 1002 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1003 (adev->gfx.mec_feature_version >= 44)) 1004 adev->gfx.mec_fw_write_wait = true; 1005 break; 1006 case CHIP_VEGA20: 1007 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1008 (adev->gfx.me_feature_version >= 44) && 1009 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1010 (adev->gfx.pfp_feature_version >= 44)) 1011 adev->gfx.me_fw_write_wait = true; 1012 1013 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1014 (adev->gfx.mec_feature_version >= 44)) 1015 adev->gfx.mec_fw_write_wait = true; 1016 break; 1017 case CHIP_RAVEN: 1018 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1019 (adev->gfx.me_feature_version >= 42) && 1020 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1021 (adev->gfx.pfp_feature_version >= 42)) 1022 adev->gfx.me_fw_write_wait = true; 1023 1024 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1025 (adev->gfx.mec_feature_version >= 42)) 1026 adev->gfx.mec_fw_write_wait = true; 1027 break; 1028 default: 1029 break; 1030 } 1031 } 1032 1033 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1034 { 1035 switch (adev->asic_type) { 1036 case CHIP_VEGA10: 1037 case CHIP_VEGA12: 1038 case CHIP_VEGA20: 1039 break; 1040 case CHIP_RAVEN: 1041 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) 1042 &&((adev->gfx.rlc_fw_version != 106 && 1043 adev->gfx.rlc_fw_version < 531) || 1044 (adev->gfx.rlc_fw_version == 53815) || 1045 (adev->gfx.rlc_feature_version < 1) || 1046 !adev->gfx.rlc.is_rlc_v2_1)) 1047 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1048 1049 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1050 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1051 AMD_PG_SUPPORT_CP | 1052 AMD_PG_SUPPORT_RLC_SMU_HS; 1053 break; 1054 case CHIP_RENOIR: 1055 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1056 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1057 AMD_PG_SUPPORT_CP | 1058 AMD_PG_SUPPORT_RLC_SMU_HS; 1059 break; 1060 default: 1061 break; 1062 } 1063 } 1064 1065 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1066 const char *chip_name) 1067 { 1068 char fw_name[30]; 1069 int err; 1070 struct amdgpu_firmware_info *info = NULL; 1071 const struct common_firmware_header *header = NULL; 1072 const struct gfx_firmware_header_v1_0 *cp_hdr; 1073 1074 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1075 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1076 if (err) 1077 goto out; 1078 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1079 if (err) 1080 goto out; 1081 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1082 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1083 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1084 1085 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1086 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1087 if (err) 1088 goto out; 1089 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1090 if (err) 1091 goto out; 1092 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1093 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1094 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1095 1096 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1097 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1098 if (err) 1099 goto out; 1100 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1101 if (err) 1102 goto out; 1103 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1104 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1105 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1106 1107 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1108 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1109 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1110 info->fw = adev->gfx.pfp_fw; 1111 header = (const struct common_firmware_header *)info->fw->data; 1112 adev->firmware.fw_size += 1113 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1114 1115 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1116 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1117 info->fw = adev->gfx.me_fw; 1118 header = (const struct common_firmware_header *)info->fw->data; 1119 adev->firmware.fw_size += 1120 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1121 1122 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1123 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1124 info->fw = adev->gfx.ce_fw; 1125 header = (const struct common_firmware_header *)info->fw->data; 1126 adev->firmware.fw_size += 1127 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1128 } 1129 1130 out: 1131 if (err) { 1132 dev_err(adev->dev, 1133 "gfx9: Failed to load firmware \"%s\"\n", 1134 fw_name); 1135 release_firmware(adev->gfx.pfp_fw); 1136 adev->gfx.pfp_fw = NULL; 1137 release_firmware(adev->gfx.me_fw); 1138 adev->gfx.me_fw = NULL; 1139 release_firmware(adev->gfx.ce_fw); 1140 adev->gfx.ce_fw = NULL; 1141 } 1142 return err; 1143 } 1144 1145 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1146 const char *chip_name) 1147 { 1148 char fw_name[30]; 1149 int err; 1150 struct amdgpu_firmware_info *info = NULL; 1151 const struct common_firmware_header *header = NULL; 1152 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1153 unsigned int *tmp = NULL; 1154 unsigned int i = 0; 1155 uint16_t version_major; 1156 uint16_t version_minor; 1157 uint32_t smu_version; 1158 1159 /* 1160 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1161 * instead of picasso_rlc.bin. 1162 * Judgment method: 1163 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1164 * or revision >= 0xD8 && revision <= 0xDF 1165 * otherwise is PCO FP5 1166 */ 1167 if (!strcmp(chip_name, "picasso") && 1168 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1169 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1170 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1171 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1172 (smu_version >= 0x41e2b)) 1173 /** 1174 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1175 */ 1176 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1177 else 1178 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1179 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1180 if (err) 1181 goto out; 1182 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1183 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1184 1185 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1186 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1187 if (version_major == 2 && version_minor == 1) 1188 adev->gfx.rlc.is_rlc_v2_1 = true; 1189 1190 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1191 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1192 adev->gfx.rlc.save_and_restore_offset = 1193 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1194 adev->gfx.rlc.clear_state_descriptor_offset = 1195 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1196 adev->gfx.rlc.avail_scratch_ram_locations = 1197 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1198 adev->gfx.rlc.reg_restore_list_size = 1199 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1200 adev->gfx.rlc.reg_list_format_start = 1201 le32_to_cpu(rlc_hdr->reg_list_format_start); 1202 adev->gfx.rlc.reg_list_format_separate_start = 1203 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1204 adev->gfx.rlc.starting_offsets_start = 1205 le32_to_cpu(rlc_hdr->starting_offsets_start); 1206 adev->gfx.rlc.reg_list_format_size_bytes = 1207 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1208 adev->gfx.rlc.reg_list_size_bytes = 1209 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1210 adev->gfx.rlc.register_list_format = 1211 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1212 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1213 if (!adev->gfx.rlc.register_list_format) { 1214 err = -ENOMEM; 1215 goto out; 1216 } 1217 1218 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1219 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1220 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1221 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1222 1223 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1224 1225 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1226 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1227 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1228 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1229 1230 if (adev->gfx.rlc.is_rlc_v2_1) 1231 gfx_v9_0_init_rlc_ext_microcode(adev); 1232 1233 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1234 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1235 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1236 info->fw = adev->gfx.rlc_fw; 1237 header = (const struct common_firmware_header *)info->fw->data; 1238 adev->firmware.fw_size += 1239 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1240 1241 if (adev->gfx.rlc.is_rlc_v2_1 && 1242 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1243 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1244 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1245 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1246 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1247 info->fw = adev->gfx.rlc_fw; 1248 adev->firmware.fw_size += 1249 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1250 1251 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1252 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1253 info->fw = adev->gfx.rlc_fw; 1254 adev->firmware.fw_size += 1255 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1256 1257 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1258 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1259 info->fw = adev->gfx.rlc_fw; 1260 adev->firmware.fw_size += 1261 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1262 } 1263 } 1264 1265 out: 1266 if (err) { 1267 dev_err(adev->dev, 1268 "gfx9: Failed to load firmware \"%s\"\n", 1269 fw_name); 1270 release_firmware(adev->gfx.rlc_fw); 1271 adev->gfx.rlc_fw = NULL; 1272 } 1273 return err; 1274 } 1275 1276 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1277 const char *chip_name) 1278 { 1279 char fw_name[30]; 1280 int err; 1281 struct amdgpu_firmware_info *info = NULL; 1282 const struct common_firmware_header *header = NULL; 1283 const struct gfx_firmware_header_v1_0 *cp_hdr; 1284 1285 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1286 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1287 if (err) 1288 goto out; 1289 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1290 if (err) 1291 goto out; 1292 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1293 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1294 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1295 1296 1297 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1298 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1299 if (!err) { 1300 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1301 if (err) 1302 goto out; 1303 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1304 adev->gfx.mec2_fw->data; 1305 adev->gfx.mec2_fw_version = 1306 le32_to_cpu(cp_hdr->header.ucode_version); 1307 adev->gfx.mec2_feature_version = 1308 le32_to_cpu(cp_hdr->ucode_feature_version); 1309 } else { 1310 err = 0; 1311 adev->gfx.mec2_fw = NULL; 1312 } 1313 1314 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1315 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1316 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1317 info->fw = adev->gfx.mec_fw; 1318 header = (const struct common_firmware_header *)info->fw->data; 1319 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1320 adev->firmware.fw_size += 1321 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1322 1323 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1324 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1325 info->fw = adev->gfx.mec_fw; 1326 adev->firmware.fw_size += 1327 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1328 1329 if (adev->gfx.mec2_fw) { 1330 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1331 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1332 info->fw = adev->gfx.mec2_fw; 1333 header = (const struct common_firmware_header *)info->fw->data; 1334 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1335 adev->firmware.fw_size += 1336 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1337 1338 /* TODO: Determine if MEC2 JT FW loading can be removed 1339 for all GFX V9 asic and above */ 1340 if (adev->asic_type != CHIP_ARCTURUS) { 1341 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1342 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1343 info->fw = adev->gfx.mec2_fw; 1344 adev->firmware.fw_size += 1345 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1346 PAGE_SIZE); 1347 } 1348 } 1349 } 1350 1351 out: 1352 gfx_v9_0_check_if_need_gfxoff(adev); 1353 gfx_v9_0_check_fw_write_wait(adev); 1354 if (err) { 1355 dev_err(adev->dev, 1356 "gfx9: Failed to load firmware \"%s\"\n", 1357 fw_name); 1358 release_firmware(adev->gfx.mec_fw); 1359 adev->gfx.mec_fw = NULL; 1360 release_firmware(adev->gfx.mec2_fw); 1361 adev->gfx.mec2_fw = NULL; 1362 } 1363 return err; 1364 } 1365 1366 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1367 { 1368 const char *chip_name; 1369 int r; 1370 1371 DRM_DEBUG("\n"); 1372 1373 switch (adev->asic_type) { 1374 case CHIP_VEGA10: 1375 chip_name = "vega10"; 1376 break; 1377 case CHIP_VEGA12: 1378 chip_name = "vega12"; 1379 break; 1380 case CHIP_VEGA20: 1381 chip_name = "vega20"; 1382 break; 1383 case CHIP_RAVEN: 1384 if (adev->rev_id >= 8) 1385 chip_name = "raven2"; 1386 else if (adev->pdev->device == 0x15d8) 1387 chip_name = "picasso"; 1388 else 1389 chip_name = "raven"; 1390 break; 1391 case CHIP_ARCTURUS: 1392 chip_name = "arcturus"; 1393 break; 1394 case CHIP_RENOIR: 1395 chip_name = "renoir"; 1396 break; 1397 default: 1398 BUG(); 1399 } 1400 1401 /* No CPG in Arcturus */ 1402 if (adev->asic_type != CHIP_ARCTURUS) { 1403 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1404 if (r) 1405 return r; 1406 } 1407 1408 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1409 if (r) 1410 return r; 1411 1412 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1413 if (r) 1414 return r; 1415 1416 return r; 1417 } 1418 1419 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1420 { 1421 u32 count = 0; 1422 const struct cs_section_def *sect = NULL; 1423 const struct cs_extent_def *ext = NULL; 1424 1425 /* begin clear state */ 1426 count += 2; 1427 /* context control state */ 1428 count += 3; 1429 1430 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1431 for (ext = sect->section; ext->extent != NULL; ++ext) { 1432 if (sect->id == SECT_CONTEXT) 1433 count += 2 + ext->reg_count; 1434 else 1435 return 0; 1436 } 1437 } 1438 1439 /* end clear state */ 1440 count += 2; 1441 /* clear state */ 1442 count += 2; 1443 1444 return count; 1445 } 1446 1447 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1448 volatile u32 *buffer) 1449 { 1450 u32 count = 0, i; 1451 const struct cs_section_def *sect = NULL; 1452 const struct cs_extent_def *ext = NULL; 1453 1454 if (adev->gfx.rlc.cs_data == NULL) 1455 return; 1456 if (buffer == NULL) 1457 return; 1458 1459 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1460 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1461 1462 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1463 buffer[count++] = cpu_to_le32(0x80000000); 1464 buffer[count++] = cpu_to_le32(0x80000000); 1465 1466 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1467 for (ext = sect->section; ext->extent != NULL; ++ext) { 1468 if (sect->id == SECT_CONTEXT) { 1469 buffer[count++] = 1470 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1471 buffer[count++] = cpu_to_le32(ext->reg_index - 1472 PACKET3_SET_CONTEXT_REG_START); 1473 for (i = 0; i < ext->reg_count; i++) 1474 buffer[count++] = cpu_to_le32(ext->extent[i]); 1475 } else { 1476 return; 1477 } 1478 } 1479 } 1480 1481 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1482 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1483 1484 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1485 buffer[count++] = cpu_to_le32(0); 1486 } 1487 1488 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1489 { 1490 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1491 uint32_t pg_always_on_cu_num = 2; 1492 uint32_t always_on_cu_num; 1493 uint32_t i, j, k; 1494 uint32_t mask, cu_bitmap, counter; 1495 1496 if (adev->flags & AMD_IS_APU) 1497 always_on_cu_num = 4; 1498 else if (adev->asic_type == CHIP_VEGA12) 1499 always_on_cu_num = 8; 1500 else 1501 always_on_cu_num = 12; 1502 1503 mutex_lock(&adev->grbm_idx_mutex); 1504 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1505 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1506 mask = 1; 1507 cu_bitmap = 0; 1508 counter = 0; 1509 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1510 1511 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1512 if (cu_info->bitmap[i][j] & mask) { 1513 if (counter == pg_always_on_cu_num) 1514 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1515 if (counter < always_on_cu_num) 1516 cu_bitmap |= mask; 1517 else 1518 break; 1519 counter++; 1520 } 1521 mask <<= 1; 1522 } 1523 1524 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1525 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1526 } 1527 } 1528 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1529 mutex_unlock(&adev->grbm_idx_mutex); 1530 } 1531 1532 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1533 { 1534 uint32_t data; 1535 1536 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1537 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1538 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1539 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1540 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1541 1542 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1543 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1544 1545 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1546 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1547 1548 mutex_lock(&adev->grbm_idx_mutex); 1549 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1550 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1551 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1552 1553 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1554 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1555 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1556 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1557 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1558 1559 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1560 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1561 data &= 0x0000FFFF; 1562 data |= 0x00C00000; 1563 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1564 1565 /* 1566 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1567 * programmed in gfx_v9_0_init_always_on_cu_mask() 1568 */ 1569 1570 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1571 * but used for RLC_LB_CNTL configuration */ 1572 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1573 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1574 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1575 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1576 mutex_unlock(&adev->grbm_idx_mutex); 1577 1578 gfx_v9_0_init_always_on_cu_mask(adev); 1579 } 1580 1581 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1582 { 1583 uint32_t data; 1584 1585 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1586 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1587 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1588 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1589 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1590 1591 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1592 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1593 1594 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1595 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1596 1597 mutex_lock(&adev->grbm_idx_mutex); 1598 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1599 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1600 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1601 1602 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1603 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1604 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1605 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1606 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1607 1608 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1609 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1610 data &= 0x0000FFFF; 1611 data |= 0x00C00000; 1612 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1613 1614 /* 1615 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1616 * programmed in gfx_v9_0_init_always_on_cu_mask() 1617 */ 1618 1619 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1620 * but used for RLC_LB_CNTL configuration */ 1621 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1622 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1623 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1624 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1625 mutex_unlock(&adev->grbm_idx_mutex); 1626 1627 gfx_v9_0_init_always_on_cu_mask(adev); 1628 } 1629 1630 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1631 { 1632 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1633 } 1634 1635 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1636 { 1637 return 5; 1638 } 1639 1640 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1641 { 1642 const struct cs_section_def *cs_data; 1643 int r; 1644 1645 adev->gfx.rlc.cs_data = gfx9_cs_data; 1646 1647 cs_data = adev->gfx.rlc.cs_data; 1648 1649 if (cs_data) { 1650 /* init clear state block */ 1651 r = amdgpu_gfx_rlc_init_csb(adev); 1652 if (r) 1653 return r; 1654 } 1655 1656 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 1657 /* TODO: double check the cp_table_size for RV */ 1658 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1659 r = amdgpu_gfx_rlc_init_cpt(adev); 1660 if (r) 1661 return r; 1662 } 1663 1664 switch (adev->asic_type) { 1665 case CHIP_RAVEN: 1666 gfx_v9_0_init_lbpw(adev); 1667 break; 1668 case CHIP_VEGA20: 1669 gfx_v9_4_init_lbpw(adev); 1670 break; 1671 default: 1672 break; 1673 } 1674 1675 return 0; 1676 } 1677 1678 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1679 { 1680 int r; 1681 1682 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1683 if (unlikely(r != 0)) 1684 return r; 1685 1686 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1687 AMDGPU_GEM_DOMAIN_VRAM); 1688 if (!r) 1689 adev->gfx.rlc.clear_state_gpu_addr = 1690 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1691 1692 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1693 1694 return r; 1695 } 1696 1697 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1698 { 1699 int r; 1700 1701 if (!adev->gfx.rlc.clear_state_obj) 1702 return; 1703 1704 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1705 if (likely(r == 0)) { 1706 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1707 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1708 } 1709 } 1710 1711 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1712 { 1713 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1714 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1715 } 1716 1717 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1718 { 1719 int r; 1720 u32 *hpd; 1721 const __le32 *fw_data; 1722 unsigned fw_size; 1723 u32 *fw; 1724 size_t mec_hpd_size; 1725 1726 const struct gfx_firmware_header_v1_0 *mec_hdr; 1727 1728 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1729 1730 /* take ownership of the relevant compute queues */ 1731 amdgpu_gfx_compute_queue_acquire(adev); 1732 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1733 1734 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1735 AMDGPU_GEM_DOMAIN_VRAM, 1736 &adev->gfx.mec.hpd_eop_obj, 1737 &adev->gfx.mec.hpd_eop_gpu_addr, 1738 (void **)&hpd); 1739 if (r) { 1740 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1741 gfx_v9_0_mec_fini(adev); 1742 return r; 1743 } 1744 1745 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1746 1747 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1748 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1749 1750 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1751 1752 fw_data = (const __le32 *) 1753 (adev->gfx.mec_fw->data + 1754 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1755 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1756 1757 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1758 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1759 &adev->gfx.mec.mec_fw_obj, 1760 &adev->gfx.mec.mec_fw_gpu_addr, 1761 (void **)&fw); 1762 if (r) { 1763 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1764 gfx_v9_0_mec_fini(adev); 1765 return r; 1766 } 1767 1768 memcpy(fw, fw_data, fw_size); 1769 1770 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1771 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1772 1773 return 0; 1774 } 1775 1776 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1777 { 1778 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1779 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1780 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1781 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1782 (SQ_IND_INDEX__FORCE_READ_MASK)); 1783 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1784 } 1785 1786 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1787 uint32_t wave, uint32_t thread, 1788 uint32_t regno, uint32_t num, uint32_t *out) 1789 { 1790 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1791 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1792 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1793 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1794 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1795 (SQ_IND_INDEX__FORCE_READ_MASK) | 1796 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1797 while (num--) 1798 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1799 } 1800 1801 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1802 { 1803 /* type 1 wave data */ 1804 dst[(*no_fields)++] = 1; 1805 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1806 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1807 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1808 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1809 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1810 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1811 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1812 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1813 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1814 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1815 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1816 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1817 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1818 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1819 } 1820 1821 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1822 uint32_t wave, uint32_t start, 1823 uint32_t size, uint32_t *dst) 1824 { 1825 wave_read_regs( 1826 adev, simd, wave, 0, 1827 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1828 } 1829 1830 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1831 uint32_t wave, uint32_t thread, 1832 uint32_t start, uint32_t size, 1833 uint32_t *dst) 1834 { 1835 wave_read_regs( 1836 adev, simd, wave, thread, 1837 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1838 } 1839 1840 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1841 u32 me, u32 pipe, u32 q, u32 vm) 1842 { 1843 soc15_grbm_select(adev, me, pipe, q, vm); 1844 } 1845 1846 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1847 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1848 .select_se_sh = &gfx_v9_0_select_se_sh, 1849 .read_wave_data = &gfx_v9_0_read_wave_data, 1850 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1851 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1852 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1853 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1854 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1855 }; 1856 1857 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1858 { 1859 u32 gb_addr_config; 1860 int err; 1861 1862 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1863 1864 switch (adev->asic_type) { 1865 case CHIP_VEGA10: 1866 adev->gfx.config.max_hw_contexts = 8; 1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1871 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1872 break; 1873 case CHIP_VEGA12: 1874 adev->gfx.config.max_hw_contexts = 8; 1875 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1876 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1877 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1878 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1879 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1880 DRM_INFO("fix gfx.config for vega12\n"); 1881 break; 1882 case CHIP_VEGA20: 1883 adev->gfx.config.max_hw_contexts = 8; 1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1887 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1888 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1889 gb_addr_config &= ~0xf3e777ff; 1890 gb_addr_config |= 0x22014042; 1891 /* check vbios table if gpu info is not available */ 1892 err = amdgpu_atomfirmware_get_gfx_info(adev); 1893 if (err) 1894 return err; 1895 break; 1896 case CHIP_RAVEN: 1897 adev->gfx.config.max_hw_contexts = 8; 1898 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1899 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1900 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1901 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1902 if (adev->rev_id >= 8) 1903 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1904 else 1905 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1906 break; 1907 case CHIP_ARCTURUS: 1908 adev->gfx.config.max_hw_contexts = 8; 1909 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1910 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1911 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1912 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1913 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1914 gb_addr_config &= ~0xf3e777ff; 1915 gb_addr_config |= 0x22014042; 1916 break; 1917 case CHIP_RENOIR: 1918 adev->gfx.config.max_hw_contexts = 8; 1919 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1920 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1921 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1922 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1923 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1924 gb_addr_config &= ~0xf3e777ff; 1925 gb_addr_config |= 0x22010042; 1926 break; 1927 default: 1928 BUG(); 1929 break; 1930 } 1931 1932 adev->gfx.config.gb_addr_config = gb_addr_config; 1933 1934 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1935 REG_GET_FIELD( 1936 adev->gfx.config.gb_addr_config, 1937 GB_ADDR_CONFIG, 1938 NUM_PIPES); 1939 1940 adev->gfx.config.max_tile_pipes = 1941 adev->gfx.config.gb_addr_config_fields.num_pipes; 1942 1943 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1944 REG_GET_FIELD( 1945 adev->gfx.config.gb_addr_config, 1946 GB_ADDR_CONFIG, 1947 NUM_BANKS); 1948 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1949 REG_GET_FIELD( 1950 adev->gfx.config.gb_addr_config, 1951 GB_ADDR_CONFIG, 1952 MAX_COMPRESSED_FRAGS); 1953 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1954 REG_GET_FIELD( 1955 adev->gfx.config.gb_addr_config, 1956 GB_ADDR_CONFIG, 1957 NUM_RB_PER_SE); 1958 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1959 REG_GET_FIELD( 1960 adev->gfx.config.gb_addr_config, 1961 GB_ADDR_CONFIG, 1962 NUM_SHADER_ENGINES); 1963 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1964 REG_GET_FIELD( 1965 adev->gfx.config.gb_addr_config, 1966 GB_ADDR_CONFIG, 1967 PIPE_INTERLEAVE_SIZE)); 1968 1969 return 0; 1970 } 1971 1972 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1973 struct amdgpu_ngg_buf *ngg_buf, 1974 int size_se, 1975 int default_size_se) 1976 { 1977 int r; 1978 1979 if (size_se < 0) { 1980 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1981 return -EINVAL; 1982 } 1983 size_se = size_se ? size_se : default_size_se; 1984 1985 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1986 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1987 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1988 &ngg_buf->bo, 1989 &ngg_buf->gpu_addr, 1990 NULL); 1991 if (r) { 1992 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1993 return r; 1994 } 1995 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1996 1997 return r; 1998 } 1999 2000 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 2001 { 2002 int i; 2003 2004 for (i = 0; i < NGG_BUF_MAX; i++) 2005 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 2006 &adev->gfx.ngg.buf[i].gpu_addr, 2007 NULL); 2008 2009 memset(&adev->gfx.ngg.buf[0], 0, 2010 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 2011 2012 adev->gfx.ngg.init = false; 2013 2014 return 0; 2015 } 2016 2017 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 2018 { 2019 int r; 2020 2021 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 2022 return 0; 2023 2024 /* GDS reserve memory: 64 bytes alignment */ 2025 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 2026 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 2027 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 2028 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 2029 2030 /* Primitive Buffer */ 2031 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 2032 amdgpu_prim_buf_per_se, 2033 64 * 1024); 2034 if (r) { 2035 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 2036 goto err; 2037 } 2038 2039 /* Position Buffer */ 2040 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 2041 amdgpu_pos_buf_per_se, 2042 256 * 1024); 2043 if (r) { 2044 dev_err(adev->dev, "Failed to create Position Buffer\n"); 2045 goto err; 2046 } 2047 2048 /* Control Sideband */ 2049 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 2050 amdgpu_cntl_sb_buf_per_se, 2051 256); 2052 if (r) { 2053 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 2054 goto err; 2055 } 2056 2057 /* Parameter Cache, not created by default */ 2058 if (amdgpu_param_buf_per_se <= 0) 2059 goto out; 2060 2061 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 2062 amdgpu_param_buf_per_se, 2063 512 * 1024); 2064 if (r) { 2065 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 2066 goto err; 2067 } 2068 2069 out: 2070 adev->gfx.ngg.init = true; 2071 return 0; 2072 err: 2073 gfx_v9_0_ngg_fini(adev); 2074 return r; 2075 } 2076 2077 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 2078 { 2079 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2080 int r; 2081 u32 data, base; 2082 2083 if (!amdgpu_ngg) 2084 return 0; 2085 2086 /* Program buffer size */ 2087 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 2088 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 2089 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 2090 adev->gfx.ngg.buf[NGG_POS].size >> 8); 2091 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 2092 2093 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 2094 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 2095 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 2096 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 2097 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 2098 2099 /* Program buffer base address */ 2100 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2101 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 2102 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 2103 2104 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2105 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 2106 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 2107 2108 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2109 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 2110 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 2111 2112 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2113 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 2114 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 2115 2116 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2117 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 2118 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 2119 2120 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2121 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 2122 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 2123 2124 /* Clear GDS reserved memory */ 2125 r = amdgpu_ring_alloc(ring, 17); 2126 if (r) { 2127 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 2128 ring->name, r); 2129 return r; 2130 } 2131 2132 gfx_v9_0_write_data_to_reg(ring, 0, false, 2133 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 2134 (adev->gds.gds_size + 2135 adev->gfx.ngg.gds_reserve_size)); 2136 2137 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 2138 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 2139 PACKET3_DMA_DATA_DST_SEL(1) | 2140 PACKET3_DMA_DATA_SRC_SEL(2))); 2141 amdgpu_ring_write(ring, 0); 2142 amdgpu_ring_write(ring, 0); 2143 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 2144 amdgpu_ring_write(ring, 0); 2145 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 2146 adev->gfx.ngg.gds_reserve_size); 2147 2148 gfx_v9_0_write_data_to_reg(ring, 0, false, 2149 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 2150 2151 amdgpu_ring_commit(ring); 2152 2153 return 0; 2154 } 2155 2156 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2157 int mec, int pipe, int queue) 2158 { 2159 int r; 2160 unsigned irq_type; 2161 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2162 2163 ring = &adev->gfx.compute_ring[ring_id]; 2164 2165 /* mec0 is me1 */ 2166 ring->me = mec + 1; 2167 ring->pipe = pipe; 2168 ring->queue = queue; 2169 2170 ring->ring_obj = NULL; 2171 ring->use_doorbell = true; 2172 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2173 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2174 + (ring_id * GFX9_MEC_HPD_SIZE); 2175 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2176 2177 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2178 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2179 + ring->pipe; 2180 2181 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2182 r = amdgpu_ring_init(adev, ring, 1024, 2183 &adev->gfx.eop_irq, irq_type); 2184 if (r) 2185 return r; 2186 2187 2188 return 0; 2189 } 2190 2191 static int gfx_v9_0_sw_init(void *handle) 2192 { 2193 int i, j, k, r, ring_id; 2194 struct amdgpu_ring *ring; 2195 struct amdgpu_kiq *kiq; 2196 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2197 2198 switch (adev->asic_type) { 2199 case CHIP_VEGA10: 2200 case CHIP_VEGA12: 2201 case CHIP_VEGA20: 2202 case CHIP_RAVEN: 2203 case CHIP_ARCTURUS: 2204 case CHIP_RENOIR: 2205 adev->gfx.mec.num_mec = 2; 2206 break; 2207 default: 2208 adev->gfx.mec.num_mec = 1; 2209 break; 2210 } 2211 2212 adev->gfx.mec.num_pipe_per_mec = 4; 2213 adev->gfx.mec.num_queue_per_pipe = 8; 2214 2215 /* EOP Event */ 2216 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2217 if (r) 2218 return r; 2219 2220 /* Privileged reg */ 2221 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2222 &adev->gfx.priv_reg_irq); 2223 if (r) 2224 return r; 2225 2226 /* Privileged inst */ 2227 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2228 &adev->gfx.priv_inst_irq); 2229 if (r) 2230 return r; 2231 2232 /* ECC error */ 2233 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2234 &adev->gfx.cp_ecc_error_irq); 2235 if (r) 2236 return r; 2237 2238 /* FUE error */ 2239 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2240 &adev->gfx.cp_ecc_error_irq); 2241 if (r) 2242 return r; 2243 2244 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2245 2246 gfx_v9_0_scratch_init(adev); 2247 2248 r = gfx_v9_0_init_microcode(adev); 2249 if (r) { 2250 DRM_ERROR("Failed to load gfx firmware!\n"); 2251 return r; 2252 } 2253 2254 r = adev->gfx.rlc.funcs->init(adev); 2255 if (r) { 2256 DRM_ERROR("Failed to init rlc BOs!\n"); 2257 return r; 2258 } 2259 2260 r = gfx_v9_0_mec_init(adev); 2261 if (r) { 2262 DRM_ERROR("Failed to init MEC BOs!\n"); 2263 return r; 2264 } 2265 2266 /* set up the gfx ring */ 2267 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2268 ring = &adev->gfx.gfx_ring[i]; 2269 ring->ring_obj = NULL; 2270 if (!i) 2271 sprintf(ring->name, "gfx"); 2272 else 2273 sprintf(ring->name, "gfx_%d", i); 2274 ring->use_doorbell = true; 2275 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2276 r = amdgpu_ring_init(adev, ring, 1024, 2277 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2278 if (r) 2279 return r; 2280 } 2281 2282 /* set up the compute queues - allocate horizontally across pipes */ 2283 ring_id = 0; 2284 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2285 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2286 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2287 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2288 continue; 2289 2290 r = gfx_v9_0_compute_ring_init(adev, 2291 ring_id, 2292 i, k, j); 2293 if (r) 2294 return r; 2295 2296 ring_id++; 2297 } 2298 } 2299 } 2300 2301 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2302 if (r) { 2303 DRM_ERROR("Failed to init KIQ BOs!\n"); 2304 return r; 2305 } 2306 2307 kiq = &adev->gfx.kiq; 2308 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2309 if (r) 2310 return r; 2311 2312 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2313 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2314 if (r) 2315 return r; 2316 2317 adev->gfx.ce_ram_size = 0x8000; 2318 2319 r = gfx_v9_0_gpu_early_init(adev); 2320 if (r) 2321 return r; 2322 2323 r = gfx_v9_0_ngg_init(adev); 2324 if (r) 2325 return r; 2326 2327 return 0; 2328 } 2329 2330 2331 static int gfx_v9_0_sw_fini(void *handle) 2332 { 2333 int i; 2334 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2335 2336 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 2337 adev->gfx.ras_if) { 2338 struct ras_common_if *ras_if = adev->gfx.ras_if; 2339 struct ras_ih_if ih_info = { 2340 .head = *ras_if, 2341 }; 2342 2343 amdgpu_ras_debugfs_remove(adev, ras_if); 2344 amdgpu_ras_sysfs_remove(adev, ras_if); 2345 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 2346 amdgpu_ras_feature_enable(adev, ras_if, 0); 2347 kfree(ras_if); 2348 } 2349 2350 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2351 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2352 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2353 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2354 2355 amdgpu_gfx_mqd_sw_fini(adev); 2356 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2357 amdgpu_gfx_kiq_fini(adev); 2358 2359 gfx_v9_0_mec_fini(adev); 2360 gfx_v9_0_ngg_fini(adev); 2361 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2362 if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { 2363 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2364 &adev->gfx.rlc.cp_table_gpu_addr, 2365 (void **)&adev->gfx.rlc.cp_table_ptr); 2366 } 2367 gfx_v9_0_free_microcode(adev); 2368 2369 return 0; 2370 } 2371 2372 2373 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2374 { 2375 /* TODO */ 2376 } 2377 2378 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2379 { 2380 u32 data; 2381 2382 if (instance == 0xffffffff) 2383 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2384 else 2385 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2386 2387 if (se_num == 0xffffffff) 2388 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2389 else 2390 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2391 2392 if (sh_num == 0xffffffff) 2393 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2394 else 2395 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2396 2397 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2398 } 2399 2400 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2401 { 2402 u32 data, mask; 2403 2404 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2405 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2406 2407 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2408 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2409 2410 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2411 adev->gfx.config.max_sh_per_se); 2412 2413 return (~data) & mask; 2414 } 2415 2416 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2417 { 2418 int i, j; 2419 u32 data; 2420 u32 active_rbs = 0; 2421 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2422 adev->gfx.config.max_sh_per_se; 2423 2424 mutex_lock(&adev->grbm_idx_mutex); 2425 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2426 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2427 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2428 data = gfx_v9_0_get_rb_active_bitmap(adev); 2429 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2430 rb_bitmap_width_per_sh); 2431 } 2432 } 2433 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2434 mutex_unlock(&adev->grbm_idx_mutex); 2435 2436 adev->gfx.config.backend_enable_mask = active_rbs; 2437 adev->gfx.config.num_rbs = hweight32(active_rbs); 2438 } 2439 2440 #define DEFAULT_SH_MEM_BASES (0x6000) 2441 #define FIRST_COMPUTE_VMID (8) 2442 #define LAST_COMPUTE_VMID (16) 2443 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2444 { 2445 int i; 2446 uint32_t sh_mem_config; 2447 uint32_t sh_mem_bases; 2448 2449 /* 2450 * Configure apertures: 2451 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2452 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2453 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2454 */ 2455 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2456 2457 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2458 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2459 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2460 2461 mutex_lock(&adev->srbm_mutex); 2462 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2463 soc15_grbm_select(adev, 0, 0, 0, i); 2464 /* CP and shaders */ 2465 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2466 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2467 } 2468 soc15_grbm_select(adev, 0, 0, 0, 0); 2469 mutex_unlock(&adev->srbm_mutex); 2470 2471 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2472 acccess. These should be enabled by FW for target VMIDs. */ 2473 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 2474 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2475 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2476 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2477 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2478 } 2479 } 2480 2481 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2482 { 2483 int vmid; 2484 2485 /* 2486 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2487 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2488 * the driver can enable them for graphics. VMID0 should maintain 2489 * access so that HWS firmware can save/restore entries. 2490 */ 2491 for (vmid = 1; vmid < 16; vmid++) { 2492 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2493 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2494 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2495 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2496 } 2497 } 2498 2499 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2500 { 2501 u32 tmp; 2502 int i; 2503 2504 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2505 2506 gfx_v9_0_tiling_mode_table_init(adev); 2507 2508 gfx_v9_0_setup_rb(adev); 2509 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2510 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2511 2512 /* XXX SH_MEM regs */ 2513 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2514 mutex_lock(&adev->srbm_mutex); 2515 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2516 soc15_grbm_select(adev, 0, 0, 0, i); 2517 /* CP and shaders */ 2518 if (i == 0) { 2519 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2520 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2521 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2522 !!amdgpu_noretry); 2523 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2524 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2525 } else { 2526 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2527 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2528 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2529 !!amdgpu_noretry); 2530 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2531 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2532 (adev->gmc.private_aperture_start >> 48)); 2533 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2534 (adev->gmc.shared_aperture_start >> 48)); 2535 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2536 } 2537 } 2538 soc15_grbm_select(adev, 0, 0, 0, 0); 2539 2540 mutex_unlock(&adev->srbm_mutex); 2541 2542 gfx_v9_0_init_compute_vmid(adev); 2543 gfx_v9_0_init_gds_vmid(adev); 2544 } 2545 2546 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2547 { 2548 u32 i, j, k; 2549 u32 mask; 2550 2551 mutex_lock(&adev->grbm_idx_mutex); 2552 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2553 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2554 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2555 for (k = 0; k < adev->usec_timeout; k++) { 2556 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2557 break; 2558 udelay(1); 2559 } 2560 if (k == adev->usec_timeout) { 2561 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2562 0xffffffff, 0xffffffff); 2563 mutex_unlock(&adev->grbm_idx_mutex); 2564 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2565 i, j); 2566 return; 2567 } 2568 } 2569 } 2570 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2571 mutex_unlock(&adev->grbm_idx_mutex); 2572 2573 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2574 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2575 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2576 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2577 for (k = 0; k < adev->usec_timeout; k++) { 2578 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2579 break; 2580 udelay(1); 2581 } 2582 } 2583 2584 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2585 bool enable) 2586 { 2587 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2588 2589 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2590 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2591 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2592 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2593 2594 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2595 } 2596 2597 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2598 { 2599 /* csib */ 2600 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2601 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2602 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2603 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2604 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2605 adev->gfx.rlc.clear_state_size); 2606 } 2607 2608 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2609 int indirect_offset, 2610 int list_size, 2611 int *unique_indirect_regs, 2612 int unique_indirect_reg_count, 2613 int *indirect_start_offsets, 2614 int *indirect_start_offsets_count, 2615 int max_start_offsets_count) 2616 { 2617 int idx; 2618 2619 for (; indirect_offset < list_size; indirect_offset++) { 2620 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2621 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2622 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2623 2624 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2625 indirect_offset += 2; 2626 2627 /* look for the matching indice */ 2628 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2629 if (unique_indirect_regs[idx] == 2630 register_list_format[indirect_offset] || 2631 !unique_indirect_regs[idx]) 2632 break; 2633 } 2634 2635 BUG_ON(idx >= unique_indirect_reg_count); 2636 2637 if (!unique_indirect_regs[idx]) 2638 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2639 2640 indirect_offset++; 2641 } 2642 } 2643 } 2644 2645 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2646 { 2647 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2648 int unique_indirect_reg_count = 0; 2649 2650 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2651 int indirect_start_offsets_count = 0; 2652 2653 int list_size = 0; 2654 int i = 0, j = 0; 2655 u32 tmp = 0; 2656 2657 u32 *register_list_format = 2658 kmemdup(adev->gfx.rlc.register_list_format, 2659 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2660 if (!register_list_format) 2661 return -ENOMEM; 2662 2663 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2664 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2665 gfx_v9_1_parse_ind_reg_list(register_list_format, 2666 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2667 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2668 unique_indirect_regs, 2669 unique_indirect_reg_count, 2670 indirect_start_offsets, 2671 &indirect_start_offsets_count, 2672 ARRAY_SIZE(indirect_start_offsets)); 2673 2674 /* enable auto inc in case it is disabled */ 2675 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2676 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2677 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2678 2679 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2680 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2681 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2682 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2683 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2684 adev->gfx.rlc.register_restore[i]); 2685 2686 /* load indirect register */ 2687 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2688 adev->gfx.rlc.reg_list_format_start); 2689 2690 /* direct register portion */ 2691 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2692 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2693 register_list_format[i]); 2694 2695 /* indirect register portion */ 2696 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2697 if (register_list_format[i] == 0xFFFFFFFF) { 2698 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2699 continue; 2700 } 2701 2702 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2703 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2704 2705 for (j = 0; j < unique_indirect_reg_count; j++) { 2706 if (register_list_format[i] == unique_indirect_regs[j]) { 2707 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2708 break; 2709 } 2710 } 2711 2712 BUG_ON(j >= unique_indirect_reg_count); 2713 2714 i++; 2715 } 2716 2717 /* set save/restore list size */ 2718 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2719 list_size = list_size >> 1; 2720 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2721 adev->gfx.rlc.reg_restore_list_size); 2722 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2723 2724 /* write the starting offsets to RLC scratch ram */ 2725 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2726 adev->gfx.rlc.starting_offsets_start); 2727 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2728 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2729 indirect_start_offsets[i]); 2730 2731 /* load unique indirect regs*/ 2732 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2733 if (unique_indirect_regs[i] != 0) { 2734 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2735 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2736 unique_indirect_regs[i] & 0x3FFFF); 2737 2738 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2739 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2740 unique_indirect_regs[i] >> 20); 2741 } 2742 } 2743 2744 kfree(register_list_format); 2745 return 0; 2746 } 2747 2748 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2749 { 2750 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2751 } 2752 2753 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2754 bool enable) 2755 { 2756 uint32_t data = 0; 2757 uint32_t default_data = 0; 2758 2759 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2760 if (enable == true) { 2761 /* enable GFXIP control over CGPG */ 2762 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2763 if(default_data != data) 2764 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2765 2766 /* update status */ 2767 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2768 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2769 if(default_data != data) 2770 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2771 } else { 2772 /* restore GFXIP control over GCPG */ 2773 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2774 if(default_data != data) 2775 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2776 } 2777 } 2778 2779 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2780 { 2781 uint32_t data = 0; 2782 2783 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2784 AMD_PG_SUPPORT_GFX_SMG | 2785 AMD_PG_SUPPORT_GFX_DMG)) { 2786 /* init IDLE_POLL_COUNT = 60 */ 2787 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2788 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2789 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2790 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2791 2792 /* init RLC PG Delay */ 2793 data = 0; 2794 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2795 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2796 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2797 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2799 2800 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2801 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2802 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2803 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2804 2805 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2806 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2807 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2808 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2809 2810 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2811 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2812 2813 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2814 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2815 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2816 2817 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2818 } 2819 } 2820 2821 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2822 bool enable) 2823 { 2824 uint32_t data = 0; 2825 uint32_t default_data = 0; 2826 2827 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2828 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2829 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2830 enable ? 1 : 0); 2831 if (default_data != data) 2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2833 } 2834 2835 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2836 bool enable) 2837 { 2838 uint32_t data = 0; 2839 uint32_t default_data = 0; 2840 2841 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2842 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2843 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2844 enable ? 1 : 0); 2845 if(default_data != data) 2846 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2847 } 2848 2849 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2850 bool enable) 2851 { 2852 uint32_t data = 0; 2853 uint32_t default_data = 0; 2854 2855 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2856 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2857 CP_PG_DISABLE, 2858 enable ? 0 : 1); 2859 if(default_data != data) 2860 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2861 } 2862 2863 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2864 bool enable) 2865 { 2866 uint32_t data, default_data; 2867 2868 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2869 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2870 GFX_POWER_GATING_ENABLE, 2871 enable ? 1 : 0); 2872 if(default_data != data) 2873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2874 } 2875 2876 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2877 bool enable) 2878 { 2879 uint32_t data, default_data; 2880 2881 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2882 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2883 GFX_PIPELINE_PG_ENABLE, 2884 enable ? 1 : 0); 2885 if(default_data != data) 2886 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2887 2888 if (!enable) 2889 /* read any GFX register to wake up GFX */ 2890 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2891 } 2892 2893 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2894 bool enable) 2895 { 2896 uint32_t data, default_data; 2897 2898 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2899 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2900 STATIC_PER_CU_PG_ENABLE, 2901 enable ? 1 : 0); 2902 if(default_data != data) 2903 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2904 } 2905 2906 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2907 bool enable) 2908 { 2909 uint32_t data, default_data; 2910 2911 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2912 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2913 DYN_PER_CU_PG_ENABLE, 2914 enable ? 1 : 0); 2915 if(default_data != data) 2916 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2917 } 2918 2919 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2920 { 2921 gfx_v9_0_init_csb(adev); 2922 2923 /* 2924 * Rlc save restore list is workable since v2_1. 2925 * And it's needed by gfxoff feature. 2926 */ 2927 if (adev->gfx.rlc.is_rlc_v2_1) { 2928 gfx_v9_1_init_rlc_save_restore_list(adev); 2929 gfx_v9_0_enable_save_restore_machine(adev); 2930 } 2931 2932 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2933 AMD_PG_SUPPORT_GFX_SMG | 2934 AMD_PG_SUPPORT_GFX_DMG | 2935 AMD_PG_SUPPORT_CP | 2936 AMD_PG_SUPPORT_GDS | 2937 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2938 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2939 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2940 gfx_v9_0_init_gfx_power_gating(adev); 2941 } 2942 } 2943 2944 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2945 { 2946 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2947 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2948 gfx_v9_0_wait_for_rlc_serdes(adev); 2949 } 2950 2951 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2952 { 2953 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2954 udelay(50); 2955 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2956 udelay(50); 2957 } 2958 2959 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2960 { 2961 #ifdef AMDGPU_RLC_DEBUG_RETRY 2962 u32 rlc_ucode_ver; 2963 #endif 2964 2965 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2966 udelay(50); 2967 2968 /* carrizo do enable cp interrupt after cp inited */ 2969 if (!(adev->flags & AMD_IS_APU)) { 2970 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2971 udelay(50); 2972 } 2973 2974 #ifdef AMDGPU_RLC_DEBUG_RETRY 2975 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2976 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2977 if(rlc_ucode_ver == 0x108) { 2978 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2979 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2980 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2981 * default is 0x9C4 to create a 100us interval */ 2982 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2983 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2984 * to disable the page fault retry interrupts, default is 2985 * 0x100 (256) */ 2986 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2987 } 2988 #endif 2989 } 2990 2991 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2992 { 2993 const struct rlc_firmware_header_v2_0 *hdr; 2994 const __le32 *fw_data; 2995 unsigned i, fw_size; 2996 2997 if (!adev->gfx.rlc_fw) 2998 return -EINVAL; 2999 3000 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3001 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3002 3003 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3004 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3005 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3006 3007 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3008 RLCG_UCODE_LOADING_START_ADDRESS); 3009 for (i = 0; i < fw_size; i++) 3010 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3011 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3012 3013 return 0; 3014 } 3015 3016 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3017 { 3018 int r; 3019 3020 if (amdgpu_sriov_vf(adev)) { 3021 gfx_v9_0_init_csb(adev); 3022 return 0; 3023 } 3024 3025 adev->gfx.rlc.funcs->stop(adev); 3026 3027 /* disable CG */ 3028 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3029 3030 gfx_v9_0_init_pg(adev); 3031 3032 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3033 /* legacy rlc firmware loading */ 3034 r = gfx_v9_0_rlc_load_microcode(adev); 3035 if (r) 3036 return r; 3037 } 3038 3039 switch (adev->asic_type) { 3040 case CHIP_RAVEN: 3041 if (amdgpu_lbpw == 0) 3042 gfx_v9_0_enable_lbpw(adev, false); 3043 else 3044 gfx_v9_0_enable_lbpw(adev, true); 3045 break; 3046 case CHIP_VEGA20: 3047 if (amdgpu_lbpw > 0) 3048 gfx_v9_0_enable_lbpw(adev, true); 3049 else 3050 gfx_v9_0_enable_lbpw(adev, false); 3051 break; 3052 default: 3053 break; 3054 } 3055 3056 adev->gfx.rlc.funcs->start(adev); 3057 3058 return 0; 3059 } 3060 3061 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3062 { 3063 int i; 3064 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3065 3066 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3067 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3068 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3069 if (!enable) { 3070 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3071 adev->gfx.gfx_ring[i].sched.ready = false; 3072 } 3073 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3074 udelay(50); 3075 } 3076 3077 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3078 { 3079 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3080 const struct gfx_firmware_header_v1_0 *ce_hdr; 3081 const struct gfx_firmware_header_v1_0 *me_hdr; 3082 const __le32 *fw_data; 3083 unsigned i, fw_size; 3084 3085 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3086 return -EINVAL; 3087 3088 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3089 adev->gfx.pfp_fw->data; 3090 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3091 adev->gfx.ce_fw->data; 3092 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3093 adev->gfx.me_fw->data; 3094 3095 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3096 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3097 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3098 3099 gfx_v9_0_cp_gfx_enable(adev, false); 3100 3101 /* PFP */ 3102 fw_data = (const __le32 *) 3103 (adev->gfx.pfp_fw->data + 3104 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3105 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3106 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3107 for (i = 0; i < fw_size; i++) 3108 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3109 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3110 3111 /* CE */ 3112 fw_data = (const __le32 *) 3113 (adev->gfx.ce_fw->data + 3114 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3115 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3116 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3117 for (i = 0; i < fw_size; i++) 3118 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3119 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3120 3121 /* ME */ 3122 fw_data = (const __le32 *) 3123 (adev->gfx.me_fw->data + 3124 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3125 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3126 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3127 for (i = 0; i < fw_size; i++) 3128 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3129 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3130 3131 return 0; 3132 } 3133 3134 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3135 { 3136 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3137 const struct cs_section_def *sect = NULL; 3138 const struct cs_extent_def *ext = NULL; 3139 int r, i, tmp; 3140 3141 /* init the CP */ 3142 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3143 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3144 3145 gfx_v9_0_cp_gfx_enable(adev, true); 3146 3147 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3148 if (r) { 3149 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3150 return r; 3151 } 3152 3153 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3154 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3155 3156 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3157 amdgpu_ring_write(ring, 0x80000000); 3158 amdgpu_ring_write(ring, 0x80000000); 3159 3160 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3161 for (ext = sect->section; ext->extent != NULL; ++ext) { 3162 if (sect->id == SECT_CONTEXT) { 3163 amdgpu_ring_write(ring, 3164 PACKET3(PACKET3_SET_CONTEXT_REG, 3165 ext->reg_count)); 3166 amdgpu_ring_write(ring, 3167 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3168 for (i = 0; i < ext->reg_count; i++) 3169 amdgpu_ring_write(ring, ext->extent[i]); 3170 } 3171 } 3172 } 3173 3174 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3175 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3176 3177 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3178 amdgpu_ring_write(ring, 0); 3179 3180 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3181 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3182 amdgpu_ring_write(ring, 0x8000); 3183 amdgpu_ring_write(ring, 0x8000); 3184 3185 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3186 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3187 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3188 amdgpu_ring_write(ring, tmp); 3189 amdgpu_ring_write(ring, 0); 3190 3191 amdgpu_ring_commit(ring); 3192 3193 return 0; 3194 } 3195 3196 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3197 { 3198 struct amdgpu_ring *ring; 3199 u32 tmp; 3200 u32 rb_bufsz; 3201 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3202 3203 /* Set the write pointer delay */ 3204 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3205 3206 /* set the RB to use vmid 0 */ 3207 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3208 3209 /* Set ring buffer size */ 3210 ring = &adev->gfx.gfx_ring[0]; 3211 rb_bufsz = order_base_2(ring->ring_size / 8); 3212 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3213 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3214 #ifdef __BIG_ENDIAN 3215 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3216 #endif 3217 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3218 3219 /* Initialize the ring buffer's write pointers */ 3220 ring->wptr = 0; 3221 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3222 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3223 3224 /* set the wb address wether it's enabled or not */ 3225 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3226 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3227 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3228 3229 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3230 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3231 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3232 3233 mdelay(1); 3234 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3235 3236 rb_addr = ring->gpu_addr >> 8; 3237 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3238 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3239 3240 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3241 if (ring->use_doorbell) { 3242 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3243 DOORBELL_OFFSET, ring->doorbell_index); 3244 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3245 DOORBELL_EN, 1); 3246 } else { 3247 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3248 } 3249 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3250 3251 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3252 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3253 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3254 3255 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3256 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3257 3258 3259 /* start the ring */ 3260 gfx_v9_0_cp_gfx_start(adev); 3261 ring->sched.ready = true; 3262 3263 return 0; 3264 } 3265 3266 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3267 { 3268 int i; 3269 3270 if (enable) { 3271 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3272 } else { 3273 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3274 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3275 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3276 adev->gfx.compute_ring[i].sched.ready = false; 3277 adev->gfx.kiq.ring.sched.ready = false; 3278 } 3279 udelay(50); 3280 } 3281 3282 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3283 { 3284 const struct gfx_firmware_header_v1_0 *mec_hdr; 3285 const __le32 *fw_data; 3286 unsigned i; 3287 u32 tmp; 3288 3289 if (!adev->gfx.mec_fw) 3290 return -EINVAL; 3291 3292 gfx_v9_0_cp_compute_enable(adev, false); 3293 3294 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3295 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3296 3297 fw_data = (const __le32 *) 3298 (adev->gfx.mec_fw->data + 3299 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3300 tmp = 0; 3301 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3302 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3303 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3304 3305 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3306 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3307 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3308 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3309 3310 /* MEC1 */ 3311 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3312 mec_hdr->jt_offset); 3313 for (i = 0; i < mec_hdr->jt_size; i++) 3314 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3315 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3316 3317 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3318 adev->gfx.mec_fw_version); 3319 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3320 3321 return 0; 3322 } 3323 3324 /* KIQ functions */ 3325 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3326 { 3327 uint32_t tmp; 3328 struct amdgpu_device *adev = ring->adev; 3329 3330 /* tell RLC which is KIQ queue */ 3331 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3332 tmp &= 0xffffff00; 3333 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3334 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3335 tmp |= 0x80; 3336 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3337 } 3338 3339 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3340 { 3341 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3342 uint64_t queue_mask = 0; 3343 int r, i; 3344 3345 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3346 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3347 continue; 3348 3349 /* This situation may be hit in the future if a new HW 3350 * generation exposes more than 64 queues. If so, the 3351 * definition of queue_mask needs updating */ 3352 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3353 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3354 break; 3355 } 3356 3357 queue_mask |= (1ull << i); 3358 } 3359 3360 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3361 if (r) { 3362 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3363 return r; 3364 } 3365 3366 /* set resources */ 3367 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3368 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3369 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3370 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3371 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3372 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3373 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3374 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3375 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3376 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3377 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3378 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3379 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3380 3381 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3382 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3383 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3384 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3385 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3386 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3387 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3388 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3389 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3390 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3391 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3392 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3393 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3394 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3395 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3396 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3397 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3398 } 3399 3400 r = amdgpu_ring_test_helper(kiq_ring); 3401 if (r) 3402 DRM_ERROR("KCQ enable failed\n"); 3403 3404 return r; 3405 } 3406 3407 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3408 { 3409 struct amdgpu_device *adev = ring->adev; 3410 struct v9_mqd *mqd = ring->mqd_ptr; 3411 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3412 uint32_t tmp; 3413 3414 mqd->header = 0xC0310800; 3415 mqd->compute_pipelinestat_enable = 0x00000001; 3416 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3417 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3418 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3419 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3420 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3421 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3422 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3423 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3424 mqd->compute_misc_reserved = 0x00000003; 3425 3426 mqd->dynamic_cu_mask_addr_lo = 3427 lower_32_bits(ring->mqd_gpu_addr 3428 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3429 mqd->dynamic_cu_mask_addr_hi = 3430 upper_32_bits(ring->mqd_gpu_addr 3431 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3432 3433 eop_base_addr = ring->eop_gpu_addr >> 8; 3434 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3435 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3436 3437 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3438 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3439 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3440 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3441 3442 mqd->cp_hqd_eop_control = tmp; 3443 3444 /* enable doorbell? */ 3445 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3446 3447 if (ring->use_doorbell) { 3448 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3449 DOORBELL_OFFSET, ring->doorbell_index); 3450 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3451 DOORBELL_EN, 1); 3452 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3453 DOORBELL_SOURCE, 0); 3454 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3455 DOORBELL_HIT, 0); 3456 } else { 3457 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3458 DOORBELL_EN, 0); 3459 } 3460 3461 mqd->cp_hqd_pq_doorbell_control = tmp; 3462 3463 /* disable the queue if it's active */ 3464 ring->wptr = 0; 3465 mqd->cp_hqd_dequeue_request = 0; 3466 mqd->cp_hqd_pq_rptr = 0; 3467 mqd->cp_hqd_pq_wptr_lo = 0; 3468 mqd->cp_hqd_pq_wptr_hi = 0; 3469 3470 /* set the pointer to the MQD */ 3471 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3472 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3473 3474 /* set MQD vmid to 0 */ 3475 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3476 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3477 mqd->cp_mqd_control = tmp; 3478 3479 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3480 hqd_gpu_addr = ring->gpu_addr >> 8; 3481 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3482 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3483 3484 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3485 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3486 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3487 (order_base_2(ring->ring_size / 4) - 1)); 3488 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3489 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3490 #ifdef __BIG_ENDIAN 3491 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3492 #endif 3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3494 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3495 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3496 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3497 mqd->cp_hqd_pq_control = tmp; 3498 3499 /* set the wb address whether it's enabled or not */ 3500 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3501 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3502 mqd->cp_hqd_pq_rptr_report_addr_hi = 3503 upper_32_bits(wb_gpu_addr) & 0xffff; 3504 3505 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3506 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3507 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3508 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3509 3510 tmp = 0; 3511 /* enable the doorbell if requested */ 3512 if (ring->use_doorbell) { 3513 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3514 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3515 DOORBELL_OFFSET, ring->doorbell_index); 3516 3517 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3518 DOORBELL_EN, 1); 3519 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3520 DOORBELL_SOURCE, 0); 3521 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3522 DOORBELL_HIT, 0); 3523 } 3524 3525 mqd->cp_hqd_pq_doorbell_control = tmp; 3526 3527 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3528 ring->wptr = 0; 3529 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3530 3531 /* set the vmid for the queue */ 3532 mqd->cp_hqd_vmid = 0; 3533 3534 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3535 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3536 mqd->cp_hqd_persistent_state = tmp; 3537 3538 /* set MIN_IB_AVAIL_SIZE */ 3539 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3540 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3541 mqd->cp_hqd_ib_control = tmp; 3542 3543 /* activate the queue */ 3544 mqd->cp_hqd_active = 1; 3545 3546 return 0; 3547 } 3548 3549 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3550 { 3551 struct amdgpu_device *adev = ring->adev; 3552 struct v9_mqd *mqd = ring->mqd_ptr; 3553 int j; 3554 3555 /* disable wptr polling */ 3556 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3557 3558 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3559 mqd->cp_hqd_eop_base_addr_lo); 3560 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3561 mqd->cp_hqd_eop_base_addr_hi); 3562 3563 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3564 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3565 mqd->cp_hqd_eop_control); 3566 3567 /* enable doorbell? */ 3568 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3569 mqd->cp_hqd_pq_doorbell_control); 3570 3571 /* disable the queue if it's active */ 3572 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3573 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3574 for (j = 0; j < adev->usec_timeout; j++) { 3575 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3576 break; 3577 udelay(1); 3578 } 3579 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3580 mqd->cp_hqd_dequeue_request); 3581 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3582 mqd->cp_hqd_pq_rptr); 3583 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3584 mqd->cp_hqd_pq_wptr_lo); 3585 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3586 mqd->cp_hqd_pq_wptr_hi); 3587 } 3588 3589 /* set the pointer to the MQD */ 3590 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3591 mqd->cp_mqd_base_addr_lo); 3592 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3593 mqd->cp_mqd_base_addr_hi); 3594 3595 /* set MQD vmid to 0 */ 3596 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3597 mqd->cp_mqd_control); 3598 3599 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3600 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3601 mqd->cp_hqd_pq_base_lo); 3602 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3603 mqd->cp_hqd_pq_base_hi); 3604 3605 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3606 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3607 mqd->cp_hqd_pq_control); 3608 3609 /* set the wb address whether it's enabled or not */ 3610 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3611 mqd->cp_hqd_pq_rptr_report_addr_lo); 3612 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3613 mqd->cp_hqd_pq_rptr_report_addr_hi); 3614 3615 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3617 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3618 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3619 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3620 3621 /* enable the doorbell if requested */ 3622 if (ring->use_doorbell) { 3623 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3624 (adev->doorbell_index.kiq * 2) << 2); 3625 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3626 (adev->doorbell_index.userqueue_end * 2) << 2); 3627 } 3628 3629 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3630 mqd->cp_hqd_pq_doorbell_control); 3631 3632 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3634 mqd->cp_hqd_pq_wptr_lo); 3635 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3636 mqd->cp_hqd_pq_wptr_hi); 3637 3638 /* set the vmid for the queue */ 3639 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3640 3641 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3642 mqd->cp_hqd_persistent_state); 3643 3644 /* activate the queue */ 3645 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3646 mqd->cp_hqd_active); 3647 3648 if (ring->use_doorbell) 3649 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3650 3651 return 0; 3652 } 3653 3654 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3655 { 3656 struct amdgpu_device *adev = ring->adev; 3657 int j; 3658 3659 /* disable the queue if it's active */ 3660 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3661 3662 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3663 3664 for (j = 0; j < adev->usec_timeout; j++) { 3665 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3666 break; 3667 udelay(1); 3668 } 3669 3670 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3671 DRM_DEBUG("KIQ dequeue request failed.\n"); 3672 3673 /* Manual disable if dequeue request times out */ 3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3675 } 3676 3677 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3678 0); 3679 } 3680 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3687 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3688 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3689 3690 return 0; 3691 } 3692 3693 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3694 { 3695 struct amdgpu_device *adev = ring->adev; 3696 struct v9_mqd *mqd = ring->mqd_ptr; 3697 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3698 3699 gfx_v9_0_kiq_setting(ring); 3700 3701 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3702 /* reset MQD to a clean status */ 3703 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3704 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3705 3706 /* reset ring buffer */ 3707 ring->wptr = 0; 3708 amdgpu_ring_clear_ring(ring); 3709 3710 mutex_lock(&adev->srbm_mutex); 3711 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3712 gfx_v9_0_kiq_init_register(ring); 3713 soc15_grbm_select(adev, 0, 0, 0, 0); 3714 mutex_unlock(&adev->srbm_mutex); 3715 } else { 3716 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3717 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3718 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3719 mutex_lock(&adev->srbm_mutex); 3720 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3721 gfx_v9_0_mqd_init(ring); 3722 gfx_v9_0_kiq_init_register(ring); 3723 soc15_grbm_select(adev, 0, 0, 0, 0); 3724 mutex_unlock(&adev->srbm_mutex); 3725 3726 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3727 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3728 } 3729 3730 return 0; 3731 } 3732 3733 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3734 { 3735 struct amdgpu_device *adev = ring->adev; 3736 struct v9_mqd *mqd = ring->mqd_ptr; 3737 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3738 3739 if (!adev->in_gpu_reset && !adev->in_suspend) { 3740 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3741 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3742 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3743 mutex_lock(&adev->srbm_mutex); 3744 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3745 gfx_v9_0_mqd_init(ring); 3746 soc15_grbm_select(adev, 0, 0, 0, 0); 3747 mutex_unlock(&adev->srbm_mutex); 3748 3749 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3750 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3751 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3752 /* reset MQD to a clean status */ 3753 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3754 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3755 3756 /* reset ring buffer */ 3757 ring->wptr = 0; 3758 amdgpu_ring_clear_ring(ring); 3759 } else { 3760 amdgpu_ring_clear_ring(ring); 3761 } 3762 3763 return 0; 3764 } 3765 3766 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3767 { 3768 struct amdgpu_ring *ring; 3769 int r; 3770 3771 ring = &adev->gfx.kiq.ring; 3772 3773 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3774 if (unlikely(r != 0)) 3775 return r; 3776 3777 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3778 if (unlikely(r != 0)) 3779 return r; 3780 3781 gfx_v9_0_kiq_init_queue(ring); 3782 amdgpu_bo_kunmap(ring->mqd_obj); 3783 ring->mqd_ptr = NULL; 3784 amdgpu_bo_unreserve(ring->mqd_obj); 3785 ring->sched.ready = true; 3786 return 0; 3787 } 3788 3789 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3790 { 3791 struct amdgpu_ring *ring = NULL; 3792 int r = 0, i; 3793 3794 gfx_v9_0_cp_compute_enable(adev, true); 3795 3796 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3797 ring = &adev->gfx.compute_ring[i]; 3798 3799 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3800 if (unlikely(r != 0)) 3801 goto done; 3802 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3803 if (!r) { 3804 r = gfx_v9_0_kcq_init_queue(ring); 3805 amdgpu_bo_kunmap(ring->mqd_obj); 3806 ring->mqd_ptr = NULL; 3807 } 3808 amdgpu_bo_unreserve(ring->mqd_obj); 3809 if (r) 3810 goto done; 3811 } 3812 3813 r = gfx_v9_0_kiq_kcq_enable(adev); 3814 done: 3815 return r; 3816 } 3817 3818 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3819 { 3820 int r, i; 3821 struct amdgpu_ring *ring; 3822 3823 if (!(adev->flags & AMD_IS_APU)) 3824 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3825 3826 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3827 if (adev->asic_type != CHIP_ARCTURUS) { 3828 /* legacy firmware loading */ 3829 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3830 if (r) 3831 return r; 3832 } 3833 3834 r = gfx_v9_0_cp_compute_load_microcode(adev); 3835 if (r) 3836 return r; 3837 } 3838 3839 r = gfx_v9_0_kiq_resume(adev); 3840 if (r) 3841 return r; 3842 3843 if (adev->asic_type != CHIP_ARCTURUS) { 3844 r = gfx_v9_0_cp_gfx_resume(adev); 3845 if (r) 3846 return r; 3847 } 3848 3849 r = gfx_v9_0_kcq_resume(adev); 3850 if (r) 3851 return r; 3852 3853 if (adev->asic_type != CHIP_ARCTURUS) { 3854 ring = &adev->gfx.gfx_ring[0]; 3855 r = amdgpu_ring_test_helper(ring); 3856 if (r) 3857 return r; 3858 } 3859 3860 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3861 ring = &adev->gfx.compute_ring[i]; 3862 amdgpu_ring_test_helper(ring); 3863 } 3864 3865 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3866 3867 return 0; 3868 } 3869 3870 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3871 { 3872 if (adev->asic_type != CHIP_ARCTURUS) 3873 gfx_v9_0_cp_gfx_enable(adev, enable); 3874 gfx_v9_0_cp_compute_enable(adev, enable); 3875 } 3876 3877 static int gfx_v9_0_hw_init(void *handle) 3878 { 3879 int r; 3880 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3881 3882 if (!amdgpu_sriov_vf(adev)) 3883 gfx_v9_0_init_golden_registers(adev); 3884 3885 gfx_v9_0_constants_init(adev); 3886 3887 r = gfx_v9_0_csb_vram_pin(adev); 3888 if (r) 3889 return r; 3890 3891 r = adev->gfx.rlc.funcs->resume(adev); 3892 if (r) 3893 return r; 3894 3895 r = gfx_v9_0_cp_resume(adev); 3896 if (r) 3897 return r; 3898 3899 if (adev->asic_type != CHIP_ARCTURUS) { 3900 r = gfx_v9_0_ngg_en(adev); 3901 if (r) 3902 return r; 3903 } 3904 3905 return r; 3906 } 3907 3908 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3909 { 3910 int r, i; 3911 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3912 3913 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3914 if (r) 3915 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3916 3917 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3918 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3919 3920 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3921 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3922 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3923 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3924 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3925 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3926 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3927 amdgpu_ring_write(kiq_ring, 0); 3928 amdgpu_ring_write(kiq_ring, 0); 3929 amdgpu_ring_write(kiq_ring, 0); 3930 } 3931 r = amdgpu_ring_test_helper(kiq_ring); 3932 if (r) 3933 DRM_ERROR("KCQ disable failed\n"); 3934 3935 return r; 3936 } 3937 3938 static int gfx_v9_0_hw_fini(void *handle) 3939 { 3940 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3941 3942 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3943 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3944 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3945 3946 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3947 gfx_v9_0_kcq_disable(adev); 3948 3949 if (amdgpu_sriov_vf(adev)) { 3950 gfx_v9_0_cp_gfx_enable(adev, false); 3951 /* must disable polling for SRIOV when hw finished, otherwise 3952 * CPC engine may still keep fetching WB address which is already 3953 * invalid after sw finished and trigger DMAR reading error in 3954 * hypervisor side. 3955 */ 3956 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3957 return 0; 3958 } 3959 3960 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3961 * otherwise KIQ is hanging when binding back 3962 */ 3963 if (!adev->in_gpu_reset && !adev->in_suspend) { 3964 mutex_lock(&adev->srbm_mutex); 3965 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3966 adev->gfx.kiq.ring.pipe, 3967 adev->gfx.kiq.ring.queue, 0); 3968 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3969 soc15_grbm_select(adev, 0, 0, 0, 0); 3970 mutex_unlock(&adev->srbm_mutex); 3971 } 3972 3973 gfx_v9_0_cp_enable(adev, false); 3974 adev->gfx.rlc.funcs->stop(adev); 3975 3976 gfx_v9_0_csb_vram_unpin(adev); 3977 3978 return 0; 3979 } 3980 3981 static int gfx_v9_0_suspend(void *handle) 3982 { 3983 return gfx_v9_0_hw_fini(handle); 3984 } 3985 3986 static int gfx_v9_0_resume(void *handle) 3987 { 3988 return gfx_v9_0_hw_init(handle); 3989 } 3990 3991 static bool gfx_v9_0_is_idle(void *handle) 3992 { 3993 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3994 3995 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3996 GRBM_STATUS, GUI_ACTIVE)) 3997 return false; 3998 else 3999 return true; 4000 } 4001 4002 static int gfx_v9_0_wait_for_idle(void *handle) 4003 { 4004 unsigned i; 4005 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4006 4007 for (i = 0; i < adev->usec_timeout; i++) { 4008 if (gfx_v9_0_is_idle(handle)) 4009 return 0; 4010 udelay(1); 4011 } 4012 return -ETIMEDOUT; 4013 } 4014 4015 static int gfx_v9_0_soft_reset(void *handle) 4016 { 4017 u32 grbm_soft_reset = 0; 4018 u32 tmp; 4019 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4020 4021 /* GRBM_STATUS */ 4022 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4023 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4024 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4025 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4026 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4027 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4028 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4029 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4030 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4031 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4032 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4033 } 4034 4035 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4036 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4037 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4038 } 4039 4040 /* GRBM_STATUS2 */ 4041 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4042 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4043 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4044 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4045 4046 4047 if (grbm_soft_reset) { 4048 /* stop the rlc */ 4049 adev->gfx.rlc.funcs->stop(adev); 4050 4051 if (adev->asic_type != CHIP_ARCTURUS) 4052 /* Disable GFX parsing/prefetching */ 4053 gfx_v9_0_cp_gfx_enable(adev, false); 4054 4055 /* Disable MEC parsing/prefetching */ 4056 gfx_v9_0_cp_compute_enable(adev, false); 4057 4058 if (grbm_soft_reset) { 4059 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4060 tmp |= grbm_soft_reset; 4061 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4062 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4063 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4064 4065 udelay(50); 4066 4067 tmp &= ~grbm_soft_reset; 4068 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4069 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4070 } 4071 4072 /* Wait a little for things to settle down */ 4073 udelay(50); 4074 } 4075 return 0; 4076 } 4077 4078 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4079 { 4080 uint64_t clock; 4081 4082 mutex_lock(&adev->gfx.gpu_clock_mutex); 4083 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4084 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4085 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4086 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4087 return clock; 4088 } 4089 4090 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4091 uint32_t vmid, 4092 uint32_t gds_base, uint32_t gds_size, 4093 uint32_t gws_base, uint32_t gws_size, 4094 uint32_t oa_base, uint32_t oa_size) 4095 { 4096 struct amdgpu_device *adev = ring->adev; 4097 4098 /* GDS Base */ 4099 gfx_v9_0_write_data_to_reg(ring, 0, false, 4100 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4101 gds_base); 4102 4103 /* GDS Size */ 4104 gfx_v9_0_write_data_to_reg(ring, 0, false, 4105 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4106 gds_size); 4107 4108 /* GWS */ 4109 gfx_v9_0_write_data_to_reg(ring, 0, false, 4110 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4111 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4112 4113 /* OA */ 4114 gfx_v9_0_write_data_to_reg(ring, 0, false, 4115 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4116 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4117 } 4118 4119 static const u32 vgpr_init_compute_shader[] = 4120 { 4121 0xb07c0000, 0xbe8000ff, 4122 0x000000f8, 0xbf110800, 4123 0x7e000280, 0x7e020280, 4124 0x7e040280, 0x7e060280, 4125 0x7e080280, 0x7e0a0280, 4126 0x7e0c0280, 0x7e0e0280, 4127 0x80808800, 0xbe803200, 4128 0xbf84fff5, 0xbf9c0000, 4129 0xd28c0001, 0x0001007f, 4130 0xd28d0001, 0x0002027e, 4131 0x10020288, 0xb8810904, 4132 0xb7814000, 0xd1196a01, 4133 0x00000301, 0xbe800087, 4134 0xbefc00c1, 0xd89c4000, 4135 0x00020201, 0xd89cc080, 4136 0x00040401, 0x320202ff, 4137 0x00000800, 0x80808100, 4138 0xbf84fff8, 0x7e020280, 4139 0xbf810000, 0x00000000, 4140 }; 4141 4142 static const u32 sgpr_init_compute_shader[] = 4143 { 4144 0xb07c0000, 0xbe8000ff, 4145 0x0000005f, 0xbee50080, 4146 0xbe812c65, 0xbe822c65, 4147 0xbe832c65, 0xbe842c65, 4148 0xbe852c65, 0xb77c0005, 4149 0x80808500, 0xbf84fff8, 4150 0xbe800080, 0xbf810000, 4151 }; 4152 4153 static const struct soc15_reg_entry vgpr_init_regs[] = { 4154 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4155 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4156 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4157 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4158 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4159 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4160 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4161 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4162 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 4163 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4164 }; 4165 4166 static const struct soc15_reg_entry sgpr_init_regs[] = { 4167 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4168 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4169 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4170 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4171 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4172 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4173 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4174 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4175 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 4176 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4177 }; 4178 4179 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 4180 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4181 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4182 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4183 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4184 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4185 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4186 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4187 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4188 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4189 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4190 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4191 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4192 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4193 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4194 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4195 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4196 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4197 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4198 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4199 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4200 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4201 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4202 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4203 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4204 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4205 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4206 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4207 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4208 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4209 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4210 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4211 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4212 }; 4213 4214 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4215 { 4216 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4217 int i, r; 4218 4219 r = amdgpu_ring_alloc(ring, 7); 4220 if (r) { 4221 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4222 ring->name, r); 4223 return r; 4224 } 4225 4226 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4227 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4228 4229 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4230 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4231 PACKET3_DMA_DATA_DST_SEL(1) | 4232 PACKET3_DMA_DATA_SRC_SEL(2) | 4233 PACKET3_DMA_DATA_ENGINE(0))); 4234 amdgpu_ring_write(ring, 0); 4235 amdgpu_ring_write(ring, 0); 4236 amdgpu_ring_write(ring, 0); 4237 amdgpu_ring_write(ring, 0); 4238 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4239 adev->gds.gds_size); 4240 4241 amdgpu_ring_commit(ring); 4242 4243 for (i = 0; i < adev->usec_timeout; i++) { 4244 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4245 break; 4246 udelay(1); 4247 } 4248 4249 if (i >= adev->usec_timeout) 4250 r = -ETIMEDOUT; 4251 4252 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4253 4254 return r; 4255 } 4256 4257 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4258 { 4259 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4260 struct amdgpu_ib ib; 4261 struct dma_fence *f = NULL; 4262 int r, i, j, k; 4263 unsigned total_size, vgpr_offset, sgpr_offset; 4264 u64 gpu_addr; 4265 4266 /* only support when RAS is enabled */ 4267 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4268 return 0; 4269 4270 /* bail if the compute ring is not ready */ 4271 if (!ring->sched.ready) 4272 return 0; 4273 4274 total_size = 4275 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4276 total_size += 4277 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4278 total_size = ALIGN(total_size, 256); 4279 vgpr_offset = total_size; 4280 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4281 sgpr_offset = total_size; 4282 total_size += sizeof(sgpr_init_compute_shader); 4283 4284 /* allocate an indirect buffer to put the commands in */ 4285 memset(&ib, 0, sizeof(ib)); 4286 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4287 if (r) { 4288 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4289 return r; 4290 } 4291 4292 /* load the compute shaders */ 4293 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4294 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4295 4296 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4297 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4298 4299 /* init the ib length to 0 */ 4300 ib.length_dw = 0; 4301 4302 /* VGPR */ 4303 /* write the register state for the compute dispatch */ 4304 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4305 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4306 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4307 - PACKET3_SET_SH_REG_START; 4308 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4309 } 4310 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4311 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4312 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4313 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4314 - PACKET3_SET_SH_REG_START; 4315 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4316 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4317 4318 /* write dispatch packet */ 4319 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4320 ib.ptr[ib.length_dw++] = 128; /* x */ 4321 ib.ptr[ib.length_dw++] = 1; /* y */ 4322 ib.ptr[ib.length_dw++] = 1; /* z */ 4323 ib.ptr[ib.length_dw++] = 4324 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4325 4326 /* write CS partial flush packet */ 4327 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4328 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4329 4330 /* SGPR */ 4331 /* write the register state for the compute dispatch */ 4332 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 4333 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4334 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 4335 - PACKET3_SET_SH_REG_START; 4336 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 4337 } 4338 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4339 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4340 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4341 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4342 - PACKET3_SET_SH_REG_START; 4343 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4344 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4345 4346 /* write dispatch packet */ 4347 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4348 ib.ptr[ib.length_dw++] = 128; /* x */ 4349 ib.ptr[ib.length_dw++] = 1; /* y */ 4350 ib.ptr[ib.length_dw++] = 1; /* z */ 4351 ib.ptr[ib.length_dw++] = 4352 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4353 4354 /* write CS partial flush packet */ 4355 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4356 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4357 4358 /* shedule the ib on the ring */ 4359 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4360 if (r) { 4361 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4362 goto fail; 4363 } 4364 4365 /* wait for the GPU to finish processing the IB */ 4366 r = dma_fence_wait(f, false); 4367 if (r) { 4368 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4369 goto fail; 4370 } 4371 4372 /* read back registers to clear the counters */ 4373 mutex_lock(&adev->grbm_idx_mutex); 4374 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { 4375 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { 4376 for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { 4377 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 4378 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 4379 } 4380 } 4381 } 4382 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 4383 mutex_unlock(&adev->grbm_idx_mutex); 4384 4385 fail: 4386 amdgpu_ib_free(adev, &ib, NULL); 4387 dma_fence_put(f); 4388 4389 return r; 4390 } 4391 4392 static int gfx_v9_0_early_init(void *handle) 4393 { 4394 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4395 4396 if (adev->asic_type == CHIP_ARCTURUS) 4397 adev->gfx.num_gfx_rings = 0; 4398 else 4399 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4400 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 4401 gfx_v9_0_set_ring_funcs(adev); 4402 gfx_v9_0_set_irq_funcs(adev); 4403 gfx_v9_0_set_gds_init(adev); 4404 gfx_v9_0_set_rlc_funcs(adev); 4405 4406 return 0; 4407 } 4408 4409 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 4410 struct ras_err_data *err_data, 4411 struct amdgpu_iv_entry *entry); 4412 4413 static int gfx_v9_0_ecc_late_init(void *handle) 4414 { 4415 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4416 struct ras_common_if **ras_if = &adev->gfx.ras_if; 4417 struct ras_ih_if ih_info = { 4418 .cb = gfx_v9_0_process_ras_data_cb, 4419 }; 4420 struct ras_fs_if fs_info = { 4421 .sysfs_name = "gfx_err_count", 4422 .debugfs_name = "gfx_err_inject", 4423 }; 4424 struct ras_common_if ras_block = { 4425 .block = AMDGPU_RAS_BLOCK__GFX, 4426 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 4427 .sub_block_index = 0, 4428 .name = "gfx", 4429 }; 4430 int r; 4431 4432 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 4433 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 4434 return 0; 4435 } 4436 4437 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4438 if (r) 4439 return r; 4440 4441 /* requires IBs so do in late init after IB pool is initialized */ 4442 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4443 if (r) 4444 return r; 4445 4446 /* handle resume path. */ 4447 if (*ras_if) { 4448 /* resend ras TA enable cmd during resume. 4449 * prepare to handle failure. 4450 */ 4451 ih_info.head = **ras_if; 4452 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4453 if (r) { 4454 if (r == -EAGAIN) { 4455 /* request a gpu reset. will run again. */ 4456 amdgpu_ras_request_reset_on_boot(adev, 4457 AMDGPU_RAS_BLOCK__GFX); 4458 return 0; 4459 } 4460 /* fail to enable ras, cleanup all. */ 4461 goto irq; 4462 } 4463 /* enable successfully. continue. */ 4464 goto resume; 4465 } 4466 4467 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 4468 if (!*ras_if) 4469 return -ENOMEM; 4470 4471 **ras_if = ras_block; 4472 4473 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4474 if (r) { 4475 if (r == -EAGAIN) { 4476 amdgpu_ras_request_reset_on_boot(adev, 4477 AMDGPU_RAS_BLOCK__GFX); 4478 r = 0; 4479 } 4480 goto feature; 4481 } 4482 4483 ih_info.head = **ras_if; 4484 fs_info.head = **ras_if; 4485 4486 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 4487 if (r) 4488 goto interrupt; 4489 4490 amdgpu_ras_debugfs_create(adev, &fs_info); 4491 4492 r = amdgpu_ras_sysfs_create(adev, &fs_info); 4493 if (r) 4494 goto sysfs; 4495 resume: 4496 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 4497 if (r) 4498 goto irq; 4499 4500 return 0; 4501 irq: 4502 amdgpu_ras_sysfs_remove(adev, *ras_if); 4503 sysfs: 4504 amdgpu_ras_debugfs_remove(adev, *ras_if); 4505 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 4506 interrupt: 4507 amdgpu_ras_feature_enable(adev, *ras_if, 0); 4508 feature: 4509 kfree(*ras_if); 4510 *ras_if = NULL; 4511 return r; 4512 } 4513 4514 static int gfx_v9_0_late_init(void *handle) 4515 { 4516 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4517 int r; 4518 4519 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4520 if (r) 4521 return r; 4522 4523 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4524 if (r) 4525 return r; 4526 4527 r = gfx_v9_0_ecc_late_init(handle); 4528 if (r) 4529 return r; 4530 4531 return 0; 4532 } 4533 4534 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4535 { 4536 uint32_t rlc_setting; 4537 4538 /* if RLC is not enabled, do nothing */ 4539 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4540 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4541 return false; 4542 4543 return true; 4544 } 4545 4546 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4547 { 4548 uint32_t data; 4549 unsigned i; 4550 4551 data = RLC_SAFE_MODE__CMD_MASK; 4552 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4553 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4554 4555 /* wait for RLC_SAFE_MODE */ 4556 for (i = 0; i < adev->usec_timeout; i++) { 4557 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4558 break; 4559 udelay(1); 4560 } 4561 } 4562 4563 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4564 { 4565 uint32_t data; 4566 4567 data = RLC_SAFE_MODE__CMD_MASK; 4568 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4569 } 4570 4571 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4572 bool enable) 4573 { 4574 amdgpu_gfx_rlc_enter_safe_mode(adev); 4575 4576 if (is_support_sw_smu(adev) && !enable) 4577 smu_set_gfx_cgpg(&adev->smu, enable); 4578 4579 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4580 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4581 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4582 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4583 } else { 4584 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4585 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4586 } 4587 4588 amdgpu_gfx_rlc_exit_safe_mode(adev); 4589 } 4590 4591 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4592 bool enable) 4593 { 4594 /* TODO: double check if we need to perform under safe mode */ 4595 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4596 4597 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4598 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4599 else 4600 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4601 4602 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4603 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4604 else 4605 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4606 4607 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4608 } 4609 4610 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4611 bool enable) 4612 { 4613 uint32_t data, def; 4614 4615 amdgpu_gfx_rlc_enter_safe_mode(adev); 4616 4617 /* It is disabled by HW by default */ 4618 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4619 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4620 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4621 4622 if (adev->asic_type != CHIP_VEGA12) 4623 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4624 4625 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4626 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4627 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4628 4629 /* only for Vega10 & Raven1 */ 4630 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4631 4632 if (def != data) 4633 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4634 4635 /* MGLS is a global flag to control all MGLS in GFX */ 4636 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4637 /* 2 - RLC memory Light sleep */ 4638 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4639 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4640 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4641 if (def != data) 4642 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4643 } 4644 /* 3 - CP memory Light sleep */ 4645 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4646 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4647 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4648 if (def != data) 4649 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4650 } 4651 } 4652 } else { 4653 /* 1 - MGCG_OVERRIDE */ 4654 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4655 4656 if (adev->asic_type != CHIP_VEGA12) 4657 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4658 4659 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4660 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4661 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4662 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4663 4664 if (def != data) 4665 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4666 4667 /* 2 - disable MGLS in RLC */ 4668 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4669 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4670 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4671 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4672 } 4673 4674 /* 3 - disable MGLS in CP */ 4675 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4676 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4677 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4678 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4679 } 4680 } 4681 4682 amdgpu_gfx_rlc_exit_safe_mode(adev); 4683 } 4684 4685 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4686 bool enable) 4687 { 4688 uint32_t data, def; 4689 4690 if (adev->asic_type == CHIP_ARCTURUS) 4691 return; 4692 4693 amdgpu_gfx_rlc_enter_safe_mode(adev); 4694 4695 /* Enable 3D CGCG/CGLS */ 4696 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4697 /* write cmd to clear cgcg/cgls ov */ 4698 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4699 /* unset CGCG override */ 4700 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4701 /* update CGCG and CGLS override bits */ 4702 if (def != data) 4703 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4704 4705 /* enable 3Dcgcg FSM(0x0000363f) */ 4706 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4707 4708 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4709 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4710 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4711 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4712 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4713 if (def != data) 4714 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4715 4716 /* set IDLE_POLL_COUNT(0x00900100) */ 4717 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4718 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4719 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4720 if (def != data) 4721 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4722 } else { 4723 /* Disable CGCG/CGLS */ 4724 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4725 /* disable cgcg, cgls should be disabled */ 4726 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4727 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4728 /* disable cgcg and cgls in FSM */ 4729 if (def != data) 4730 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4731 } 4732 4733 amdgpu_gfx_rlc_exit_safe_mode(adev); 4734 } 4735 4736 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4737 bool enable) 4738 { 4739 uint32_t def, data; 4740 4741 amdgpu_gfx_rlc_enter_safe_mode(adev); 4742 4743 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4744 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4745 /* unset CGCG override */ 4746 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4747 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4748 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4749 else 4750 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4751 /* update CGCG and CGLS override bits */ 4752 if (def != data) 4753 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4754 4755 /* enable cgcg FSM(0x0000363F) */ 4756 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4757 4758 if (adev->asic_type == CHIP_ARCTURUS) 4759 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4760 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4761 else 4762 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4763 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4764 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4765 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4766 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4767 if (def != data) 4768 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4769 4770 /* set IDLE_POLL_COUNT(0x00900100) */ 4771 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4772 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4773 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4774 if (def != data) 4775 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4776 } else { 4777 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4778 /* reset CGCG/CGLS bits */ 4779 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4780 /* disable cgcg and cgls in FSM */ 4781 if (def != data) 4782 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4783 } 4784 4785 amdgpu_gfx_rlc_exit_safe_mode(adev); 4786 } 4787 4788 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4789 bool enable) 4790 { 4791 if (enable) { 4792 /* CGCG/CGLS should be enabled after MGCG/MGLS 4793 * === MGCG + MGLS === 4794 */ 4795 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4796 /* === CGCG /CGLS for GFX 3D Only === */ 4797 gfx_v9_0_update_3d_clock_gating(adev, enable); 4798 /* === CGCG + CGLS === */ 4799 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4800 } else { 4801 /* CGCG/CGLS should be disabled before MGCG/MGLS 4802 * === CGCG + CGLS === 4803 */ 4804 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4805 /* === CGCG /CGLS for GFX 3D Only === */ 4806 gfx_v9_0_update_3d_clock_gating(adev, enable); 4807 /* === MGCG + MGLS === */ 4808 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4809 } 4810 return 0; 4811 } 4812 4813 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4814 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4815 .set_safe_mode = gfx_v9_0_set_safe_mode, 4816 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4817 .init = gfx_v9_0_rlc_init, 4818 .get_csb_size = gfx_v9_0_get_csb_size, 4819 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4820 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4821 .resume = gfx_v9_0_rlc_resume, 4822 .stop = gfx_v9_0_rlc_stop, 4823 .reset = gfx_v9_0_rlc_reset, 4824 .start = gfx_v9_0_rlc_start 4825 }; 4826 4827 static int gfx_v9_0_set_powergating_state(void *handle, 4828 enum amd_powergating_state state) 4829 { 4830 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4831 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4832 4833 switch (adev->asic_type) { 4834 case CHIP_RAVEN: 4835 case CHIP_RENOIR: 4836 if (!enable) { 4837 amdgpu_gfx_off_ctrl(adev, false); 4838 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4839 } 4840 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4841 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4842 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4843 } else { 4844 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4845 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4846 } 4847 4848 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4849 gfx_v9_0_enable_cp_power_gating(adev, true); 4850 else 4851 gfx_v9_0_enable_cp_power_gating(adev, false); 4852 4853 /* update gfx cgpg state */ 4854 if (is_support_sw_smu(adev) && enable) 4855 smu_set_gfx_cgpg(&adev->smu, enable); 4856 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4857 4858 /* update mgcg state */ 4859 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4860 4861 if (enable) 4862 amdgpu_gfx_off_ctrl(adev, true); 4863 break; 4864 case CHIP_VEGA12: 4865 if (!enable) { 4866 amdgpu_gfx_off_ctrl(adev, false); 4867 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4868 } else { 4869 amdgpu_gfx_off_ctrl(adev, true); 4870 } 4871 break; 4872 default: 4873 break; 4874 } 4875 4876 return 0; 4877 } 4878 4879 static int gfx_v9_0_set_clockgating_state(void *handle, 4880 enum amd_clockgating_state state) 4881 { 4882 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4883 4884 if (amdgpu_sriov_vf(adev)) 4885 return 0; 4886 4887 switch (adev->asic_type) { 4888 case CHIP_VEGA10: 4889 case CHIP_VEGA12: 4890 case CHIP_VEGA20: 4891 case CHIP_RAVEN: 4892 case CHIP_ARCTURUS: 4893 case CHIP_RENOIR: 4894 gfx_v9_0_update_gfx_clock_gating(adev, 4895 state == AMD_CG_STATE_GATE ? true : false); 4896 break; 4897 default: 4898 break; 4899 } 4900 return 0; 4901 } 4902 4903 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4904 { 4905 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4906 int data; 4907 4908 if (amdgpu_sriov_vf(adev)) 4909 *flags = 0; 4910 4911 /* AMD_CG_SUPPORT_GFX_MGCG */ 4912 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4913 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4914 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4915 4916 /* AMD_CG_SUPPORT_GFX_CGCG */ 4917 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4918 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4919 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4920 4921 /* AMD_CG_SUPPORT_GFX_CGLS */ 4922 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4923 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4924 4925 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4926 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4927 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4928 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4929 4930 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4931 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4932 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4933 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4934 4935 if (adev->asic_type != CHIP_ARCTURUS) { 4936 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4937 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4938 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4939 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4940 4941 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4942 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4943 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4944 } 4945 } 4946 4947 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4948 { 4949 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4950 } 4951 4952 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4953 { 4954 struct amdgpu_device *adev = ring->adev; 4955 u64 wptr; 4956 4957 /* XXX check if swapping is necessary on BE */ 4958 if (ring->use_doorbell) { 4959 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4960 } else { 4961 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4962 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4963 } 4964 4965 return wptr; 4966 } 4967 4968 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4969 { 4970 struct amdgpu_device *adev = ring->adev; 4971 4972 if (ring->use_doorbell) { 4973 /* XXX check if swapping is necessary on BE */ 4974 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4975 WDOORBELL64(ring->doorbell_index, ring->wptr); 4976 } else { 4977 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4978 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4979 } 4980 } 4981 4982 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4983 { 4984 struct amdgpu_device *adev = ring->adev; 4985 u32 ref_and_mask, reg_mem_engine; 4986 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4987 4988 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4989 switch (ring->me) { 4990 case 1: 4991 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4992 break; 4993 case 2: 4994 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4995 break; 4996 default: 4997 return; 4998 } 4999 reg_mem_engine = 0; 5000 } else { 5001 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5002 reg_mem_engine = 1; /* pfp */ 5003 } 5004 5005 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5006 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 5007 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 5008 ref_and_mask, ref_and_mask, 0x20); 5009 } 5010 5011 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5012 struct amdgpu_job *job, 5013 struct amdgpu_ib *ib, 5014 uint32_t flags) 5015 { 5016 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5017 u32 header, control = 0; 5018 5019 if (ib->flags & AMDGPU_IB_FLAG_CE) 5020 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5021 else 5022 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5023 5024 control |= ib->length_dw | (vmid << 24); 5025 5026 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5027 control |= INDIRECT_BUFFER_PRE_ENB(1); 5028 5029 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 5030 gfx_v9_0_ring_emit_de_meta(ring); 5031 } 5032 5033 amdgpu_ring_write(ring, header); 5034 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5035 amdgpu_ring_write(ring, 5036 #ifdef __BIG_ENDIAN 5037 (2 << 0) | 5038 #endif 5039 lower_32_bits(ib->gpu_addr)); 5040 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5041 amdgpu_ring_write(ring, control); 5042 } 5043 5044 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5045 struct amdgpu_job *job, 5046 struct amdgpu_ib *ib, 5047 uint32_t flags) 5048 { 5049 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5050 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5051 5052 /* Currently, there is a high possibility to get wave ID mismatch 5053 * between ME and GDS, leading to a hw deadlock, because ME generates 5054 * different wave IDs than the GDS expects. This situation happens 5055 * randomly when at least 5 compute pipes use GDS ordered append. 5056 * The wave IDs generated by ME are also wrong after suspend/resume. 5057 * Those are probably bugs somewhere else in the kernel driver. 5058 * 5059 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5060 * GDS to 0 for this ring (me/pipe). 5061 */ 5062 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5063 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5064 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5065 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5066 } 5067 5068 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5069 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5070 amdgpu_ring_write(ring, 5071 #ifdef __BIG_ENDIAN 5072 (2 << 0) | 5073 #endif 5074 lower_32_bits(ib->gpu_addr)); 5075 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5076 amdgpu_ring_write(ring, control); 5077 } 5078 5079 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5080 u64 seq, unsigned flags) 5081 { 5082 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5083 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5084 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5085 5086 /* RELEASE_MEM - flush caches, send int */ 5087 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5088 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5089 EOP_TC_NC_ACTION_EN) : 5090 (EOP_TCL1_ACTION_EN | 5091 EOP_TC_ACTION_EN | 5092 EOP_TC_WB_ACTION_EN | 5093 EOP_TC_MD_ACTION_EN)) | 5094 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5095 EVENT_INDEX(5))); 5096 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5097 5098 /* 5099 * the address should be Qword aligned if 64bit write, Dword 5100 * aligned if only send 32bit data low (discard data high) 5101 */ 5102 if (write64bit) 5103 BUG_ON(addr & 0x7); 5104 else 5105 BUG_ON(addr & 0x3); 5106 amdgpu_ring_write(ring, lower_32_bits(addr)); 5107 amdgpu_ring_write(ring, upper_32_bits(addr)); 5108 amdgpu_ring_write(ring, lower_32_bits(seq)); 5109 amdgpu_ring_write(ring, upper_32_bits(seq)); 5110 amdgpu_ring_write(ring, 0); 5111 } 5112 5113 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5114 { 5115 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5116 uint32_t seq = ring->fence_drv.sync_seq; 5117 uint64_t addr = ring->fence_drv.gpu_addr; 5118 5119 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5120 lower_32_bits(addr), upper_32_bits(addr), 5121 seq, 0xffffffff, 4); 5122 } 5123 5124 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5125 unsigned vmid, uint64_t pd_addr) 5126 { 5127 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5128 5129 /* compute doesn't have PFP */ 5130 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5131 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5132 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5133 amdgpu_ring_write(ring, 0x0); 5134 } 5135 } 5136 5137 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5138 { 5139 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5140 } 5141 5142 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5143 { 5144 u64 wptr; 5145 5146 /* XXX check if swapping is necessary on BE */ 5147 if (ring->use_doorbell) 5148 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5149 else 5150 BUG(); 5151 return wptr; 5152 } 5153 5154 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 5155 bool acquire) 5156 { 5157 struct amdgpu_device *adev = ring->adev; 5158 int pipe_num, tmp, reg; 5159 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 5160 5161 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 5162 5163 /* first me only has 2 entries, GFX and HP3D */ 5164 if (ring->me > 0) 5165 pipe_num -= 2; 5166 5167 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 5168 tmp = RREG32(reg); 5169 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 5170 WREG32(reg, tmp); 5171 } 5172 5173 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 5174 struct amdgpu_ring *ring, 5175 bool acquire) 5176 { 5177 int i, pipe; 5178 bool reserve; 5179 struct amdgpu_ring *iring; 5180 5181 mutex_lock(&adev->gfx.pipe_reserve_mutex); 5182 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 5183 if (acquire) 5184 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5185 else 5186 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5187 5188 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 5189 /* Clear all reservations - everyone reacquires all resources */ 5190 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 5191 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5192 true); 5193 5194 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5195 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5196 true); 5197 } else { 5198 /* Lower all pipes without a current reservation */ 5199 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5200 iring = &adev->gfx.gfx_ring[i]; 5201 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5202 iring->me, 5203 iring->pipe, 5204 0); 5205 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5206 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5207 } 5208 5209 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5210 iring = &adev->gfx.compute_ring[i]; 5211 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5212 iring->me, 5213 iring->pipe, 5214 0); 5215 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5216 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5217 } 5218 } 5219 5220 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5221 } 5222 5223 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5224 struct amdgpu_ring *ring, 5225 bool acquire) 5226 { 5227 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5228 uint32_t queue_priority = acquire ? 0xf : 0x0; 5229 5230 mutex_lock(&adev->srbm_mutex); 5231 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5232 5233 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5234 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5235 5236 soc15_grbm_select(adev, 0, 0, 0, 0); 5237 mutex_unlock(&adev->srbm_mutex); 5238 } 5239 5240 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5241 enum drm_sched_priority priority) 5242 { 5243 struct amdgpu_device *adev = ring->adev; 5244 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5245 5246 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5247 return; 5248 5249 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5250 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5251 } 5252 5253 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5254 { 5255 struct amdgpu_device *adev = ring->adev; 5256 5257 /* XXX check if swapping is necessary on BE */ 5258 if (ring->use_doorbell) { 5259 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5260 WDOORBELL64(ring->doorbell_index, ring->wptr); 5261 } else{ 5262 BUG(); /* only DOORBELL method supported on gfx9 now */ 5263 } 5264 } 5265 5266 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5267 u64 seq, unsigned int flags) 5268 { 5269 struct amdgpu_device *adev = ring->adev; 5270 5271 /* we only allocate 32bit for each seq wb address */ 5272 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5273 5274 /* write fence seq to the "addr" */ 5275 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5276 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5277 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5278 amdgpu_ring_write(ring, lower_32_bits(addr)); 5279 amdgpu_ring_write(ring, upper_32_bits(addr)); 5280 amdgpu_ring_write(ring, lower_32_bits(seq)); 5281 5282 if (flags & AMDGPU_FENCE_FLAG_INT) { 5283 /* set register to trigger INT */ 5284 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5285 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5286 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5287 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5288 amdgpu_ring_write(ring, 0); 5289 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5290 } 5291 } 5292 5293 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5294 { 5295 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5296 amdgpu_ring_write(ring, 0); 5297 } 5298 5299 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5300 { 5301 struct v9_ce_ib_state ce_payload = {0}; 5302 uint64_t csa_addr; 5303 int cnt; 5304 5305 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5306 csa_addr = amdgpu_csa_vaddr(ring->adev); 5307 5308 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5309 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5310 WRITE_DATA_DST_SEL(8) | 5311 WR_CONFIRM) | 5312 WRITE_DATA_CACHE_POLICY(0)); 5313 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5314 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5315 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5316 } 5317 5318 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5319 { 5320 struct v9_de_ib_state de_payload = {0}; 5321 uint64_t csa_addr, gds_addr; 5322 int cnt; 5323 5324 csa_addr = amdgpu_csa_vaddr(ring->adev); 5325 gds_addr = csa_addr + 4096; 5326 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5327 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5328 5329 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5330 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5331 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5332 WRITE_DATA_DST_SEL(8) | 5333 WR_CONFIRM) | 5334 WRITE_DATA_CACHE_POLICY(0)); 5335 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5336 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5337 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5338 } 5339 5340 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5341 { 5342 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5343 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5344 } 5345 5346 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5347 { 5348 uint32_t dw2 = 0; 5349 5350 if (amdgpu_sriov_vf(ring->adev)) 5351 gfx_v9_0_ring_emit_ce_meta(ring); 5352 5353 gfx_v9_0_ring_emit_tmz(ring, true); 5354 5355 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5356 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5357 /* set load_global_config & load_global_uconfig */ 5358 dw2 |= 0x8001; 5359 /* set load_cs_sh_regs */ 5360 dw2 |= 0x01000000; 5361 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5362 dw2 |= 0x10002; 5363 5364 /* set load_ce_ram if preamble presented */ 5365 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5366 dw2 |= 0x10000000; 5367 } else { 5368 /* still load_ce_ram if this is the first time preamble presented 5369 * although there is no context switch happens. 5370 */ 5371 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5372 dw2 |= 0x10000000; 5373 } 5374 5375 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5376 amdgpu_ring_write(ring, dw2); 5377 amdgpu_ring_write(ring, 0); 5378 } 5379 5380 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5381 { 5382 unsigned ret; 5383 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5384 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5385 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5386 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5387 ret = ring->wptr & ring->buf_mask; 5388 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5389 return ret; 5390 } 5391 5392 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5393 { 5394 unsigned cur; 5395 BUG_ON(offset > ring->buf_mask); 5396 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5397 5398 cur = (ring->wptr & ring->buf_mask) - 1; 5399 if (likely(cur > offset)) 5400 ring->ring[offset] = cur - offset; 5401 else 5402 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5403 } 5404 5405 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5406 { 5407 struct amdgpu_device *adev = ring->adev; 5408 5409 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5410 amdgpu_ring_write(ring, 0 | /* src: register*/ 5411 (5 << 8) | /* dst: memory */ 5412 (1 << 20)); /* write confirm */ 5413 amdgpu_ring_write(ring, reg); 5414 amdgpu_ring_write(ring, 0); 5415 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5416 adev->virt.reg_val_offs * 4)); 5417 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5418 adev->virt.reg_val_offs * 4)); 5419 } 5420 5421 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5422 uint32_t val) 5423 { 5424 uint32_t cmd = 0; 5425 5426 switch (ring->funcs->type) { 5427 case AMDGPU_RING_TYPE_GFX: 5428 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5429 break; 5430 case AMDGPU_RING_TYPE_KIQ: 5431 cmd = (1 << 16); /* no inc addr */ 5432 break; 5433 default: 5434 cmd = WR_CONFIRM; 5435 break; 5436 } 5437 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5438 amdgpu_ring_write(ring, cmd); 5439 amdgpu_ring_write(ring, reg); 5440 amdgpu_ring_write(ring, 0); 5441 amdgpu_ring_write(ring, val); 5442 } 5443 5444 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5445 uint32_t val, uint32_t mask) 5446 { 5447 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5448 } 5449 5450 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5451 uint32_t reg0, uint32_t reg1, 5452 uint32_t ref, uint32_t mask) 5453 { 5454 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5455 struct amdgpu_device *adev = ring->adev; 5456 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5457 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5458 5459 if (fw_version_ok) 5460 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5461 ref, mask, 0x20); 5462 else 5463 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5464 ref, mask); 5465 } 5466 5467 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5468 { 5469 struct amdgpu_device *adev = ring->adev; 5470 uint32_t value = 0; 5471 5472 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5473 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5474 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5475 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5476 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5477 } 5478 5479 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5480 enum amdgpu_interrupt_state state) 5481 { 5482 switch (state) { 5483 case AMDGPU_IRQ_STATE_DISABLE: 5484 case AMDGPU_IRQ_STATE_ENABLE: 5485 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5486 TIME_STAMP_INT_ENABLE, 5487 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5488 break; 5489 default: 5490 break; 5491 } 5492 } 5493 5494 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5495 int me, int pipe, 5496 enum amdgpu_interrupt_state state) 5497 { 5498 u32 mec_int_cntl, mec_int_cntl_reg; 5499 5500 /* 5501 * amdgpu controls only the first MEC. That's why this function only 5502 * handles the setting of interrupts for this specific MEC. All other 5503 * pipes' interrupts are set by amdkfd. 5504 */ 5505 5506 if (me == 1) { 5507 switch (pipe) { 5508 case 0: 5509 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5510 break; 5511 case 1: 5512 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5513 break; 5514 case 2: 5515 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5516 break; 5517 case 3: 5518 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5519 break; 5520 default: 5521 DRM_DEBUG("invalid pipe %d\n", pipe); 5522 return; 5523 } 5524 } else { 5525 DRM_DEBUG("invalid me %d\n", me); 5526 return; 5527 } 5528 5529 switch (state) { 5530 case AMDGPU_IRQ_STATE_DISABLE: 5531 mec_int_cntl = RREG32(mec_int_cntl_reg); 5532 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5533 TIME_STAMP_INT_ENABLE, 0); 5534 WREG32(mec_int_cntl_reg, mec_int_cntl); 5535 break; 5536 case AMDGPU_IRQ_STATE_ENABLE: 5537 mec_int_cntl = RREG32(mec_int_cntl_reg); 5538 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5539 TIME_STAMP_INT_ENABLE, 1); 5540 WREG32(mec_int_cntl_reg, mec_int_cntl); 5541 break; 5542 default: 5543 break; 5544 } 5545 } 5546 5547 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5548 struct amdgpu_irq_src *source, 5549 unsigned type, 5550 enum amdgpu_interrupt_state state) 5551 { 5552 switch (state) { 5553 case AMDGPU_IRQ_STATE_DISABLE: 5554 case AMDGPU_IRQ_STATE_ENABLE: 5555 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5556 PRIV_REG_INT_ENABLE, 5557 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5558 break; 5559 default: 5560 break; 5561 } 5562 5563 return 0; 5564 } 5565 5566 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5567 struct amdgpu_irq_src *source, 5568 unsigned type, 5569 enum amdgpu_interrupt_state state) 5570 { 5571 switch (state) { 5572 case AMDGPU_IRQ_STATE_DISABLE: 5573 case AMDGPU_IRQ_STATE_ENABLE: 5574 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5575 PRIV_INSTR_INT_ENABLE, 5576 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5577 default: 5578 break; 5579 } 5580 5581 return 0; 5582 } 5583 5584 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5585 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5586 CP_ECC_ERROR_INT_ENABLE, 1) 5587 5588 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5589 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5590 CP_ECC_ERROR_INT_ENABLE, 0) 5591 5592 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5593 struct amdgpu_irq_src *source, 5594 unsigned type, 5595 enum amdgpu_interrupt_state state) 5596 { 5597 switch (state) { 5598 case AMDGPU_IRQ_STATE_DISABLE: 5599 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5600 CP_ECC_ERROR_INT_ENABLE, 0); 5601 DISABLE_ECC_ON_ME_PIPE(1, 0); 5602 DISABLE_ECC_ON_ME_PIPE(1, 1); 5603 DISABLE_ECC_ON_ME_PIPE(1, 2); 5604 DISABLE_ECC_ON_ME_PIPE(1, 3); 5605 break; 5606 5607 case AMDGPU_IRQ_STATE_ENABLE: 5608 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5609 CP_ECC_ERROR_INT_ENABLE, 1); 5610 ENABLE_ECC_ON_ME_PIPE(1, 0); 5611 ENABLE_ECC_ON_ME_PIPE(1, 1); 5612 ENABLE_ECC_ON_ME_PIPE(1, 2); 5613 ENABLE_ECC_ON_ME_PIPE(1, 3); 5614 break; 5615 default: 5616 break; 5617 } 5618 5619 return 0; 5620 } 5621 5622 5623 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5624 struct amdgpu_irq_src *src, 5625 unsigned type, 5626 enum amdgpu_interrupt_state state) 5627 { 5628 switch (type) { 5629 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5630 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5631 break; 5632 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5633 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5634 break; 5635 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5636 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5637 break; 5638 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5639 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5640 break; 5641 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5642 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5643 break; 5644 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5645 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5646 break; 5647 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5648 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5649 break; 5650 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5651 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5652 break; 5653 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5654 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5655 break; 5656 default: 5657 break; 5658 } 5659 return 0; 5660 } 5661 5662 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5663 struct amdgpu_irq_src *source, 5664 struct amdgpu_iv_entry *entry) 5665 { 5666 int i; 5667 u8 me_id, pipe_id, queue_id; 5668 struct amdgpu_ring *ring; 5669 5670 DRM_DEBUG("IH: CP EOP\n"); 5671 me_id = (entry->ring_id & 0x0c) >> 2; 5672 pipe_id = (entry->ring_id & 0x03) >> 0; 5673 queue_id = (entry->ring_id & 0x70) >> 4; 5674 5675 switch (me_id) { 5676 case 0: 5677 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5678 break; 5679 case 1: 5680 case 2: 5681 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5682 ring = &adev->gfx.compute_ring[i]; 5683 /* Per-queue interrupt is supported for MEC starting from VI. 5684 * The interrupt can only be enabled/disabled per pipe instead of per queue. 5685 */ 5686 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5687 amdgpu_fence_process(ring); 5688 } 5689 break; 5690 } 5691 return 0; 5692 } 5693 5694 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5695 struct amdgpu_iv_entry *entry) 5696 { 5697 u8 me_id, pipe_id, queue_id; 5698 struct amdgpu_ring *ring; 5699 int i; 5700 5701 me_id = (entry->ring_id & 0x0c) >> 2; 5702 pipe_id = (entry->ring_id & 0x03) >> 0; 5703 queue_id = (entry->ring_id & 0x70) >> 4; 5704 5705 switch (me_id) { 5706 case 0: 5707 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5708 break; 5709 case 1: 5710 case 2: 5711 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5712 ring = &adev->gfx.compute_ring[i]; 5713 if (ring->me == me_id && ring->pipe == pipe_id && 5714 ring->queue == queue_id) 5715 drm_sched_fault(&ring->sched); 5716 } 5717 break; 5718 } 5719 } 5720 5721 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5722 struct amdgpu_irq_src *source, 5723 struct amdgpu_iv_entry *entry) 5724 { 5725 DRM_ERROR("Illegal register access in command stream\n"); 5726 gfx_v9_0_fault(adev, entry); 5727 return 0; 5728 } 5729 5730 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5731 struct amdgpu_irq_src *source, 5732 struct amdgpu_iv_entry *entry) 5733 { 5734 DRM_ERROR("Illegal instruction in command stream\n"); 5735 gfx_v9_0_fault(adev, entry); 5736 return 0; 5737 } 5738 5739 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 5740 struct ras_err_data *err_data, 5741 struct amdgpu_iv_entry *entry) 5742 { 5743 /* TODO ue will trigger an interrupt. */ 5744 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 5745 if (adev->gfx.funcs->query_ras_error_count) 5746 adev->gfx.funcs->query_ras_error_count(adev, err_data); 5747 amdgpu_ras_reset_gpu(adev, 0); 5748 return AMDGPU_RAS_SUCCESS; 5749 } 5750 5751 static const struct { 5752 const char *name; 5753 uint32_t ip; 5754 uint32_t inst; 5755 uint32_t seg; 5756 uint32_t reg_offset; 5757 uint32_t per_se_instance; 5758 int32_t num_instance; 5759 uint32_t sec_count_mask; 5760 uint32_t ded_count_mask; 5761 } gfx_ras_edc_regs[] = { 5762 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 5763 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5764 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, 5765 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 5766 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), 5767 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, 5768 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5769 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, 5770 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5771 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, 5772 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 5773 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), 5774 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, 5775 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5776 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, 5777 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5778 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5779 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, 5780 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 5781 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), 5782 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, 5783 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 5784 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, 5785 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 5786 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, 5787 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 5788 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, 5789 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5790 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), 5791 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, 5792 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5793 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, 5794 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5795 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5796 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, 5797 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5798 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5799 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5800 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, 5801 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5802 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5803 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, 5804 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5805 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5806 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5807 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, 5808 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5809 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5810 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5811 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, 5812 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5813 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5814 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5815 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, 5816 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5817 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5818 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5819 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, 5820 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, 5821 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, 5822 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5823 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5824 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, 5825 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5826 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, 5827 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5828 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, 5829 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5830 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, 5831 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5832 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, 5833 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5834 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, 5835 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5836 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, 5837 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5838 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5839 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, 5840 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5841 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5842 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, 5843 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5844 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5845 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, 5846 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5847 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5848 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, 5849 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5850 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5851 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, 5852 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5853 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, 5854 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5855 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, 5856 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5857 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, 5858 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5859 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, 5860 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5861 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, 5862 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5863 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, 5864 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5865 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, 5866 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5867 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, 5868 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5869 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, 5870 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5871 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5872 0 }, 5873 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5874 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, 5875 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5876 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5877 0 }, 5878 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5879 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, 5880 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, 5881 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, 5882 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5883 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5884 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, 5885 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5886 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5887 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, 5888 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5889 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, 5890 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5891 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, 5892 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5893 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, 5894 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5895 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5896 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, 5897 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5898 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5899 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, 5900 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5901 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5902 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, 5903 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5904 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5905 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, 5906 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5907 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, 5908 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5909 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5910 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, 5911 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5912 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5913 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, 5914 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5915 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), 5916 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, 5917 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5918 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5919 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, 5920 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5921 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5922 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, 5923 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5924 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5925 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, 5926 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5927 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5928 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, 5929 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5930 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5931 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, 5932 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5933 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5934 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, 5935 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5936 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5937 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, 5938 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5939 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5940 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, 5941 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5942 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5943 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, 5944 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5945 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5946 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, 5947 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5948 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5949 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, 5950 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5951 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5952 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, 5953 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5954 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5955 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, 5956 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5957 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5958 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, 5959 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", 5960 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5961 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5962 0 }, 5963 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5964 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5965 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5966 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, 5967 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5968 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5969 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", 5970 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5971 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5972 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5973 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5974 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, 5975 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5976 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5977 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, 5978 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5979 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5980 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, 5981 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5982 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5983 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, 5984 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5985 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5986 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, 5987 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", 5988 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5989 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5990 0 }, 5991 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5992 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5993 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5994 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, 5995 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5996 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5997 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", 5998 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5999 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, 6000 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6001 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6002 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, 6003 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6004 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6005 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, 6006 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6007 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6008 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, 6009 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6010 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6011 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, 6012 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6013 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6014 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, 6015 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6016 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, 6017 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6018 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, 6019 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6020 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, 6021 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6022 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, 6023 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 6024 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, 6025 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6026 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6027 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, 6028 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6029 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6030 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, 6031 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6032 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6033 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, 6034 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6035 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, 6036 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6037 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, 6038 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6039 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, 6040 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6041 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, 6042 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6043 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, 6044 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 6045 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, 6046 }; 6047 6048 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6049 void *inject_if) 6050 { 6051 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6052 int ret; 6053 struct ta_ras_trigger_error_input block_info = { 0 }; 6054 6055 if (adev->asic_type != CHIP_VEGA20) 6056 return -EINVAL; 6057 6058 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6059 return -EINVAL; 6060 6061 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6062 return -EPERM; 6063 6064 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6065 info->head.type)) { 6066 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6067 ras_gfx_subblocks[info->head.sub_block_index].name, 6068 info->head.type); 6069 return -EPERM; 6070 } 6071 6072 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6073 info->head.type)) { 6074 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6075 ras_gfx_subblocks[info->head.sub_block_index].name, 6076 info->head.type); 6077 return -EPERM; 6078 } 6079 6080 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6081 block_info.sub_block_index = 6082 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6083 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6084 block_info.address = info->address; 6085 block_info.value = info->value; 6086 6087 mutex_lock(&adev->grbm_idx_mutex); 6088 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6089 mutex_unlock(&adev->grbm_idx_mutex); 6090 6091 return ret; 6092 } 6093 6094 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6095 void *ras_error_status) 6096 { 6097 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6098 uint32_t sec_count, ded_count; 6099 uint32_t i; 6100 uint32_t reg_value; 6101 uint32_t se_id, instance_id; 6102 6103 if (adev->asic_type != CHIP_VEGA20) 6104 return -EINVAL; 6105 6106 err_data->ue_count = 0; 6107 err_data->ce_count = 0; 6108 6109 mutex_lock(&adev->grbm_idx_mutex); 6110 for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 6111 for (instance_id = 0; instance_id < 256; instance_id++) { 6112 for (i = 0; 6113 i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); 6114 i++) { 6115 if (se_id != 0 && 6116 !gfx_ras_edc_regs[i].per_se_instance) 6117 continue; 6118 if (instance_id >= gfx_ras_edc_regs[i].num_instance) 6119 continue; 6120 6121 gfx_v9_0_select_se_sh(adev, se_id, 0, 6122 instance_id); 6123 6124 reg_value = RREG32( 6125 adev->reg_offset[gfx_ras_edc_regs[i].ip] 6126 [gfx_ras_edc_regs[i].inst] 6127 [gfx_ras_edc_regs[i].seg] + 6128 gfx_ras_edc_regs[i].reg_offset); 6129 sec_count = reg_value & 6130 gfx_ras_edc_regs[i].sec_count_mask; 6131 ded_count = reg_value & 6132 gfx_ras_edc_regs[i].ded_count_mask; 6133 if (sec_count) { 6134 DRM_INFO( 6135 "Instance[%d][%d]: SubBlock %s, SEC %d\n", 6136 se_id, instance_id, 6137 gfx_ras_edc_regs[i].name, 6138 sec_count); 6139 err_data->ce_count++; 6140 } 6141 6142 if (ded_count) { 6143 DRM_INFO( 6144 "Instance[%d][%d]: SubBlock %s, DED %d\n", 6145 se_id, instance_id, 6146 gfx_ras_edc_regs[i].name, 6147 ded_count); 6148 err_data->ue_count++; 6149 } 6150 } 6151 } 6152 } 6153 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6154 mutex_unlock(&adev->grbm_idx_mutex); 6155 6156 return 0; 6157 } 6158 6159 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6160 struct amdgpu_irq_src *source, 6161 struct amdgpu_iv_entry *entry) 6162 { 6163 struct ras_common_if *ras_if = adev->gfx.ras_if; 6164 struct ras_dispatch_if ih_data = { 6165 .entry = entry, 6166 }; 6167 6168 if (!ras_if) 6169 return 0; 6170 6171 ih_data.head = *ras_if; 6172 6173 DRM_ERROR("CP ECC ERROR IRQ\n"); 6174 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 6175 return 0; 6176 } 6177 6178 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6179 .name = "gfx_v9_0", 6180 .early_init = gfx_v9_0_early_init, 6181 .late_init = gfx_v9_0_late_init, 6182 .sw_init = gfx_v9_0_sw_init, 6183 .sw_fini = gfx_v9_0_sw_fini, 6184 .hw_init = gfx_v9_0_hw_init, 6185 .hw_fini = gfx_v9_0_hw_fini, 6186 .suspend = gfx_v9_0_suspend, 6187 .resume = gfx_v9_0_resume, 6188 .is_idle = gfx_v9_0_is_idle, 6189 .wait_for_idle = gfx_v9_0_wait_for_idle, 6190 .soft_reset = gfx_v9_0_soft_reset, 6191 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6192 .set_powergating_state = gfx_v9_0_set_powergating_state, 6193 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6194 }; 6195 6196 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6197 .type = AMDGPU_RING_TYPE_GFX, 6198 .align_mask = 0xff, 6199 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6200 .support_64bit_ptrs = true, 6201 .vmhub = AMDGPU_GFXHUB_0, 6202 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6203 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6204 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6205 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6206 5 + /* COND_EXEC */ 6207 7 + /* PIPELINE_SYNC */ 6208 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6209 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6210 2 + /* VM_FLUSH */ 6211 8 + /* FENCE for VM_FLUSH */ 6212 20 + /* GDS switch */ 6213 4 + /* double SWITCH_BUFFER, 6214 the first COND_EXEC jump to the place just 6215 prior to this double SWITCH_BUFFER */ 6216 5 + /* COND_EXEC */ 6217 7 + /* HDP_flush */ 6218 4 + /* VGT_flush */ 6219 14 + /* CE_META */ 6220 31 + /* DE_META */ 6221 3 + /* CNTX_CTRL */ 6222 5 + /* HDP_INVL */ 6223 8 + 8 + /* FENCE x2 */ 6224 2, /* SWITCH_BUFFER */ 6225 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6226 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6227 .emit_fence = gfx_v9_0_ring_emit_fence, 6228 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6229 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6230 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6231 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6232 .test_ring = gfx_v9_0_ring_test_ring, 6233 .test_ib = gfx_v9_0_ring_test_ib, 6234 .insert_nop = amdgpu_ring_insert_nop, 6235 .pad_ib = amdgpu_ring_generic_pad_ib, 6236 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6237 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6238 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6239 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6240 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6241 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6242 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6243 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6244 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6245 }; 6246 6247 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6248 .type = AMDGPU_RING_TYPE_COMPUTE, 6249 .align_mask = 0xff, 6250 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6251 .support_64bit_ptrs = true, 6252 .vmhub = AMDGPU_GFXHUB_0, 6253 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6254 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6255 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6256 .emit_frame_size = 6257 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6258 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6259 5 + /* hdp invalidate */ 6260 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6261 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6262 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6263 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6264 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6265 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6266 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6267 .emit_fence = gfx_v9_0_ring_emit_fence, 6268 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6269 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6270 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6271 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6272 .test_ring = gfx_v9_0_ring_test_ring, 6273 .test_ib = gfx_v9_0_ring_test_ib, 6274 .insert_nop = amdgpu_ring_insert_nop, 6275 .pad_ib = amdgpu_ring_generic_pad_ib, 6276 .set_priority = gfx_v9_0_ring_set_priority_compute, 6277 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6278 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6279 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6280 }; 6281 6282 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6283 .type = AMDGPU_RING_TYPE_KIQ, 6284 .align_mask = 0xff, 6285 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6286 .support_64bit_ptrs = true, 6287 .vmhub = AMDGPU_GFXHUB_0, 6288 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6289 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6290 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6291 .emit_frame_size = 6292 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6293 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6294 5 + /* hdp invalidate */ 6295 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6296 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6297 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6298 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6299 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6300 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6301 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6302 .test_ring = gfx_v9_0_ring_test_ring, 6303 .insert_nop = amdgpu_ring_insert_nop, 6304 .pad_ib = amdgpu_ring_generic_pad_ib, 6305 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6306 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6307 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6308 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6309 }; 6310 6311 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6312 { 6313 int i; 6314 6315 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6316 6317 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6318 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6319 6320 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6321 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6322 } 6323 6324 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6325 .set = gfx_v9_0_set_eop_interrupt_state, 6326 .process = gfx_v9_0_eop_irq, 6327 }; 6328 6329 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6330 .set = gfx_v9_0_set_priv_reg_fault_state, 6331 .process = gfx_v9_0_priv_reg_irq, 6332 }; 6333 6334 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6335 .set = gfx_v9_0_set_priv_inst_fault_state, 6336 .process = gfx_v9_0_priv_inst_irq, 6337 }; 6338 6339 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6340 .set = gfx_v9_0_set_cp_ecc_error_state, 6341 .process = gfx_v9_0_cp_ecc_error_irq, 6342 }; 6343 6344 6345 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 6346 { 6347 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6348 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 6349 6350 adev->gfx.priv_reg_irq.num_types = 1; 6351 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 6352 6353 adev->gfx.priv_inst_irq.num_types = 1; 6354 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 6355 6356 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 6357 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 6358 } 6359 6360 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 6361 { 6362 switch (adev->asic_type) { 6363 case CHIP_VEGA10: 6364 case CHIP_VEGA12: 6365 case CHIP_VEGA20: 6366 case CHIP_RAVEN: 6367 case CHIP_ARCTURUS: 6368 case CHIP_RENOIR: 6369 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 6370 break; 6371 default: 6372 break; 6373 } 6374 } 6375 6376 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 6377 { 6378 /* init asci gds info */ 6379 switch (adev->asic_type) { 6380 case CHIP_VEGA10: 6381 case CHIP_VEGA12: 6382 case CHIP_VEGA20: 6383 adev->gds.gds_size = 0x10000; 6384 break; 6385 case CHIP_RAVEN: 6386 case CHIP_ARCTURUS: 6387 adev->gds.gds_size = 0x1000; 6388 break; 6389 default: 6390 adev->gds.gds_size = 0x10000; 6391 break; 6392 } 6393 6394 switch (adev->asic_type) { 6395 case CHIP_VEGA10: 6396 case CHIP_VEGA20: 6397 adev->gds.gds_compute_max_wave_id = 0x7ff; 6398 break; 6399 case CHIP_VEGA12: 6400 adev->gds.gds_compute_max_wave_id = 0x27f; 6401 break; 6402 case CHIP_RAVEN: 6403 if (adev->rev_id >= 0x8) 6404 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 6405 else 6406 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 6407 break; 6408 case CHIP_ARCTURUS: 6409 adev->gds.gds_compute_max_wave_id = 0xfff; 6410 break; 6411 default: 6412 /* this really depends on the chip */ 6413 adev->gds.gds_compute_max_wave_id = 0x7ff; 6414 break; 6415 } 6416 6417 adev->gds.gws_size = 64; 6418 adev->gds.oa_size = 16; 6419 } 6420 6421 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 6422 u32 bitmap) 6423 { 6424 u32 data; 6425 6426 if (!bitmap) 6427 return; 6428 6429 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 6430 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 6431 6432 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 6433 } 6434 6435 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 6436 { 6437 u32 data, mask; 6438 6439 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 6440 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 6441 6442 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 6443 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 6444 6445 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 6446 6447 return (~data) & mask; 6448 } 6449 6450 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 6451 struct amdgpu_cu_info *cu_info) 6452 { 6453 int i, j, k, counter, active_cu_number = 0; 6454 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 6455 unsigned disable_masks[4 * 4]; 6456 6457 if (!adev || !cu_info) 6458 return -EINVAL; 6459 6460 /* 6461 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 6462 */ 6463 if (adev->gfx.config.max_shader_engines * 6464 adev->gfx.config.max_sh_per_se > 16) 6465 return -EINVAL; 6466 6467 amdgpu_gfx_parse_disable_cu(disable_masks, 6468 adev->gfx.config.max_shader_engines, 6469 adev->gfx.config.max_sh_per_se); 6470 6471 mutex_lock(&adev->grbm_idx_mutex); 6472 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6473 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6474 mask = 1; 6475 ao_bitmap = 0; 6476 counter = 0; 6477 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 6478 gfx_v9_0_set_user_cu_inactive_bitmap( 6479 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 6480 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 6481 6482 /* 6483 * The bitmap(and ao_cu_bitmap) in cu_info structure is 6484 * 4x4 size array, and it's usually suitable for Vega 6485 * ASICs which has 4*2 SE/SH layout. 6486 * But for Arcturus, SE/SH layout is changed to 8*1. 6487 * To mostly reduce the impact, we make it compatible 6488 * with current bitmap array as below: 6489 * SE4,SH0 --> bitmap[0][1] 6490 * SE5,SH0 --> bitmap[1][1] 6491 * SE6,SH0 --> bitmap[2][1] 6492 * SE7,SH0 --> bitmap[3][1] 6493 */ 6494 cu_info->bitmap[i % 4][j + i / 4] = bitmap; 6495 6496 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 6497 if (bitmap & mask) { 6498 if (counter < adev->gfx.config.max_cu_per_sh) 6499 ao_bitmap |= mask; 6500 counter ++; 6501 } 6502 mask <<= 1; 6503 } 6504 active_cu_number += counter; 6505 if (i < 2 && j < 2) 6506 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 6507 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 6508 } 6509 } 6510 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6511 mutex_unlock(&adev->grbm_idx_mutex); 6512 6513 cu_info->number = active_cu_number; 6514 cu_info->ao_cu_mask = ao_cu_mask; 6515 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 6516 6517 return 0; 6518 } 6519 6520 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 6521 { 6522 .type = AMD_IP_BLOCK_TYPE_GFX, 6523 .major = 9, 6524 .minor = 0, 6525 .rev = 0, 6526 .funcs = &gfx_v9_0_ip_funcs, 6527 }; 6528