/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 110 111 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 113 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 115 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 117 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 119 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 121 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 123 124 enum ta_ras_gfx_subblock { 125 /*CPC*/ 126 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 127 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 128 TA_RAS_BLOCK__GFX_CPC_UCODE, 129 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 130 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 131 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 132 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 133 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 134 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 135 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 136 /* CPF*/ 137 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 138 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 139 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 140 TA_RAS_BLOCK__GFX_CPF_TAG, 141 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 142 /* CPG*/ 143 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 144 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 145 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 146 TA_RAS_BLOCK__GFX_CPG_TAG, 147 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 148 /* GDS*/ 149 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 150 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 151 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 152 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 153 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 154 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 155 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 156 /* SPI*/ 157 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 158 /* SQ*/ 159 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 160 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 161 TA_RAS_BLOCK__GFX_SQ_LDS_D, 162 TA_RAS_BLOCK__GFX_SQ_LDS_I, 163 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 164 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 165 /* SQC (3 ranges)*/ 166 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 167 /* SQC range 0*/ 168 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 169 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 170 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 171 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 172 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 173 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 174 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 175 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 176 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 177 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 178 
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)		\
	[AMDGPU_RAS_BLOCK__##subblock] = {				\
		#subblock,						\
		TA_RAS_BLOCK__##subblock,				\
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),		\
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),		\
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
				       const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

			/* TODO: Determine if MEC2 JT FW loading can be removed
				 for all GFX V9 asic and above */
			if (adev->asic_type != CHIP_ARCTURUS) {
				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
				info->fw = adev->gfx.mec2_fw;
				adev->firmware.fw_size +=
					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
					PAGE_SIZE);
			}
		}
	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int r;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_VEGA20:
		chip_name = "vega20";
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 8)
			chip_name = "raven2";
		else if (adev->pdev->device == 0x15d8)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	default:
		BUG();
	}

	/* No CPG in Arcturus */
	if (adev->asic_type != CHIP_ARCTURUS) {
		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
		if (r)
			return r;
	}

	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
	if (r)
		return r;

	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
	if (r)
		return r;

	return r;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
count = 0, i; 1400 const struct cs_section_def *sect = NULL; 1401 const struct cs_extent_def *ext = NULL; 1402 1403 if (adev->gfx.rlc.cs_data == NULL) 1404 return; 1405 if (buffer == NULL) 1406 return; 1407 1408 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1409 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1410 1411 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1412 buffer[count++] = cpu_to_le32(0x80000000); 1413 buffer[count++] = cpu_to_le32(0x80000000); 1414 1415 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1416 for (ext = sect->section; ext->extent != NULL; ++ext) { 1417 if (sect->id == SECT_CONTEXT) { 1418 buffer[count++] = 1419 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1420 buffer[count++] = cpu_to_le32(ext->reg_index - 1421 PACKET3_SET_CONTEXT_REG_START); 1422 for (i = 0; i < ext->reg_count; i++) 1423 buffer[count++] = cpu_to_le32(ext->extent[i]); 1424 } else { 1425 return; 1426 } 1427 } 1428 } 1429 1430 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1431 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1432 1433 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1434 buffer[count++] = cpu_to_le32(0); 1435 } 1436 1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1438 { 1439 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1440 uint32_t pg_always_on_cu_num = 2; 1441 uint32_t always_on_cu_num; 1442 uint32_t i, j, k; 1443 uint32_t mask, cu_bitmap, counter; 1444 1445 if (adev->flags & AMD_IS_APU) 1446 always_on_cu_num = 4; 1447 else if (adev->asic_type == CHIP_VEGA12) 1448 always_on_cu_num = 8; 1449 else 1450 always_on_cu_num = 12; 1451 1452 mutex_lock(&adev->grbm_idx_mutex); 1453 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1454 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1455 mask = 1; 1456 cu_bitmap = 0; 1457 counter = 0; 1458 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1459 1460 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1461 if (cu_info->bitmap[i][j] & mask) { 1462 if (counter == pg_always_on_cu_num) 1463 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1464 if (counter < always_on_cu_num) 1465 cu_bitmap |= mask; 1466 else 1467 break; 1468 counter++; 1469 } 1470 mask <<= 1; 1471 } 1472 1473 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1474 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1475 } 1476 } 1477 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1478 mutex_unlock(&adev->grbm_idx_mutex); 1479 } 1480 1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1482 { 1483 uint32_t data; 1484 1485 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1486 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1487 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1488 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1489 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1490 1491 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1492 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1493 1494 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1495 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1496 1497 mutex_lock(&adev->grbm_idx_mutex); 1498 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1499 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1500 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1501 1502 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 
1503 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1504 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1505 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1506 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1507 1508 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1509 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1510 data &= 0x0000FFFF; 1511 data |= 0x00C00000; 1512 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1513 1514 /* 1515 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1516 * programmed in gfx_v9_0_init_always_on_cu_mask() 1517 */ 1518 1519 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1520 * but used for RLC_LB_CNTL configuration */ 1521 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1522 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1523 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1524 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1525 mutex_unlock(&adev->grbm_idx_mutex); 1526 1527 gfx_v9_0_init_always_on_cu_mask(adev); 1528 } 1529 1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1531 { 1532 uint32_t data; 1533 1534 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1535 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1536 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1537 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1538 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1539 1540 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1541 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1542 1543 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1544 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1545 1546 mutex_lock(&adev->grbm_idx_mutex); 1547 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1548 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1549 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1550 1551 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1552 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1553 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1554 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1555 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1556 1557 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1558 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1559 data &= 0x0000FFFF; 1560 data |= 0x00C00000; 1561 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1562 1563 /* 1564 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1565 * programmed in gfx_v9_0_init_always_on_cu_mask() 1566 */ 1567 1568 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1569 * but used for RLC_LB_CNTL configuration */ 1570 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1571 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1572 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1573 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1574 mutex_unlock(&adev->grbm_idx_mutex); 1575 1576 gfx_v9_0_init_always_on_cu_mask(adev); 1577 } 1578 1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1580 { 1581 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1582 } 1583 1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1585 { 1586 return 5; 1587 } 1588 1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1590 { 1591 const struct cs_section_def *cs_data; 1592 int r; 1593 1594 adev->gfx.rlc.cs_data = gfx9_cs_data; 1595 1596 cs_data = adev->gfx.rlc.cs_data; 1597 1598 if (cs_data) { 1599 /* init clear state block */ 1600 r = amdgpu_gfx_rlc_init_csb(adev); 1601 if (r) 1602 return r; 1603 } 1604 1605 if (adev->asic_type == CHIP_RAVEN) { 1606 /* TODO: double check the cp_table_size for RV */ 1607 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1608 r = amdgpu_gfx_rlc_init_cpt(adev); 1609 if (r) 1610 return r; 1611 } 1612 1613 switch (adev->asic_type) { 1614 case CHIP_RAVEN: 1615 gfx_v9_0_init_lbpw(adev); 1616 break; 1617 case CHIP_VEGA20: 1618 gfx_v9_4_init_lbpw(adev); 1619 break; 1620 default: 1621 break; 1622 } 1623 1624 return 0; 1625 } 1626 1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) 1628 { 1629 int r; 1630 1631 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1632 if (unlikely(r != 0)) 1633 return r; 1634 1635 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, 1636 AMDGPU_GEM_DOMAIN_VRAM); 1637 if (!r) 1638 adev->gfx.rlc.clear_state_gpu_addr = 1639 amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); 1640 1641 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1642 1643 return r; 1644 } 1645 1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) 1647 { 1648 int r; 1649 1650 if (!adev->gfx.rlc.clear_state_obj) 1651 return; 1652 1653 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); 1654 if (likely(r == 0)) { 1655 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1656 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1657 } 1658 } 1659 1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1661 { 1662 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1663 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1664 } 1665 1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1667 { 1668 int r; 1669 u32 *hpd; 1670 const __le32 *fw_data; 1671 unsigned fw_size; 1672 u32 *fw; 1673 size_t mec_hpd_size; 1674 1675 const struct gfx_firmware_header_v1_0 *mec_hdr; 1676 1677 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1678 1679 /* take ownership of the relevant compute queues */ 1680 amdgpu_gfx_compute_queue_acquire(adev); 1681 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1682 1683 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1684 AMDGPU_GEM_DOMAIN_VRAM, 1685 &adev->gfx.mec.hpd_eop_obj, 1686 &adev->gfx.mec.hpd_eop_gpu_addr, 1687 (void **)&hpd); 1688 if (r) { 1689 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1690 gfx_v9_0_mec_fini(adev); 1691 return r; 1692 } 1693 1694 memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); 1695 1696 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1697 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1698 1699 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1700 1701 fw_data = (const __le32 *) 1702 (adev->gfx.mec_fw->data + 1703 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1704 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 1705 1706 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1707 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1708 &adev->gfx.mec.mec_fw_obj, 1709 &adev->gfx.mec.mec_fw_gpu_addr, 1710 (void **)&fw); 1711 if (r) { 
1712 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1713 gfx_v9_0_mec_fini(adev); 1714 return r; 1715 } 1716 1717 memcpy(fw, fw_data, fw_size); 1718 1719 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1720 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1721 1722 return 0; 1723 } 1724 1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1726 { 1727 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1728 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1729 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1730 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1731 (SQ_IND_INDEX__FORCE_READ_MASK)); 1732 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1733 } 1734 1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1736 uint32_t wave, uint32_t thread, 1737 uint32_t regno, uint32_t num, uint32_t *out) 1738 { 1739 WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, 1740 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1741 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1742 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1743 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1744 (SQ_IND_INDEX__FORCE_READ_MASK) | 1745 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1746 while (num--) 1747 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1748 } 1749 1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1751 { 1752 /* type 1 wave data */ 1753 dst[(*no_fields)++] = 1; 1754 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1755 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1756 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1757 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1758 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1759 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1760 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1761 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1762 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1763 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1764 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1765 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1766 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1767 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1768 } 1769 1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 1771 uint32_t wave, uint32_t start, 1772 uint32_t size, uint32_t *dst) 1773 { 1774 wave_read_regs( 1775 adev, simd, wave, 0, 1776 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1777 } 1778 1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 1780 uint32_t wave, uint32_t thread, 1781 uint32_t start, uint32_t size, 1782 uint32_t *dst) 1783 { 1784 wave_read_regs( 1785 adev, simd, wave, thread, 1786 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1787 } 1788 1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1790 u32 me, u32 pipe, u32 q, u32 vm) 1791 { 1792 soc15_grbm_select(adev, me, pipe, q, vm); 1793 } 1794 1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1796 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1797 .select_se_sh = &gfx_v9_0_select_se_sh, 1798 .read_wave_data = &gfx_v9_0_read_wave_data, 1799 .read_wave_sgprs = 
&gfx_v9_0_read_wave_sgprs, 1800 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1801 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1802 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1803 .query_ras_error_count = &gfx_v9_0_query_ras_error_count 1804 }; 1805 1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1807 { 1808 u32 gb_addr_config; 1809 int err; 1810 1811 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1812 1813 switch (adev->asic_type) { 1814 case CHIP_VEGA10: 1815 adev->gfx.config.max_hw_contexts = 8; 1816 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1817 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1818 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1819 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1820 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1821 break; 1822 case CHIP_VEGA12: 1823 adev->gfx.config.max_hw_contexts = 8; 1824 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1825 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1826 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1827 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1828 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1829 DRM_INFO("fix gfx.config for vega12\n"); 1830 break; 1831 case CHIP_VEGA20: 1832 adev->gfx.config.max_hw_contexts = 8; 1833 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1834 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1835 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1836 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1837 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1838 gb_addr_config &= ~0xf3e777ff; 1839 gb_addr_config |= 0x22014042; 1840 /* check vbios table if gpu info is not available */ 1841 err = amdgpu_atomfirmware_get_gfx_info(adev); 1842 if (err) 1843 return err; 1844 break; 1845 case CHIP_RAVEN: 1846 adev->gfx.config.max_hw_contexts = 8; 1847 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1848 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1849 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1850 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1851 if (adev->rev_id >= 8) 1852 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1853 else 1854 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1855 break; 1856 case CHIP_ARCTURUS: 1857 adev->gfx.config.max_hw_contexts = 8; 1858 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1859 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1860 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1861 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1862 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1863 gb_addr_config &= ~0xf3e777ff; 1864 gb_addr_config |= 0x22014042; 1865 break; 1866 default: 1867 BUG(); 1868 break; 1869 } 1870 1871 adev->gfx.config.gb_addr_config = gb_addr_config; 1872 1873 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1874 REG_GET_FIELD( 1875 adev->gfx.config.gb_addr_config, 1876 GB_ADDR_CONFIG, 1877 NUM_PIPES); 1878 1879 adev->gfx.config.max_tile_pipes = 1880 adev->gfx.config.gb_addr_config_fields.num_pipes; 1881 1882 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1883 REG_GET_FIELD( 1884 adev->gfx.config.gb_addr_config, 1885 GB_ADDR_CONFIG, 1886 NUM_BANKS); 1887 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1888 REG_GET_FIELD( 1889 adev->gfx.config.gb_addr_config, 1890 GB_ADDR_CONFIG, 1891 MAX_COMPRESSED_FRAGS); 1892 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1893 REG_GET_FIELD( 1894 adev->gfx.config.gb_addr_config, 1895 GB_ADDR_CONFIG, 1896 NUM_RB_PER_SE); 1897 
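    /*
     * GB_ADDR_CONFIG stores each of these fields as a log2 value (pipe
     * interleave size with an extra +8 bias), hence the 1 << REG_GET_FIELD()
     * decode above and below.
     */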
adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1898 REG_GET_FIELD( 1899 adev->gfx.config.gb_addr_config, 1900 GB_ADDR_CONFIG, 1901 NUM_SHADER_ENGINES); 1902 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1903 REG_GET_FIELD( 1904 adev->gfx.config.gb_addr_config, 1905 GB_ADDR_CONFIG, 1906 PIPE_INTERLEAVE_SIZE)); 1907 1908 return 0; 1909 } 1910 1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1912 struct amdgpu_ngg_buf *ngg_buf, 1913 int size_se, 1914 int default_size_se) 1915 { 1916 int r; 1917 1918 if (size_se < 0) { 1919 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); 1920 return -EINVAL; 1921 } 1922 size_se = size_se ? size_se : default_size_se; 1923 1924 ngg_buf->size = size_se * adev->gfx.config.max_shader_engines; 1925 r = amdgpu_bo_create_kernel(adev, ngg_buf->size, 1926 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1927 &ngg_buf->bo, 1928 &ngg_buf->gpu_addr, 1929 NULL); 1930 if (r) { 1931 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); 1932 return r; 1933 } 1934 ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); 1935 1936 return r; 1937 } 1938 1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) 1940 { 1941 int i; 1942 1943 for (i = 0; i < NGG_BUF_MAX; i++) 1944 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, 1945 &adev->gfx.ngg.buf[i].gpu_addr, 1946 NULL); 1947 1948 memset(&adev->gfx.ngg.buf[0], 0, 1949 sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); 1950 1951 adev->gfx.ngg.init = false; 1952 1953 return 0; 1954 } 1955 1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) 1957 { 1958 int r; 1959 1960 if (!amdgpu_ngg || adev->gfx.ngg.init == true) 1961 return 0; 1962 1963 /* GDS reserve memory: 64 bytes alignment */ 1964 adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); 1965 adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size; 1966 adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); 1967 adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); 1968 1969 /* Primitive Buffer */ 1970 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], 1971 amdgpu_prim_buf_per_se, 1972 64 * 1024); 1973 if (r) { 1974 dev_err(adev->dev, "Failed to create Primitive Buffer\n"); 1975 goto err; 1976 } 1977 1978 /* Position Buffer */ 1979 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], 1980 amdgpu_pos_buf_per_se, 1981 256 * 1024); 1982 if (r) { 1983 dev_err(adev->dev, "Failed to create Position Buffer\n"); 1984 goto err; 1985 } 1986 1987 /* Control Sideband */ 1988 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], 1989 amdgpu_cntl_sb_buf_per_se, 1990 256); 1991 if (r) { 1992 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); 1993 goto err; 1994 } 1995 1996 /* Parameter Cache, not created by default */ 1997 if (amdgpu_param_buf_per_se <= 0) 1998 goto out; 1999 2000 r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], 2001 amdgpu_param_buf_per_se, 2002 512 * 1024); 2003 if (r) { 2004 dev_err(adev->dev, "Failed to create Parameter Cache\n"); 2005 goto err; 2006 } 2007 2008 out: 2009 adev->gfx.ngg.init = true; 2010 return 0; 2011 err: 2012 gfx_v9_0_ngg_fini(adev); 2013 return r; 2014 } 2015 2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) 2017 { 2018 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2019 int r; 2020 u32 data, base; 2021 2022 if (!amdgpu_ngg) 2023 return 0; 2024 2025 /* Program buffer size */ 2026 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, 2027 adev->gfx.ngg.buf[NGG_PRIM].size >> 8); 2028 
data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, 2029 adev->gfx.ngg.buf[NGG_POS].size >> 8); 2030 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); 2031 2032 data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, 2033 adev->gfx.ngg.buf[NGG_CNTL].size >> 8); 2034 data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, 2035 adev->gfx.ngg.buf[NGG_PARAM].size >> 10); 2036 WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); 2037 2038 /* Program buffer base address */ 2039 base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2040 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 2041 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 2042 2043 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 2044 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 2045 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 2046 2047 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2048 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 2049 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 2050 2051 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 2052 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 2053 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 2054 2055 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2056 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 2057 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 2058 2059 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 2060 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 2061 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 2062 2063 /* Clear GDS reserved memory */ 2064 r = amdgpu_ring_alloc(ring, 17); 2065 if (r) { 2066 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 2067 ring->name, r); 2068 return r; 2069 } 2070 2071 gfx_v9_0_write_data_to_reg(ring, 0, false, 2072 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 2073 (adev->gds.gds_size + 2074 adev->gfx.ngg.gds_reserve_size)); 2075 2076 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 2077 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 2078 PACKET3_DMA_DATA_DST_SEL(1) | 2079 PACKET3_DMA_DATA_SRC_SEL(2))); 2080 amdgpu_ring_write(ring, 0); 2081 amdgpu_ring_write(ring, 0); 2082 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 2083 amdgpu_ring_write(ring, 0); 2084 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 2085 adev->gfx.ngg.gds_reserve_size); 2086 2087 gfx_v9_0_write_data_to_reg(ring, 0, false, 2088 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 2089 2090 amdgpu_ring_commit(ring); 2091 2092 return 0; 2093 } 2094 2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2096 int mec, int pipe, int queue) 2097 { 2098 int r; 2099 unsigned irq_type; 2100 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2101 2102 ring = &adev->gfx.compute_ring[ring_id]; 2103 2104 /* mec0 is me1 */ 2105 ring->me = mec + 1; 2106 ring->pipe = pipe; 2107 ring->queue = queue; 2108 2109 ring->ring_obj = NULL; 2110 ring->use_doorbell = true; 2111 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2112 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2113 + (ring_id * GFX9_MEC_HPD_SIZE); 2114 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2115 2116 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2117 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2118 + ring->pipe; 2119 2120 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2121 r = amdgpu_ring_init(adev, ring, 1024, 2122 
&adev->gfx.eop_irq, irq_type); 2123 if (r) 2124 return r; 2125 2126 2127 return 0; 2128 } 2129 2130 static int gfx_v9_0_sw_init(void *handle) 2131 { 2132 int i, j, k, r, ring_id; 2133 struct amdgpu_ring *ring; 2134 struct amdgpu_kiq *kiq; 2135 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2136 2137 switch (adev->asic_type) { 2138 case CHIP_VEGA10: 2139 case CHIP_VEGA12: 2140 case CHIP_VEGA20: 2141 case CHIP_RAVEN: 2142 case CHIP_ARCTURUS: 2143 adev->gfx.mec.num_mec = 2; 2144 break; 2145 default: 2146 adev->gfx.mec.num_mec = 1; 2147 break; 2148 } 2149 2150 adev->gfx.mec.num_pipe_per_mec = 4; 2151 adev->gfx.mec.num_queue_per_pipe = 8; 2152 2153 /* EOP Event */ 2154 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2155 if (r) 2156 return r; 2157 2158 /* Privileged reg */ 2159 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2160 &adev->gfx.priv_reg_irq); 2161 if (r) 2162 return r; 2163 2164 /* Privileged inst */ 2165 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2166 &adev->gfx.priv_inst_irq); 2167 if (r) 2168 return r; 2169 2170 /* ECC error */ 2171 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2172 &adev->gfx.cp_ecc_error_irq); 2173 if (r) 2174 return r; 2175 2176 /* FUE error */ 2177 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2178 &adev->gfx.cp_ecc_error_irq); 2179 if (r) 2180 return r; 2181 2182 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2183 2184 gfx_v9_0_scratch_init(adev); 2185 2186 r = gfx_v9_0_init_microcode(adev); 2187 if (r) { 2188 DRM_ERROR("Failed to load gfx firmware!\n"); 2189 return r; 2190 } 2191 2192 r = adev->gfx.rlc.funcs->init(adev); 2193 if (r) { 2194 DRM_ERROR("Failed to init rlc BOs!\n"); 2195 return r; 2196 } 2197 2198 r = gfx_v9_0_mec_init(adev); 2199 if (r) { 2200 DRM_ERROR("Failed to init MEC BOs!\n"); 2201 return r; 2202 } 2203 2204 /* set up the gfx ring */ 2205 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2206 ring = &adev->gfx.gfx_ring[i]; 2207 ring->ring_obj = NULL; 2208 if (!i) 2209 sprintf(ring->name, "gfx"); 2210 else 2211 sprintf(ring->name, "gfx_%d", i); 2212 ring->use_doorbell = true; 2213 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2214 r = amdgpu_ring_init(adev, ring, 1024, 2215 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); 2216 if (r) 2217 return r; 2218 } 2219 2220 /* set up the compute queues - allocate horizontally across pipes */ 2221 ring_id = 0; 2222 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2223 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2224 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2225 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2226 continue; 2227 2228 r = gfx_v9_0_compute_ring_init(adev, 2229 ring_id, 2230 i, k, j); 2231 if (r) 2232 return r; 2233 2234 ring_id++; 2235 } 2236 } 2237 } 2238 2239 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2240 if (r) { 2241 DRM_ERROR("Failed to init KIQ BOs!\n"); 2242 return r; 2243 } 2244 2245 kiq = &adev->gfx.kiq; 2246 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2247 if (r) 2248 return r; 2249 2250 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2251 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2252 if (r) 2253 return r; 2254 2255 adev->gfx.ce_ram_size = 0x8000; 2256 2257 r = gfx_v9_0_gpu_early_init(adev); 2258 if (r) 2259 
return r; 2260 2261 r = gfx_v9_0_ngg_init(adev); 2262 if (r) 2263 return r; 2264 2265 return 0; 2266 } 2267 2268 2269 static int gfx_v9_0_sw_fini(void *handle) 2270 { 2271 int i; 2272 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2273 2274 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 2275 adev->gfx.ras_if) { 2276 struct ras_common_if *ras_if = adev->gfx.ras_if; 2277 struct ras_ih_if ih_info = { 2278 .head = *ras_if, 2279 }; 2280 2281 amdgpu_ras_debugfs_remove(adev, ras_if); 2282 amdgpu_ras_sysfs_remove(adev, ras_if); 2283 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 2284 amdgpu_ras_feature_enable(adev, ras_if, 0); 2285 kfree(ras_if); 2286 } 2287 2288 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2289 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2290 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2291 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2292 2293 amdgpu_gfx_mqd_sw_fini(adev); 2294 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2295 amdgpu_gfx_kiq_fini(adev); 2296 2297 gfx_v9_0_mec_fini(adev); 2298 gfx_v9_0_ngg_fini(adev); 2299 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2300 if (adev->asic_type == CHIP_RAVEN) { 2301 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2302 &adev->gfx.rlc.cp_table_gpu_addr, 2303 (void **)&adev->gfx.rlc.cp_table_ptr); 2304 } 2305 gfx_v9_0_free_microcode(adev); 2306 2307 return 0; 2308 } 2309 2310 2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2312 { 2313 /* TODO */ 2314 } 2315 2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 2317 { 2318 u32 data; 2319 2320 if (instance == 0xffffffff) 2321 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2322 else 2323 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2324 2325 if (se_num == 0xffffffff) 2326 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2327 else 2328 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2329 2330 if (sh_num == 0xffffffff) 2331 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2332 else 2333 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2334 2335 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2336 } 2337 2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2339 { 2340 u32 data, mask; 2341 2342 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2343 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2344 2345 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2346 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2347 2348 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2349 adev->gfx.config.max_sh_per_se); 2350 2351 return (~data) & mask; 2352 } 2353 2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2355 { 2356 int i, j; 2357 u32 data; 2358 u32 active_rbs = 0; 2359 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2360 adev->gfx.config.max_sh_per_se; 2361 2362 mutex_lock(&adev->grbm_idx_mutex); 2363 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2364 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2365 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2366 data = gfx_v9_0_get_rb_active_bitmap(adev); 2367 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2368 rb_bitmap_width_per_sh); 2369 } 2370 } 2371 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2372 
    mutex_unlock(&adev->grbm_idx_mutex);

    /* active_rbs has one bit set per enabled render backend, packed per SE/SH */
    adev->gfx.config.backend_enable_mask = active_rbs;
    adev->gfx.config.num_rbs = hweight32(active_rbs);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
    int i;
    uint32_t sh_mem_config;
    uint32_t sh_mem_bases;

    /*
     * Configure apertures:
     * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
     * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
     * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
     */
    sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

    sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
            SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
            SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

    mutex_lock(&adev->srbm_mutex);
    for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
        soc15_grbm_select(adev, 0, 0, 0, i);
        /* CP and shaders */
        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
        WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
    }
    soc15_grbm_select(adev, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);

    /* Initialize all compute VMIDs to have no GDS, GWS, or OA
       access. These should be enabled by FW for target VMIDs. */
    for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
        WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
        WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
        WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
        WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
    }
}

static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
    int vmid;

    /*
     * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
     * access. Compute VMIDs should be enabled by FW for target VMIDs,
     * the driver can enable them for graphics. VMID0 should maintain
     * access so that HWS firmware can save/restore entries.
2428 */ 2429 for (vmid = 1; vmid < 16; vmid++) { 2430 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2431 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2432 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2433 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2434 } 2435 } 2436 2437 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2438 { 2439 u32 tmp; 2440 int i; 2441 2442 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2443 2444 gfx_v9_0_tiling_mode_table_init(adev); 2445 2446 gfx_v9_0_setup_rb(adev); 2447 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2448 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2449 2450 /* XXX SH_MEM regs */ 2451 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2452 mutex_lock(&adev->srbm_mutex); 2453 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2454 soc15_grbm_select(adev, 0, 0, 0, i); 2455 /* CP and shaders */ 2456 if (i == 0) { 2457 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2458 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2459 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2460 !!amdgpu_noretry); 2461 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2462 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2463 } else { 2464 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2465 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2466 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2467 !!amdgpu_noretry); 2468 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2469 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2470 (adev->gmc.private_aperture_start >> 48)); 2471 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2472 (adev->gmc.shared_aperture_start >> 48)); 2473 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2474 } 2475 } 2476 soc15_grbm_select(adev, 0, 0, 0, 0); 2477 2478 mutex_unlock(&adev->srbm_mutex); 2479 2480 gfx_v9_0_init_compute_vmid(adev); 2481 gfx_v9_0_init_gds_vmid(adev); 2482 } 2483 2484 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2485 { 2486 u32 i, j, k; 2487 u32 mask; 2488 2489 mutex_lock(&adev->grbm_idx_mutex); 2490 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2491 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2492 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2493 for (k = 0; k < adev->usec_timeout; k++) { 2494 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2495 break; 2496 udelay(1); 2497 } 2498 if (k == adev->usec_timeout) { 2499 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2500 0xffffffff, 0xffffffff); 2501 mutex_unlock(&adev->grbm_idx_mutex); 2502 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2503 i, j); 2504 return; 2505 } 2506 } 2507 } 2508 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2509 mutex_unlock(&adev->grbm_idx_mutex); 2510 2511 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2512 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2513 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2514 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2515 for (k = 0; k < adev->usec_timeout; k++) { 2516 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2517 break; 2518 udelay(1); 2519 } 2520 } 2521 2522 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2523 bool enable) 2524 { 2525 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2526 2527 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2528 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 
1 : 0); 2529 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2530 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2531 2532 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2533 } 2534 2535 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2536 { 2537 /* csib */ 2538 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2539 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2540 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2541 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2542 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2543 adev->gfx.rlc.clear_state_size); 2544 } 2545 2546 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2547 int indirect_offset, 2548 int list_size, 2549 int *unique_indirect_regs, 2550 int unique_indirect_reg_count, 2551 int *indirect_start_offsets, 2552 int *indirect_start_offsets_count, 2553 int max_start_offsets_count) 2554 { 2555 int idx; 2556 2557 for (; indirect_offset < list_size; indirect_offset++) { 2558 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2559 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2560 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2561 2562 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2563 indirect_offset += 2; 2564 2565 /* look for the matching indice */ 2566 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2567 if (unique_indirect_regs[idx] == 2568 register_list_format[indirect_offset] || 2569 !unique_indirect_regs[idx]) 2570 break; 2571 } 2572 2573 BUG_ON(idx >= unique_indirect_reg_count); 2574 2575 if (!unique_indirect_regs[idx]) 2576 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2577 2578 indirect_offset++; 2579 } 2580 } 2581 } 2582 2583 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2584 { 2585 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2586 int unique_indirect_reg_count = 0; 2587 2588 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2589 int indirect_start_offsets_count = 0; 2590 2591 int list_size = 0; 2592 int i = 0, j = 0; 2593 u32 tmp = 0; 2594 2595 u32 *register_list_format = 2596 kmemdup(adev->gfx.rlc.register_list_format, 2597 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2598 if (!register_list_format) 2599 return -ENOMEM; 2600 2601 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2602 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2603 gfx_v9_1_parse_ind_reg_list(register_list_format, 2604 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2605 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2606 unique_indirect_regs, 2607 unique_indirect_reg_count, 2608 indirect_start_offsets, 2609 &indirect_start_offsets_count, 2610 ARRAY_SIZE(indirect_start_offsets)); 2611 2612 /* enable auto inc in case it is disabled */ 2613 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2614 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2615 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2616 2617 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2618 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2619 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2620 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2621 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2622 adev->gfx.rlc.register_restore[i]); 2623 2624 /* load indirect register */ 2625 
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2626 adev->gfx.rlc.reg_list_format_start); 2627 2628 /* direct register portion */ 2629 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2630 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2631 register_list_format[i]); 2632 2633 /* indirect register portion */ 2634 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2635 if (register_list_format[i] == 0xFFFFFFFF) { 2636 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2637 continue; 2638 } 2639 2640 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2641 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2642 2643 for (j = 0; j < unique_indirect_reg_count; j++) { 2644 if (register_list_format[i] == unique_indirect_regs[j]) { 2645 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2646 break; 2647 } 2648 } 2649 2650 BUG_ON(j >= unique_indirect_reg_count); 2651 2652 i++; 2653 } 2654 2655 /* set save/restore list size */ 2656 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2657 list_size = list_size >> 1; 2658 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2659 adev->gfx.rlc.reg_restore_list_size); 2660 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2661 2662 /* write the starting offsets to RLC scratch ram */ 2663 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2664 adev->gfx.rlc.starting_offsets_start); 2665 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2666 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2667 indirect_start_offsets[i]); 2668 2669 /* load unique indirect regs*/ 2670 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2671 if (unique_indirect_regs[i] != 0) { 2672 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2673 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2674 unique_indirect_regs[i] & 0x3FFFF); 2675 2676 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2677 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2678 unique_indirect_regs[i] >> 20); 2679 } 2680 } 2681 2682 kfree(register_list_format); 2683 return 0; 2684 } 2685 2686 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2687 { 2688 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2689 } 2690 2691 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2692 bool enable) 2693 { 2694 uint32_t data = 0; 2695 uint32_t default_data = 0; 2696 2697 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2698 if (enable == true) { 2699 /* enable GFXIP control over CGPG */ 2700 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2701 if(default_data != data) 2702 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2703 2704 /* update status */ 2705 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2706 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2707 if(default_data != data) 2708 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2709 } else { 2710 /* restore GFXIP control over GCPG */ 2711 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2712 if(default_data != data) 2713 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2714 } 2715 } 2716 2717 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2718 { 2719 uint32_t data = 0; 2720 2721 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2722 AMD_PG_SUPPORT_GFX_SMG | 2723 AMD_PG_SUPPORT_GFX_DMG)) { 2724 /* init IDLE_POLL_COUNT = 60 
*/ 2725 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2726 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2727 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2728 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2729 2730 /* init RLC PG Delay */ 2731 data = 0; 2732 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2733 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2734 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2735 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2736 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2737 2738 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2739 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2740 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2741 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2742 2743 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2744 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2745 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2746 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2747 2748 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2749 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2750 2751 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2752 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2753 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2754 2755 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2756 } 2757 } 2758 2759 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2760 bool enable) 2761 { 2762 uint32_t data = 0; 2763 uint32_t default_data = 0; 2764 2765 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2766 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2767 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2768 enable ? 1 : 0); 2769 if (default_data != data) 2770 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2771 } 2772 2773 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2774 bool enable) 2775 { 2776 uint32_t data = 0; 2777 uint32_t default_data = 0; 2778 2779 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2780 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2781 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2782 enable ? 1 : 0); 2783 if(default_data != data) 2784 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2785 } 2786 2787 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2788 bool enable) 2789 { 2790 uint32_t data = 0; 2791 uint32_t default_data = 0; 2792 2793 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2794 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2795 CP_PG_DISABLE, 2796 enable ? 0 : 1); 2797 if(default_data != data) 2798 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2799 } 2800 2801 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2802 bool enable) 2803 { 2804 uint32_t data, default_data; 2805 2806 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2807 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2808 GFX_POWER_GATING_ENABLE, 2809 enable ? 
1 : 0); 2810 if(default_data != data) 2811 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2812 } 2813 2814 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2815 bool enable) 2816 { 2817 uint32_t data, default_data; 2818 2819 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2820 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2821 GFX_PIPELINE_PG_ENABLE, 2822 enable ? 1 : 0); 2823 if(default_data != data) 2824 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2825 2826 if (!enable) 2827 /* read any GFX register to wake up GFX */ 2828 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2829 } 2830 2831 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2832 bool enable) 2833 { 2834 uint32_t data, default_data; 2835 2836 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2837 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2838 STATIC_PER_CU_PG_ENABLE, 2839 enable ? 1 : 0); 2840 if(default_data != data) 2841 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2842 } 2843 2844 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2845 bool enable) 2846 { 2847 uint32_t data, default_data; 2848 2849 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2850 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2851 DYN_PER_CU_PG_ENABLE, 2852 enable ? 1 : 0); 2853 if(default_data != data) 2854 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2855 } 2856 2857 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2858 { 2859 gfx_v9_0_init_csb(adev); 2860 2861 /* 2862 * Rlc save restore list is workable since v2_1. 2863 * And it's needed by gfxoff feature. 2864 */ 2865 if (adev->gfx.rlc.is_rlc_v2_1) { 2866 gfx_v9_1_init_rlc_save_restore_list(adev); 2867 gfx_v9_0_enable_save_restore_machine(adev); 2868 } 2869 2870 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2871 AMD_PG_SUPPORT_GFX_SMG | 2872 AMD_PG_SUPPORT_GFX_DMG | 2873 AMD_PG_SUPPORT_CP | 2874 AMD_PG_SUPPORT_GDS | 2875 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2876 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2877 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2878 gfx_v9_0_init_gfx_power_gating(adev); 2879 } 2880 } 2881 2882 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2883 { 2884 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2885 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2886 gfx_v9_0_wait_for_rlc_serdes(adev); 2887 } 2888 2889 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2890 { 2891 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2892 udelay(50); 2893 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2894 udelay(50); 2895 } 2896 2897 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2898 { 2899 #ifdef AMDGPU_RLC_DEBUG_RETRY 2900 u32 rlc_ucode_ver; 2901 #endif 2902 2903 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2904 udelay(50); 2905 2906 /* carrizo do enable cp interrupt after cp inited */ 2907 if (!(adev->flags & AMD_IS_APU)) { 2908 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2909 udelay(50); 2910 } 2911 2912 #ifdef AMDGPU_RLC_DEBUG_RETRY 2913 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2914 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2915 if(rlc_ucode_ver == 0x108) { 2916 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2917 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2918 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2919 * default is 0x9C4 to create a 100us interval */ 2920 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2921 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2922 * to disable the page fault retry interrupts, default is 2923 * 0x100 (256) */ 2924 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2925 } 2926 #endif 2927 } 2928 2929 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2930 { 2931 const struct rlc_firmware_header_v2_0 *hdr; 2932 const __le32 *fw_data; 2933 unsigned i, fw_size; 2934 2935 if (!adev->gfx.rlc_fw) 2936 return -EINVAL; 2937 2938 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2939 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2940 2941 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2942 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2943 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2944 2945 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2946 RLCG_UCODE_LOADING_START_ADDRESS); 2947 for (i = 0; i < fw_size; i++) 2948 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2949 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2950 2951 return 0; 2952 } 2953 2954 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2955 { 2956 int r; 2957 2958 if (amdgpu_sriov_vf(adev)) { 2959 gfx_v9_0_init_csb(adev); 2960 return 0; 2961 } 2962 2963 adev->gfx.rlc.funcs->stop(adev); 2964 2965 /* disable CG */ 2966 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2967 2968 gfx_v9_0_init_pg(adev); 2969 2970 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2971 /* legacy rlc firmware loading */ 2972 r = gfx_v9_0_rlc_load_microcode(adev); 2973 if (r) 2974 return r; 2975 } 2976 2977 switch (adev->asic_type) { 2978 case CHIP_RAVEN: 2979 if (amdgpu_lbpw == 0) 2980 gfx_v9_0_enable_lbpw(adev, false); 2981 else 2982 gfx_v9_0_enable_lbpw(adev, true); 2983 break; 2984 case CHIP_VEGA20: 2985 if (amdgpu_lbpw > 0) 2986 gfx_v9_0_enable_lbpw(adev, true); 2987 else 2988 gfx_v9_0_enable_lbpw(adev, false); 2989 break; 2990 default: 2991 break; 2992 } 2993 2994 adev->gfx.rlc.funcs->start(adev); 2995 2996 return 0; 2997 } 2998 2999 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3000 { 3001 int i; 3002 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3003 3004 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 3007 if (!enable) { 3008 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3009 adev->gfx.gfx_ring[i].sched.ready = false; 3010 } 3011 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3012 udelay(50); 3013 } 3014 3015 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3016 { 3017 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3018 const struct gfx_firmware_header_v1_0 *ce_hdr; 3019 const struct gfx_firmware_header_v1_0 *me_hdr; 3020 const __le32 *fw_data; 3021 unsigned i, fw_size; 3022 3023 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3024 return -EINVAL; 3025 3026 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3027 adev->gfx.pfp_fw->data; 3028 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3029 adev->gfx.ce_fw->data; 3030 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3031 adev->gfx.me_fw->data; 3032 3033 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3034 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3035 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3036 3037 gfx_v9_0_cp_gfx_enable(adev, false); 3038 3039 /* PFP */ 3040 fw_data = (const __le32 *) 3041 (adev->gfx.pfp_fw->data + 3042 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3043 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3044 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3045 for (i = 0; i < fw_size; i++) 3046 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3047 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3048 3049 /* CE */ 3050 fw_data = (const __le32 *) 3051 (adev->gfx.ce_fw->data + 3052 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3053 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3054 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3055 for (i = 0; i < fw_size; i++) 3056 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3057 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3058 3059 /* ME */ 3060 fw_data = (const __le32 *) 3061 (adev->gfx.me_fw->data + 3062 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3063 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3064 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3065 for (i = 0; i < fw_size; i++) 3066 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3067 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3068 3069 return 0; 3070 } 3071 3072 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3073 { 3074 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3075 const struct cs_section_def *sect = NULL; 3076 const struct cs_extent_def *ext = NULL; 3077 int r, i, tmp; 3078 3079 /* init the CP */ 3080 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3081 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3082 3083 gfx_v9_0_cp_gfx_enable(adev, true); 3084 3085 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3086 if (r) { 3087 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3088 return r; 3089 } 3090 3091 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3092 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3093 3094 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3095 amdgpu_ring_write(ring, 0x80000000); 3096 amdgpu_ring_write(ring, 0x80000000); 3097 3098 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3099 for (ext = sect->section; ext->extent != NULL; ++ext) { 3100 if (sect->id == SECT_CONTEXT) { 3101 amdgpu_ring_write(ring, 3102 PACKET3(PACKET3_SET_CONTEXT_REG, 3103 
ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v9_0_cp_gfx_start(adev);
	ring->sched.ready = true;
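	/* As programmed above, CP_RB0_CNTL.RB_BUFSZ holds
	 * order_base_2(ring_size / 8), i.e. a power-of-two ring of
	 * ring_size bytes satisfies ring_size == 8 << RB_BUFSZ.
	 * For illustration only: a hypothetical 64KB gfx ring would give
	 * RB_BUFSZ = 13 and RB_BLKSZ = 11.
	 */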
3200 3201 return 0; 3202 } 3203 3204 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3205 { 3206 int i; 3207 3208 if (enable) { 3209 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3210 } else { 3211 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3212 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3213 for (i = 0; i < adev->gfx.num_compute_rings; i++) 3214 adev->gfx.compute_ring[i].sched.ready = false; 3215 adev->gfx.kiq.ring.sched.ready = false; 3216 } 3217 udelay(50); 3218 } 3219 3220 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3221 { 3222 const struct gfx_firmware_header_v1_0 *mec_hdr; 3223 const __le32 *fw_data; 3224 unsigned i; 3225 u32 tmp; 3226 3227 if (!adev->gfx.mec_fw) 3228 return -EINVAL; 3229 3230 gfx_v9_0_cp_compute_enable(adev, false); 3231 3232 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3233 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3234 3235 fw_data = (const __le32 *) 3236 (adev->gfx.mec_fw->data + 3237 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3238 tmp = 0; 3239 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3240 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3241 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3242 3243 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3244 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3245 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3246 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3247 3248 /* MEC1 */ 3249 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3250 mec_hdr->jt_offset); 3251 for (i = 0; i < mec_hdr->jt_size; i++) 3252 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3253 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3254 3255 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3256 adev->gfx.mec_fw_version); 3257 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3258 3259 return 0; 3260 } 3261 3262 /* KIQ functions */ 3263 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3264 { 3265 uint32_t tmp; 3266 struct amdgpu_device *adev = ring->adev; 3267 3268 /* tell RLC which is KIQ queue */ 3269 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3270 tmp &= 0xffffff00; 3271 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3272 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3273 tmp |= 0x80; 3274 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3275 } 3276 3277 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 3278 { 3279 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3280 uint64_t queue_mask = 0; 3281 int r, i; 3282 3283 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 3284 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 3285 continue; 3286 3287 /* This situation may be hit in the future if a new HW 3288 * generation exposes more than 64 queues. 
If so, the 3289 * definition of queue_mask needs updating */ 3290 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 3291 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 3292 break; 3293 } 3294 3295 queue_mask |= (1ull << i); 3296 } 3297 3298 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 3299 if (r) { 3300 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3301 return r; 3302 } 3303 3304 /* set resources */ 3305 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 3306 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 3307 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 3308 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 3309 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 3310 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 3311 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 3312 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 3313 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 3314 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3315 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3316 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 3317 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3318 3319 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 3320 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 3321 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3322 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 3323 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 3324 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 3325 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 3326 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 3327 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 3328 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 3329 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 3330 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 3331 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 3332 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 3333 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 3334 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 3335 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 3336 } 3337 3338 r = amdgpu_ring_test_helper(kiq_ring); 3339 if (r) 3340 DRM_ERROR("KCQ enable failed\n"); 3341 3342 return r; 3343 } 3344 3345 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3346 { 3347 struct amdgpu_device *adev = ring->adev; 3348 struct v9_mqd *mqd = ring->mqd_ptr; 3349 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3350 uint32_t tmp; 3351 3352 mqd->header = 0xC0310800; 3353 mqd->compute_pipelinestat_enable = 0x00000001; 3354 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3355 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3356 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3357 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3358 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3359 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3360 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3361 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3362 mqd->compute_misc_reserved = 0x00000003; 3363 3364 mqd->dynamic_cu_mask_addr_lo = 3365 lower_32_bits(ring->mqd_gpu_addr 3366 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3367 mqd->dynamic_cu_mask_addr_hi = 3368 upper_32_bits(ring->mqd_gpu_addr 3369 + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask)); 3370 3371 eop_base_addr = ring->eop_gpu_addr >> 8; 3372 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3373 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3374 3375 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3376 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3377 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3378 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3379 3380 mqd->cp_hqd_eop_control = tmp; 3381 3382 /* enable doorbell? */ 3383 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3384 3385 if (ring->use_doorbell) { 3386 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3387 DOORBELL_OFFSET, ring->doorbell_index); 3388 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3389 DOORBELL_EN, 1); 3390 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3391 DOORBELL_SOURCE, 0); 3392 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3393 DOORBELL_HIT, 0); 3394 } else { 3395 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3396 DOORBELL_EN, 0); 3397 } 3398 3399 mqd->cp_hqd_pq_doorbell_control = tmp; 3400 3401 /* disable the queue if it's active */ 3402 ring->wptr = 0; 3403 mqd->cp_hqd_dequeue_request = 0; 3404 mqd->cp_hqd_pq_rptr = 0; 3405 mqd->cp_hqd_pq_wptr_lo = 0; 3406 mqd->cp_hqd_pq_wptr_hi = 0; 3407 3408 /* set the pointer to the MQD */ 3409 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3410 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3411 3412 /* set MQD vmid to 0 */ 3413 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3414 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3415 mqd->cp_mqd_control = tmp; 3416 3417 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3418 hqd_gpu_addr = ring->gpu_addr >> 8; 3419 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3420 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3421 3422 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3423 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3424 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3425 (order_base_2(ring->ring_size / 4) - 1)); 3426 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3427 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3428 #ifdef __BIG_ENDIAN 3429 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3430 #endif 3431 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3432 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3433 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3434 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3435 mqd->cp_hqd_pq_control = tmp; 3436 3437 /* set the wb address whether it's enabled or not */ 3438 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3439 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3440 mqd->cp_hqd_pq_rptr_report_addr_hi = 3441 upper_32_bits(wb_gpu_addr) & 0xffff; 3442 3443 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3444 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3445 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3446 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3447 3448 tmp = 0; 3449 /* enable the doorbell if requested */ 3450 if (ring->use_doorbell) { 3451 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3452 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3453 DOORBELL_OFFSET, ring->doorbell_index); 3454 3455 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3456 DOORBELL_EN, 1); 3457 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3458 DOORBELL_SOURCE, 0); 3459 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3460 DOORBELL_HIT, 0); 3461 } 3462 3463 mqd->cp_hqd_pq_doorbell_control = tmp; 3464 3465 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3466 ring->wptr = 0; 3467 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3468 3469 /* set the vmid for the queue */ 3470 mqd->cp_hqd_vmid = 0; 3471 3472 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3473 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3474 mqd->cp_hqd_persistent_state = tmp; 3475 3476 /* set MIN_IB_AVAIL_SIZE */ 3477 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3478 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3479 mqd->cp_hqd_ib_control = tmp; 3480 3481 /* activate the queue */ 3482 mqd->cp_hqd_active = 1; 3483 3484 return 0; 3485 } 3486 3487 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3488 { 3489 struct amdgpu_device *adev = ring->adev; 3490 struct v9_mqd *mqd = ring->mqd_ptr; 3491 int j; 3492 3493 /* disable wptr polling */ 3494 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3495 3496 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3497 mqd->cp_hqd_eop_base_addr_lo); 3498 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3499 mqd->cp_hqd_eop_base_addr_hi); 3500 3501 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3502 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3503 mqd->cp_hqd_eop_control); 3504 3505 /* enable doorbell? */ 3506 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3507 mqd->cp_hqd_pq_doorbell_control); 3508 3509 /* disable the queue if it's active */ 3510 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3511 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3512 for (j = 0; j < adev->usec_timeout; j++) { 3513 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3514 break; 3515 udelay(1); 3516 } 3517 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3518 mqd->cp_hqd_dequeue_request); 3519 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3520 mqd->cp_hqd_pq_rptr); 3521 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3522 mqd->cp_hqd_pq_wptr_lo); 3523 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3524 mqd->cp_hqd_pq_wptr_hi); 3525 } 3526 3527 /* set the pointer to the MQD */ 3528 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3529 mqd->cp_mqd_base_addr_lo); 3530 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3531 mqd->cp_mqd_base_addr_hi); 3532 3533 /* set MQD vmid to 0 */ 3534 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3535 mqd->cp_mqd_control); 3536 3537 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3538 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3539 mqd->cp_hqd_pq_base_lo); 3540 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3541 mqd->cp_hqd_pq_base_hi); 3542 3543 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3544 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3545 mqd->cp_hqd_pq_control); 3546 3547 /* set the wb address whether it's enabled or not */ 3548 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3549 mqd->cp_hqd_pq_rptr_report_addr_lo); 3550 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3551 mqd->cp_hqd_pq_rptr_report_addr_hi); 3552 3553 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3554 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3555 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3556 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3557 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3558 3559 /* enable the 
doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
					(adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manual disable if dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v9_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if
(adev->gfx.mec.mqd_backup[mqd_idx]) 3665 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3666 } 3667 3668 return 0; 3669 } 3670 3671 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3672 { 3673 struct amdgpu_device *adev = ring->adev; 3674 struct v9_mqd *mqd = ring->mqd_ptr; 3675 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3676 3677 if (!adev->in_gpu_reset && !adev->in_suspend) { 3678 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3679 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3680 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3681 mutex_lock(&adev->srbm_mutex); 3682 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3683 gfx_v9_0_mqd_init(ring); 3684 soc15_grbm_select(adev, 0, 0, 0, 0); 3685 mutex_unlock(&adev->srbm_mutex); 3686 3687 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3688 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3689 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3690 /* reset MQD to a clean status */ 3691 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3692 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3693 3694 /* reset ring buffer */ 3695 ring->wptr = 0; 3696 amdgpu_ring_clear_ring(ring); 3697 } else { 3698 amdgpu_ring_clear_ring(ring); 3699 } 3700 3701 return 0; 3702 } 3703 3704 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3705 { 3706 struct amdgpu_ring *ring; 3707 int r; 3708 3709 ring = &adev->gfx.kiq.ring; 3710 3711 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3712 if (unlikely(r != 0)) 3713 return r; 3714 3715 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3716 if (unlikely(r != 0)) 3717 return r; 3718 3719 gfx_v9_0_kiq_init_queue(ring); 3720 amdgpu_bo_kunmap(ring->mqd_obj); 3721 ring->mqd_ptr = NULL; 3722 amdgpu_bo_unreserve(ring->mqd_obj); 3723 ring->sched.ready = true; 3724 return 0; 3725 } 3726 3727 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3728 { 3729 struct amdgpu_ring *ring = NULL; 3730 int r = 0, i; 3731 3732 gfx_v9_0_cp_compute_enable(adev, true); 3733 3734 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3735 ring = &adev->gfx.compute_ring[i]; 3736 3737 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3738 if (unlikely(r != 0)) 3739 goto done; 3740 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3741 if (!r) { 3742 r = gfx_v9_0_kcq_init_queue(ring); 3743 amdgpu_bo_kunmap(ring->mqd_obj); 3744 ring->mqd_ptr = NULL; 3745 } 3746 amdgpu_bo_unreserve(ring->mqd_obj); 3747 if (r) 3748 goto done; 3749 } 3750 3751 r = gfx_v9_0_kiq_kcq_enable(adev); 3752 done: 3753 return r; 3754 } 3755 3756 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3757 { 3758 int r, i; 3759 struct amdgpu_ring *ring; 3760 3761 if (!(adev->flags & AMD_IS_APU)) 3762 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3763 3764 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3765 if (adev->asic_type != CHIP_ARCTURUS) { 3766 /* legacy firmware loading */ 3767 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3768 if (r) 3769 return r; 3770 } 3771 3772 r = gfx_v9_0_cp_compute_load_microcode(adev); 3773 if (r) 3774 return r; 3775 } 3776 3777 r = gfx_v9_0_kiq_resume(adev); 3778 if (r) 3779 return r; 3780 3781 if (adev->asic_type != CHIP_ARCTURUS) { 3782 r = gfx_v9_0_cp_gfx_resume(adev); 3783 if (r) 3784 return r; 3785 } 3786 3787 r = gfx_v9_0_kcq_resume(adev); 3788 if (r) 3789 return r; 3790 3791 if (adev->asic_type != CHIP_ARCTURUS) { 3792 ring = 
&adev->gfx.gfx_ring[0]; 3793 r = amdgpu_ring_test_helper(ring); 3794 if (r) 3795 return r; 3796 } 3797 3798 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3799 ring = &adev->gfx.compute_ring[i]; 3800 amdgpu_ring_test_helper(ring); 3801 } 3802 3803 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3804 3805 return 0; 3806 } 3807 3808 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3809 { 3810 if (adev->asic_type != CHIP_ARCTURUS) 3811 gfx_v9_0_cp_gfx_enable(adev, enable); 3812 gfx_v9_0_cp_compute_enable(adev, enable); 3813 } 3814 3815 static int gfx_v9_0_hw_init(void *handle) 3816 { 3817 int r; 3818 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3819 3820 if (!amdgpu_sriov_vf(adev)) 3821 gfx_v9_0_init_golden_registers(adev); 3822 3823 gfx_v9_0_constants_init(adev); 3824 3825 r = gfx_v9_0_csb_vram_pin(adev); 3826 if (r) 3827 return r; 3828 3829 r = adev->gfx.rlc.funcs->resume(adev); 3830 if (r) 3831 return r; 3832 3833 r = gfx_v9_0_cp_resume(adev); 3834 if (r) 3835 return r; 3836 3837 if (adev->asic_type != CHIP_ARCTURUS) { 3838 r = gfx_v9_0_ngg_en(adev); 3839 if (r) 3840 return r; 3841 } 3842 3843 return r; 3844 } 3845 3846 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3847 { 3848 int r, i; 3849 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3850 3851 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3852 if (r) 3853 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3854 3855 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3856 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3857 3858 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3859 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3860 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3861 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3862 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3863 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3864 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3865 amdgpu_ring_write(kiq_ring, 0); 3866 amdgpu_ring_write(kiq_ring, 0); 3867 amdgpu_ring_write(kiq_ring, 0); 3868 } 3869 r = amdgpu_ring_test_helper(kiq_ring); 3870 if (r) 3871 DRM_ERROR("KCQ disable failed\n"); 3872 3873 return r; 3874 } 3875 3876 static int gfx_v9_0_hw_fini(void *handle) 3877 { 3878 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3879 3880 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3881 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3882 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3883 3884 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3885 gfx_v9_0_kcq_disable(adev); 3886 3887 if (amdgpu_sriov_vf(adev)) { 3888 gfx_v9_0_cp_gfx_enable(adev, false); 3889 /* must disable polling for SRIOV when hw finished, otherwise 3890 * CPC engine may still keep fetching WB address which is already 3891 * invalid after sw finished and trigger DMAR reading error in 3892 * hypervisor side. 
3893 */ 3894 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3895 return 0; 3896 } 3897 3898 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3899 * otherwise KIQ is hanging when binding back 3900 */ 3901 if (!adev->in_gpu_reset && !adev->in_suspend) { 3902 mutex_lock(&adev->srbm_mutex); 3903 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3904 adev->gfx.kiq.ring.pipe, 3905 adev->gfx.kiq.ring.queue, 0); 3906 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3907 soc15_grbm_select(adev, 0, 0, 0, 0); 3908 mutex_unlock(&adev->srbm_mutex); 3909 } 3910 3911 gfx_v9_0_cp_enable(adev, false); 3912 adev->gfx.rlc.funcs->stop(adev); 3913 3914 gfx_v9_0_csb_vram_unpin(adev); 3915 3916 return 0; 3917 } 3918 3919 static int gfx_v9_0_suspend(void *handle) 3920 { 3921 return gfx_v9_0_hw_fini(handle); 3922 } 3923 3924 static int gfx_v9_0_resume(void *handle) 3925 { 3926 return gfx_v9_0_hw_init(handle); 3927 } 3928 3929 static bool gfx_v9_0_is_idle(void *handle) 3930 { 3931 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3932 3933 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3934 GRBM_STATUS, GUI_ACTIVE)) 3935 return false; 3936 else 3937 return true; 3938 } 3939 3940 static int gfx_v9_0_wait_for_idle(void *handle) 3941 { 3942 unsigned i; 3943 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3944 3945 for (i = 0; i < adev->usec_timeout; i++) { 3946 if (gfx_v9_0_is_idle(handle)) 3947 return 0; 3948 udelay(1); 3949 } 3950 return -ETIMEDOUT; 3951 } 3952 3953 static int gfx_v9_0_soft_reset(void *handle) 3954 { 3955 u32 grbm_soft_reset = 0; 3956 u32 tmp; 3957 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3958 3959 /* GRBM_STATUS */ 3960 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3961 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3962 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3963 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3964 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3965 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3966 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3967 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3968 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3969 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3970 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3971 } 3972 3973 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3974 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3975 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3976 } 3977 3978 /* GRBM_STATUS2 */ 3979 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3980 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3981 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3982 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3983 3984 3985 if (grbm_soft_reset) { 3986 /* stop the rlc */ 3987 adev->gfx.rlc.funcs->stop(adev); 3988 3989 if (adev->asic_type != CHIP_ARCTURUS) 3990 /* Disable GFX parsing/prefetching */ 3991 gfx_v9_0_cp_gfx_enable(adev, false); 3992 3993 /* Disable MEC parsing/prefetching */ 3994 gfx_v9_0_cp_compute_enable(adev, false); 3995 3996 if (grbm_soft_reset) { 3997 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3998 tmp |= grbm_soft_reset; 3999 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4000 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4001 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4002 4003 udelay(50); 4004 4005 tmp &= ~grbm_soft_reset; 4006 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4007 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4008 } 4009 4010 /* Wait a 
little for things to settle down */ 4011 udelay(50); 4012 } 4013 return 0; 4014 } 4015 4016 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4017 { 4018 uint64_t clock; 4019 4020 mutex_lock(&adev->gfx.gpu_clock_mutex); 4021 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4022 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4023 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4024 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4025 return clock; 4026 } 4027 4028 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4029 uint32_t vmid, 4030 uint32_t gds_base, uint32_t gds_size, 4031 uint32_t gws_base, uint32_t gws_size, 4032 uint32_t oa_base, uint32_t oa_size) 4033 { 4034 struct amdgpu_device *adev = ring->adev; 4035 4036 /* GDS Base */ 4037 gfx_v9_0_write_data_to_reg(ring, 0, false, 4038 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4039 gds_base); 4040 4041 /* GDS Size */ 4042 gfx_v9_0_write_data_to_reg(ring, 0, false, 4043 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4044 gds_size); 4045 4046 /* GWS */ 4047 gfx_v9_0_write_data_to_reg(ring, 0, false, 4048 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4049 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4050 4051 /* OA */ 4052 gfx_v9_0_write_data_to_reg(ring, 0, false, 4053 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4054 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4055 } 4056 4057 static const u32 vgpr_init_compute_shader[] = 4058 { 4059 0xb07c0000, 0xbe8000ff, 4060 0x000000f8, 0xbf110800, 4061 0x7e000280, 0x7e020280, 4062 0x7e040280, 0x7e060280, 4063 0x7e080280, 0x7e0a0280, 4064 0x7e0c0280, 0x7e0e0280, 4065 0x80808800, 0xbe803200, 4066 0xbf84fff5, 0xbf9c0000, 4067 0xd28c0001, 0x0001007f, 4068 0xd28d0001, 0x0002027e, 4069 0x10020288, 0xb8810904, 4070 0xb7814000, 0xd1196a01, 4071 0x00000301, 0xbe800087, 4072 0xbefc00c1, 0xd89c4000, 4073 0x00020201, 0xd89cc080, 4074 0x00040401, 0x320202ff, 4075 0x00000800, 0x80808100, 4076 0xbf84fff8, 0x7e020280, 4077 0xbf810000, 0x00000000, 4078 }; 4079 4080 static const u32 sgpr_init_compute_shader[] = 4081 { 4082 0xb07c0000, 0xbe8000ff, 4083 0x0000005f, 0xbee50080, 4084 0xbe812c65, 0xbe822c65, 4085 0xbe832c65, 0xbe842c65, 4086 0xbe852c65, 0xb77c0005, 4087 0x80808500, 0xbf84fff8, 4088 0xbe800080, 0xbf810000, 4089 }; 4090 4091 static const struct soc15_reg_entry vgpr_init_regs[] = { 4092 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4093 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4094 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4095 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4096 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4097 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4098 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4099 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4100 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 4101 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4102 }; 4103 4104 static const struct soc15_reg_entry sgpr_init_regs[] = { 4105 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4106 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4107 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4108 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4109 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 4110 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 4111 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 4112 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4113 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 4114 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4115 }; 4116 4117 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 4118 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4119 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4120 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4121 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4122 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4123 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4124 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4125 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4126 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4127 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4128 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4129 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4130 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4131 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4132 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4133 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4134 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4135 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4136 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4137 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4138 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4139 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4140 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4141 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4142 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4143 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4144 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4145 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4146 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4147 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4148 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4149 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4150 }; 4151 4152 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4153 { 4154 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4155 int i, r; 4156 4157 r = amdgpu_ring_alloc(ring, 7); 4158 if (r) { 4159 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4160 ring->name, r); 4161 return r; 4162 } 4163 4164 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4165 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4166 4167 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4168 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4169 PACKET3_DMA_DATA_DST_SEL(1) | 4170 PACKET3_DMA_DATA_SRC_SEL(2) | 4171 PACKET3_DMA_DATA_ENGINE(0))); 4172 amdgpu_ring_write(ring, 0); 4173 amdgpu_ring_write(ring, 0); 4174 amdgpu_ring_write(ring, 0); 4175 amdgpu_ring_write(ring, 0); 4176 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4177 adev->gds.gds_size); 4178 4179 amdgpu_ring_commit(ring); 4180 4181 for (i = 0; i < adev->usec_timeout; i++) { 4182 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4183 break; 
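		/* DMA_DATA not consumed yet: the compute rptr still trails
		 * the wptr committed above, so wait 1us before polling again */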
4184 udelay(1); 4185 } 4186 4187 if (i >= adev->usec_timeout) 4188 r = -ETIMEDOUT; 4189 4190 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4191 4192 return r; 4193 } 4194 4195 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4196 { 4197 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4198 struct amdgpu_ib ib; 4199 struct dma_fence *f = NULL; 4200 int r, i, j, k; 4201 unsigned total_size, vgpr_offset, sgpr_offset; 4202 u64 gpu_addr; 4203 4204 /* only support when RAS is enabled */ 4205 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4206 return 0; 4207 4208 /* bail if the compute ring is not ready */ 4209 if (!ring->sched.ready) 4210 return 0; 4211 4212 total_size = 4213 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4214 total_size += 4215 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 4216 total_size = ALIGN(total_size, 256); 4217 vgpr_offset = total_size; 4218 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 4219 sgpr_offset = total_size; 4220 total_size += sizeof(sgpr_init_compute_shader); 4221 4222 /* allocate an indirect buffer to put the commands in */ 4223 memset(&ib, 0, sizeof(ib)); 4224 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 4225 if (r) { 4226 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4227 return r; 4228 } 4229 4230 /* load the compute shaders */ 4231 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 4232 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 4233 4234 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4235 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4236 4237 /* init the ib length to 0 */ 4238 ib.length_dw = 0; 4239 4240 /* VGPR */ 4241 /* write the register state for the compute dispatch */ 4242 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) { 4243 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4244 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i]) 4245 - PACKET3_SET_SH_REG_START; 4246 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value; 4247 } 4248 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4249 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4250 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4251 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4252 - PACKET3_SET_SH_REG_START; 4253 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4254 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4255 4256 /* write dispatch packet */ 4257 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4258 ib.ptr[ib.length_dw++] = 128; /* x */ 4259 ib.ptr[ib.length_dw++] = 1; /* y */ 4260 ib.ptr[ib.length_dw++] = 1; /* z */ 4261 ib.ptr[ib.length_dw++] = 4262 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4263 4264 /* write CS partial flush packet */ 4265 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4266 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4267 4268 /* SGPR */ 4269 /* write the register state for the compute dispatch */ 4270 for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) { 4271 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4272 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i]) 4273 - PACKET3_SET_SH_REG_START; 4274 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value; 4275 } 4276 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4277 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4278 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4279 
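	/* the SET_SH_REG above carries two register writes: the next dword
	 * selects mmCOMPUTE_PGM_LO, and the two dwords after it supply the
	 * low and high halves of the 256-byte-aligned shader address
	 * (gpu_addr was shifted right by 8 above) */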
ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
							- PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 128; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
			}
		}
	}
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS)
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
		struct ras_err_data *err_data,
		struct amdgpu_iv_entry *entry);

static int gfx_v9_0_ecc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct ras_common_if **ras_if = &adev->gfx.ras_if;
	struct ras_ih_if ih_info = {
		.cb = gfx_v9_0_process_ras_data_cb,
	};
	struct ras_fs_if fs_info = {
		.sysfs_name = "gfx_err_count",
		.debugfs_name = "gfx_err_inject",
	};
	struct ras_common_if ras_block = {
		.block = AMDGPU_RAS_BLOCK__GFX,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "gfx",
	};
	int r;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
		return 0;
	}

	r = gfx_v9_0_do_edc_gds_workarounds(adev);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	/* handle resume path. */
	if (*ras_if) {
		/* resend ras TA enable cmd during resume.
		 * prepare to handle failure.
4388 */ 4389 ih_info.head = **ras_if; 4390 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4391 if (r) { 4392 if (r == -EAGAIN) { 4393 /* request a gpu reset. will run again. */ 4394 amdgpu_ras_request_reset_on_boot(adev, 4395 AMDGPU_RAS_BLOCK__GFX); 4396 return 0; 4397 } 4398 /* fail to enable ras, cleanup all. */ 4399 goto irq; 4400 } 4401 /* enable successfully. continue. */ 4402 goto resume; 4403 } 4404 4405 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 4406 if (!*ras_if) 4407 return -ENOMEM; 4408 4409 **ras_if = ras_block; 4410 4411 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 4412 if (r) { 4413 if (r == -EAGAIN) { 4414 amdgpu_ras_request_reset_on_boot(adev, 4415 AMDGPU_RAS_BLOCK__GFX); 4416 r = 0; 4417 } 4418 goto feature; 4419 } 4420 4421 ih_info.head = **ras_if; 4422 fs_info.head = **ras_if; 4423 4424 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 4425 if (r) 4426 goto interrupt; 4427 4428 amdgpu_ras_debugfs_create(adev, &fs_info); 4429 4430 r = amdgpu_ras_sysfs_create(adev, &fs_info); 4431 if (r) 4432 goto sysfs; 4433 resume: 4434 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 4435 if (r) 4436 goto irq; 4437 4438 return 0; 4439 irq: 4440 amdgpu_ras_sysfs_remove(adev, *ras_if); 4441 sysfs: 4442 amdgpu_ras_debugfs_remove(adev, *ras_if); 4443 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 4444 interrupt: 4445 amdgpu_ras_feature_enable(adev, *ras_if, 0); 4446 feature: 4447 kfree(*ras_if); 4448 *ras_if = NULL; 4449 return r; 4450 } 4451 4452 static int gfx_v9_0_late_init(void *handle) 4453 { 4454 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4455 int r; 4456 4457 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4458 if (r) 4459 return r; 4460 4461 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4462 if (r) 4463 return r; 4464 4465 r = gfx_v9_0_ecc_late_init(handle); 4466 if (r) 4467 return r; 4468 4469 return 0; 4470 } 4471 4472 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4473 { 4474 uint32_t rlc_setting; 4475 4476 /* if RLC is not enabled, do nothing */ 4477 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4478 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4479 return false; 4480 4481 return true; 4482 } 4483 4484 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4485 { 4486 uint32_t data; 4487 unsigned i; 4488 4489 data = RLC_SAFE_MODE__CMD_MASK; 4490 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4491 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4492 4493 /* wait for RLC_SAFE_MODE */ 4494 for (i = 0; i < adev->usec_timeout; i++) { 4495 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4496 break; 4497 udelay(1); 4498 } 4499 } 4500 4501 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4502 { 4503 uint32_t data; 4504 4505 data = RLC_SAFE_MODE__CMD_MASK; 4506 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4507 } 4508 4509 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4510 bool enable) 4511 { 4512 amdgpu_gfx_rlc_enter_safe_mode(adev); 4513 4514 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4515 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4516 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4517 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4518 } else { 4519 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4520 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4521 } 4522 4523 amdgpu_gfx_rlc_exit_safe_mode(adev); 4524 } 4525 4526 static void 
gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4527 bool enable) 4528 { 4529 /* TODO: double check if we need to perform under safe mode */ 4530 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4531 4532 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4533 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4534 else 4535 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4536 4537 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4538 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4539 else 4540 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4541 4542 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4543 } 4544 4545 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4546 bool enable) 4547 { 4548 uint32_t data, def; 4549 4550 amdgpu_gfx_rlc_enter_safe_mode(adev); 4551 4552 /* It is disabled by HW by default */ 4553 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4554 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4555 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4556 4557 if (adev->asic_type != CHIP_VEGA12) 4558 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4559 4560 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4561 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4562 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4563 4564 /* only for Vega10 & Raven1 */ 4565 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4566 4567 if (def != data) 4568 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4569 4570 /* MGLS is a global flag to control all MGLS in GFX */ 4571 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4572 /* 2 - RLC memory Light sleep */ 4573 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4574 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4575 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4576 if (def != data) 4577 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4578 } 4579 /* 3 - CP memory Light sleep */ 4580 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4581 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4582 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4583 if (def != data) 4584 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4585 } 4586 } 4587 } else { 4588 /* 1 - MGCG_OVERRIDE */ 4589 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4590 4591 if (adev->asic_type != CHIP_VEGA12) 4592 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4593 4594 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4595 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4596 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4597 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4598 4599 if (def != data) 4600 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4601 4602 /* 2 - disable MGLS in RLC */ 4603 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4604 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4605 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4606 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4607 } 4608 4609 /* 3 - disable MGLS in CP */ 4610 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4611 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4612 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4613 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4614 } 4615 } 4616 4617 amdgpu_gfx_rlc_exit_safe_mode(adev); 4618 } 4619 4620 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4621 bool enable) 4622 { 4623 uint32_t data, def; 4624 4625 if (adev->asic_type == CHIP_ARCTURUS) 
4626 return; 4627 4628 amdgpu_gfx_rlc_enter_safe_mode(adev); 4629 4630 /* Enable 3D CGCG/CGLS */ 4631 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4632 /* write cmd to clear cgcg/cgls ov */ 4633 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4634 /* unset CGCG override */ 4635 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4636 /* update CGCG and CGLS override bits */ 4637 if (def != data) 4638 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4639 4640 /* enable 3Dcgcg FSM(0x0000363f) */ 4641 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4642 4643 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4644 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4645 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4646 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4647 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4648 if (def != data) 4649 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4650 4651 /* set IDLE_POLL_COUNT(0x00900100) */ 4652 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4653 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4654 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4655 if (def != data) 4656 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4657 } else { 4658 /* Disable CGCG/CGLS */ 4659 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4660 /* disable cgcg, cgls should be disabled */ 4661 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4662 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4663 /* disable cgcg and cgls in FSM */ 4664 if (def != data) 4665 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4666 } 4667 4668 amdgpu_gfx_rlc_exit_safe_mode(adev); 4669 } 4670 4671 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4672 bool enable) 4673 { 4674 uint32_t def, data; 4675 4676 amdgpu_gfx_rlc_enter_safe_mode(adev); 4677 4678 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4679 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4680 /* unset CGCG override */ 4681 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4682 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4683 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4684 else 4685 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4686 /* update CGCG and CGLS override bits */ 4687 if (def != data) 4688 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4689 4690 /* enable cgcg FSM(0x0000363F) */ 4691 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4692 4693 if (adev->asic_type == CHIP_ARCTURUS) 4694 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4695 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4696 else 4697 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4698 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4699 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4700 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4701 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4702 if (def != data) 4703 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4704 4705 /* set IDLE_POLL_COUNT(0x00900100) */ 4706 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4707 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4708 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4709 if (def != data) 4710 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4711 } else { 4712 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4713 /* reset CGCG/CGLS bits */ 4714 data &= 
~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4715 /* disable cgcg and cgls in FSM */ 4716 if (def != data) 4717 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4718 } 4719 4720 amdgpu_gfx_rlc_exit_safe_mode(adev); 4721 } 4722 4723 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4724 bool enable) 4725 { 4726 if (enable) { 4727 /* CGCG/CGLS should be enabled after MGCG/MGLS 4728 * === MGCG + MGLS === 4729 */ 4730 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4731 /* === CGCG /CGLS for GFX 3D Only === */ 4732 gfx_v9_0_update_3d_clock_gating(adev, enable); 4733 /* === CGCG + CGLS === */ 4734 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4735 } else { 4736 /* CGCG/CGLS should be disabled before MGCG/MGLS 4737 * === CGCG + CGLS === 4738 */ 4739 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4740 /* === CGCG /CGLS for GFX 3D Only === */ 4741 gfx_v9_0_update_3d_clock_gating(adev, enable); 4742 /* === MGCG + MGLS === */ 4743 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4744 } 4745 return 0; 4746 } 4747 4748 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4749 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4750 .set_safe_mode = gfx_v9_0_set_safe_mode, 4751 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4752 .init = gfx_v9_0_rlc_init, 4753 .get_csb_size = gfx_v9_0_get_csb_size, 4754 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4755 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4756 .resume = gfx_v9_0_rlc_resume, 4757 .stop = gfx_v9_0_rlc_stop, 4758 .reset = gfx_v9_0_rlc_reset, 4759 .start = gfx_v9_0_rlc_start 4760 }; 4761 4762 static int gfx_v9_0_set_powergating_state(void *handle, 4763 enum amd_powergating_state state) 4764 { 4765 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4766 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4767 4768 switch (adev->asic_type) { 4769 case CHIP_RAVEN: 4770 if (!enable) { 4771 amdgpu_gfx_off_ctrl(adev, false); 4772 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4773 } 4774 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4775 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4776 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4777 } else { 4778 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4779 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4780 } 4781 4782 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4783 gfx_v9_0_enable_cp_power_gating(adev, true); 4784 else 4785 gfx_v9_0_enable_cp_power_gating(adev, false); 4786 4787 /* update gfx cgpg state */ 4788 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4789 4790 /* update mgcg state */ 4791 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4792 4793 if (enable) 4794 amdgpu_gfx_off_ctrl(adev, true); 4795 break; 4796 case CHIP_VEGA12: 4797 if (!enable) { 4798 amdgpu_gfx_off_ctrl(adev, false); 4799 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4800 } else { 4801 amdgpu_gfx_off_ctrl(adev, true); 4802 } 4803 break; 4804 default: 4805 break; 4806 } 4807 4808 return 0; 4809 } 4810 4811 static int gfx_v9_0_set_clockgating_state(void *handle, 4812 enum amd_clockgating_state state) 4813 { 4814 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4815 4816 if (amdgpu_sriov_vf(adev)) 4817 return 0; 4818 4819 switch (adev->asic_type) { 4820 case CHIP_VEGA10: 4821 case CHIP_VEGA12: 4822 case CHIP_VEGA20: 4823 case CHIP_RAVEN: 4824 case CHIP_ARCTURUS: 4825 gfx_v9_0_update_gfx_clock_gating(adev, 4826 state == AMD_CG_STATE_GATE ? 
true : false); 4827 break; 4828 default: 4829 break; 4830 } 4831 return 0; 4832 } 4833 4834 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4835 { 4836 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4837 int data; 4838 4839 if (amdgpu_sriov_vf(adev)) 4840 *flags = 0; 4841 4842 /* AMD_CG_SUPPORT_GFX_MGCG */ 4843 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4844 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4845 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4846 4847 /* AMD_CG_SUPPORT_GFX_CGCG */ 4848 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4849 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4850 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4851 4852 /* AMD_CG_SUPPORT_GFX_CGLS */ 4853 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4854 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4855 4856 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4857 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4858 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4859 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4860 4861 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4862 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4863 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4864 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4865 4866 if (adev->asic_type != CHIP_ARCTURUS) { 4867 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4868 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4869 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4870 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4871 4872 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4873 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4874 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4875 } 4876 } 4877 4878 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4879 { 4880 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4881 } 4882 4883 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4884 { 4885 struct amdgpu_device *adev = ring->adev; 4886 u64 wptr; 4887 4888 /* XXX check if swapping is necessary on BE */ 4889 if (ring->use_doorbell) { 4890 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4891 } else { 4892 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4893 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4894 } 4895 4896 return wptr; 4897 } 4898 4899 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4900 { 4901 struct amdgpu_device *adev = ring->adev; 4902 4903 if (ring->use_doorbell) { 4904 /* XXX check if swapping is necessary on BE */ 4905 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4906 WDOORBELL64(ring->doorbell_index, ring->wptr); 4907 } else { 4908 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4909 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4910 } 4911 } 4912 4913 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4914 { 4915 struct amdgpu_device *adev = ring->adev; 4916 u32 ref_and_mask, reg_mem_engine; 4917 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4918 4919 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4920 switch (ring->me) { 4921 case 1: 4922 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4923 break; 4924 case 2: 4925 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4926 break; 4927 default: 4928 return; 4929 } 4930 reg_mem_engine = 0; 4931 } else { 4932 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4933 reg_mem_engine = 1; /* pfp */ 4934 } 4935 4936 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4937 
adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4938 adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4939 ref_and_mask, ref_and_mask, 0x20); 4940 } 4941 4942 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4943 struct amdgpu_job *job, 4944 struct amdgpu_ib *ib, 4945 uint32_t flags) 4946 { 4947 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4948 u32 header, control = 0; 4949 4950 if (ib->flags & AMDGPU_IB_FLAG_CE) 4951 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4952 else 4953 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4954 4955 control |= ib->length_dw | (vmid << 24); 4956 4957 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4958 control |= INDIRECT_BUFFER_PRE_ENB(1); 4959 4960 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4961 gfx_v9_0_ring_emit_de_meta(ring); 4962 } 4963 4964 amdgpu_ring_write(ring, header); 4965 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4966 amdgpu_ring_write(ring, 4967 #ifdef __BIG_ENDIAN 4968 (2 << 0) | 4969 #endif 4970 lower_32_bits(ib->gpu_addr)); 4971 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4972 amdgpu_ring_write(ring, control); 4973 } 4974 4975 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4976 struct amdgpu_job *job, 4977 struct amdgpu_ib *ib, 4978 uint32_t flags) 4979 { 4980 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4981 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4982 4983 /* Currently, there is a high possibility to get wave ID mismatch 4984 * between ME and GDS, leading to a hw deadlock, because ME generates 4985 * different wave IDs than the GDS expects. This situation happens 4986 * randomly when at least 5 compute pipes use GDS ordered append. 4987 * The wave IDs generated by ME are also wrong after suspend/resume. 4988 * Those are probably bugs somewhere else in the kernel driver. 4989 * 4990 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4991 * GDS to 0 for this ring (me/pipe). 4992 */ 4993 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4994 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4995 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4996 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4997 } 4998 4999 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5000 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5001 amdgpu_ring_write(ring, 5002 #ifdef __BIG_ENDIAN 5003 (2 << 0) | 5004 #endif 5005 lower_32_bits(ib->gpu_addr)); 5006 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5007 amdgpu_ring_write(ring, control); 5008 } 5009 5010 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5011 u64 seq, unsigned flags) 5012 { 5013 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5014 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5015 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5016 5017 /* RELEASE_MEM - flush caches, send int */ 5018 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5019 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5020 EOP_TC_NC_ACTION_EN) : 5021 (EOP_TCL1_ACTION_EN | 5022 EOP_TC_ACTION_EN | 5023 EOP_TC_WB_ACTION_EN | 5024 EOP_TC_MD_ACTION_EN)) | 5025 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5026 EVENT_INDEX(5))); 5027 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5028 5029 /* 5030 * the address should be Qword aligned if 64bit write, Dword 5031 * aligned if only send 32bit data low (discard data high) 5032 */ 5033 if (write64bit) 5034 BUG_ON(addr & 0x7); 5035 else 5036 BUG_ON(addr & 0x3); 5037 amdgpu_ring_write(ring, lower_32_bits(addr)); 5038 amdgpu_ring_write(ring, upper_32_bits(addr)); 5039 amdgpu_ring_write(ring, lower_32_bits(seq)); 5040 amdgpu_ring_write(ring, upper_32_bits(seq)); 5041 amdgpu_ring_write(ring, 0); 5042 } 5043 5044 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5045 { 5046 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5047 uint32_t seq = ring->fence_drv.sync_seq; 5048 uint64_t addr = ring->fence_drv.gpu_addr; 5049 5050 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5051 lower_32_bits(addr), upper_32_bits(addr), 5052 seq, 0xffffffff, 4); 5053 } 5054 5055 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5056 unsigned vmid, uint64_t pd_addr) 5057 { 5058 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5059 5060 /* compute doesn't have PFP */ 5061 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5062 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5063 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5064 amdgpu_ring_write(ring, 0x0); 5065 } 5066 } 5067 5068 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5069 { 5070 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5071 } 5072 5073 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5074 { 5075 u64 wptr; 5076 5077 /* XXX check if swapping is necessary on BE */ 5078 if (ring->use_doorbell) 5079 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5080 else 5081 BUG(); 5082 return wptr; 5083 } 5084 5085 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 5086 bool acquire) 5087 { 5088 struct amdgpu_device *adev = ring->adev; 5089 int pipe_num, tmp, reg; 5090 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 5091 5092 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 5093 5094 /* first me only has 2 entries, GFX and HP3D */ 5095 if (ring->me > 0) 5096 pipe_num -= 2; 5097 5098 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 5099 tmp = RREG32(reg); 5100 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 5101 WREG32(reg, tmp); 5102 } 5103 5104 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 5105 struct amdgpu_ring *ring, 5106 bool acquire) 5107 { 5108 int i, pipe; 5109 bool reserve; 5110 struct amdgpu_ring *iring; 5111 5112 mutex_lock(&adev->gfx.pipe_reserve_mutex); 5113 pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0); 5114 if (acquire) 5115 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5116 else 5117 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5118 5119 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 5120 /* Clear all reservations - everyone reacquires all resources */ 5121 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 5122 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 5123 true); 5124 5125 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 5126 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 5127 true); 5128 } else { 5129 /* Lower all pipes without a current reservation */ 5130 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 5131 iring = &adev->gfx.gfx_ring[i]; 5132 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5133 iring->me, 5134 iring->pipe, 5135 0); 5136 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5137 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5138 } 5139 5140 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 5141 iring = &adev->gfx.compute_ring[i]; 5142 pipe = amdgpu_gfx_mec_queue_to_bit(adev, 5143 iring->me, 5144 iring->pipe, 5145 0); 5146 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 5147 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 5148 } 5149 } 5150 5151 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 5152 } 5153 5154 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 5155 struct amdgpu_ring *ring, 5156 bool acquire) 5157 { 5158 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 5159 uint32_t queue_priority = acquire ? 
0xf : 0x0; 5160 5161 mutex_lock(&adev->srbm_mutex); 5162 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5163 5164 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 5165 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 5166 5167 soc15_grbm_select(adev, 0, 0, 0, 0); 5168 mutex_unlock(&adev->srbm_mutex); 5169 } 5170 5171 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 5172 enum drm_sched_priority priority) 5173 { 5174 struct amdgpu_device *adev = ring->adev; 5175 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 5176 5177 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 5178 return; 5179 5180 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 5181 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 5182 } 5183 5184 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5185 { 5186 struct amdgpu_device *adev = ring->adev; 5187 5188 /* XXX check if swapping is necessary on BE */ 5189 if (ring->use_doorbell) { 5190 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5191 WDOORBELL64(ring->doorbell_index, ring->wptr); 5192 } else{ 5193 BUG(); /* only DOORBELL method supported on gfx9 now */ 5194 } 5195 } 5196 5197 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5198 u64 seq, unsigned int flags) 5199 { 5200 struct amdgpu_device *adev = ring->adev; 5201 5202 /* we only allocate 32bit for each seq wb address */ 5203 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5204 5205 /* write fence seq to the "addr" */ 5206 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5207 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5208 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5209 amdgpu_ring_write(ring, lower_32_bits(addr)); 5210 amdgpu_ring_write(ring, upper_32_bits(addr)); 5211 amdgpu_ring_write(ring, lower_32_bits(seq)); 5212 5213 if (flags & AMDGPU_FENCE_FLAG_INT) { 5214 /* set register to trigger INT */ 5215 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5216 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5217 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5218 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5219 amdgpu_ring_write(ring, 0); 5220 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5221 } 5222 } 5223 5224 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5225 { 5226 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5227 amdgpu_ring_write(ring, 0); 5228 } 5229 5230 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 5231 { 5232 struct v9_ce_ib_state ce_payload = {0}; 5233 uint64_t csa_addr; 5234 int cnt; 5235 5236 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5237 csa_addr = amdgpu_csa_vaddr(ring->adev); 5238 5239 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5240 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5241 WRITE_DATA_DST_SEL(8) | 5242 WR_CONFIRM) | 5243 WRITE_DATA_CACHE_POLICY(0)); 5244 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5245 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5246 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5247 } 5248 5249 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5250 { 5251 struct v9_de_ib_state de_payload = {0}; 5252 uint64_t csa_addr, gds_addr; 5253 int cnt; 5254 5255 csa_addr = amdgpu_csa_vaddr(ring->adev); 5256 gds_addr = csa_addr + 4096; 5257 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr); 5258 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5259 5260 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5261 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5262 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5263 WRITE_DATA_DST_SEL(8) | 5264 WR_CONFIRM) | 5265 WRITE_DATA_CACHE_POLICY(0)); 5266 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5267 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5268 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5269 } 5270 5271 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) 5272 { 5273 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5274 amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ 5275 } 5276 5277 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5278 { 5279 uint32_t dw2 = 0; 5280 5281 if (amdgpu_sriov_vf(ring->adev)) 5282 gfx_v9_0_ring_emit_ce_meta(ring); 5283 5284 gfx_v9_0_ring_emit_tmz(ring, true); 5285 5286 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5287 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5288 /* set load_global_config & load_global_uconfig */ 5289 dw2 |= 0x8001; 5290 /* set load_cs_sh_regs */ 5291 dw2 |= 0x01000000; 5292 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5293 dw2 |= 0x10002; 5294 5295 /* set load_ce_ram if preamble presented */ 5296 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5297 dw2 |= 0x10000000; 5298 } else { 5299 /* still load_ce_ram if this is the first time preamble presented 5300 * although there is no context switch happens. 5301 */ 5302 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5303 dw2 |= 0x10000000; 5304 } 5305 5306 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5307 amdgpu_ring_write(ring, dw2); 5308 amdgpu_ring_write(ring, 0); 5309 } 5310 5311 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5312 { 5313 unsigned ret; 5314 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5315 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5316 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5317 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5318 ret = ring->wptr & ring->buf_mask; 5319 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5320 return ret; 5321 } 5322 5323 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5324 { 5325 unsigned cur; 5326 BUG_ON(offset > ring->buf_mask); 5327 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5328 5329 cur = (ring->wptr & ring->buf_mask) - 1; 5330 if (likely(cur > offset)) 5331 ring->ring[offset] = cur - offset; 5332 else 5333 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5334 } 5335 5336 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 5337 { 5338 struct amdgpu_device *adev = ring->adev; 5339 5340 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5341 amdgpu_ring_write(ring, 0 | /* src: register*/ 5342 (5 << 8) | /* dst: memory */ 5343 (1 << 20)); /* write confirm */ 5344 amdgpu_ring_write(ring, reg); 5345 amdgpu_ring_write(ring, 0); 5346 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5347 adev->virt.reg_val_offs * 4)); 5348 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5349 adev->virt.reg_val_offs * 4)); 5350 } 5351 5352 static void 
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5353 uint32_t val) 5354 { 5355 uint32_t cmd = 0; 5356 5357 switch (ring->funcs->type) { 5358 case AMDGPU_RING_TYPE_GFX: 5359 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5360 break; 5361 case AMDGPU_RING_TYPE_KIQ: 5362 cmd = (1 << 16); /* no inc addr */ 5363 break; 5364 default: 5365 cmd = WR_CONFIRM; 5366 break; 5367 } 5368 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5369 amdgpu_ring_write(ring, cmd); 5370 amdgpu_ring_write(ring, reg); 5371 amdgpu_ring_write(ring, 0); 5372 amdgpu_ring_write(ring, val); 5373 } 5374 5375 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5376 uint32_t val, uint32_t mask) 5377 { 5378 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5379 } 5380 5381 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5382 uint32_t reg0, uint32_t reg1, 5383 uint32_t ref, uint32_t mask) 5384 { 5385 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5386 struct amdgpu_device *adev = ring->adev; 5387 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 5388 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5389 5390 if (fw_version_ok) 5391 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5392 ref, mask, 0x20); 5393 else 5394 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5395 ref, mask); 5396 } 5397 5398 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5399 { 5400 struct amdgpu_device *adev = ring->adev; 5401 uint32_t value = 0; 5402 5403 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5404 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5405 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5406 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5407 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5408 } 5409 5410 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5411 enum amdgpu_interrupt_state state) 5412 { 5413 switch (state) { 5414 case AMDGPU_IRQ_STATE_DISABLE: 5415 case AMDGPU_IRQ_STATE_ENABLE: 5416 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5417 TIME_STAMP_INT_ENABLE, 5418 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5419 break; 5420 default: 5421 break; 5422 } 5423 } 5424 5425 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5426 int me, int pipe, 5427 enum amdgpu_interrupt_state state) 5428 { 5429 u32 mec_int_cntl, mec_int_cntl_reg; 5430 5431 /* 5432 * amdgpu controls only the first MEC. That's why this function only 5433 * handles the setting of interrupts for this specific MEC. All other 5434 * pipes' interrupts are set by amdkfd. 
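 * Hence only me == 1 (MEC1) is accepted below; any other ME value is
 * rejected with a debug message.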
5435 */ 5436 5437 if (me == 1) { 5438 switch (pipe) { 5439 case 0: 5440 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5441 break; 5442 case 1: 5443 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5444 break; 5445 case 2: 5446 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5447 break; 5448 case 3: 5449 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5450 break; 5451 default: 5452 DRM_DEBUG("invalid pipe %d\n", pipe); 5453 return; 5454 } 5455 } else { 5456 DRM_DEBUG("invalid me %d\n", me); 5457 return; 5458 } 5459 5460 switch (state) { 5461 case AMDGPU_IRQ_STATE_DISABLE: 5462 mec_int_cntl = RREG32(mec_int_cntl_reg); 5463 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5464 TIME_STAMP_INT_ENABLE, 0); 5465 WREG32(mec_int_cntl_reg, mec_int_cntl); 5466 break; 5467 case AMDGPU_IRQ_STATE_ENABLE: 5468 mec_int_cntl = RREG32(mec_int_cntl_reg); 5469 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5470 TIME_STAMP_INT_ENABLE, 1); 5471 WREG32(mec_int_cntl_reg, mec_int_cntl); 5472 break; 5473 default: 5474 break; 5475 } 5476 } 5477 5478 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5479 struct amdgpu_irq_src *source, 5480 unsigned type, 5481 enum amdgpu_interrupt_state state) 5482 { 5483 switch (state) { 5484 case AMDGPU_IRQ_STATE_DISABLE: 5485 case AMDGPU_IRQ_STATE_ENABLE: 5486 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5487 PRIV_REG_INT_ENABLE, 5488 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5489 break; 5490 default: 5491 break; 5492 } 5493 5494 return 0; 5495 } 5496 5497 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5498 struct amdgpu_irq_src *source, 5499 unsigned type, 5500 enum amdgpu_interrupt_state state) 5501 { 5502 switch (state) { 5503 case AMDGPU_IRQ_STATE_DISABLE: 5504 case AMDGPU_IRQ_STATE_ENABLE: 5505 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5506 PRIV_INSTR_INT_ENABLE, 5507 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5508 default: 5509 break; 5510 } 5511 5512 return 0; 5513 } 5514 5515 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5516 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5517 CP_ECC_ERROR_INT_ENABLE, 1) 5518 5519 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5520 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5521 CP_ECC_ERROR_INT_ENABLE, 0) 5522 5523 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5524 struct amdgpu_irq_src *source, 5525 unsigned type, 5526 enum amdgpu_interrupt_state state) 5527 { 5528 switch (state) { 5529 case AMDGPU_IRQ_STATE_DISABLE: 5530 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5531 CP_ECC_ERROR_INT_ENABLE, 0); 5532 DISABLE_ECC_ON_ME_PIPE(1, 0); 5533 DISABLE_ECC_ON_ME_PIPE(1, 1); 5534 DISABLE_ECC_ON_ME_PIPE(1, 2); 5535 DISABLE_ECC_ON_ME_PIPE(1, 3); 5536 break; 5537 5538 case AMDGPU_IRQ_STATE_ENABLE: 5539 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5540 CP_ECC_ERROR_INT_ENABLE, 1); 5541 ENABLE_ECC_ON_ME_PIPE(1, 0); 5542 ENABLE_ECC_ON_ME_PIPE(1, 1); 5543 ENABLE_ECC_ON_ME_PIPE(1, 2); 5544 ENABLE_ECC_ON_ME_PIPE(1, 3); 5545 break; 5546 default: 5547 break; 5548 } 5549 5550 return 0; 5551 } 5552 5553 5554 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5555 struct amdgpu_irq_src *src, 5556 unsigned type, 5557 enum amdgpu_interrupt_state state) 5558 { 5559 switch (type) { 5560 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5561 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5562 break; 5563 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5564 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5565 break; 5566 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5567 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5568 break; 5569 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5570 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5571 break; 5572 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5573 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5574 break; 5575 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5576 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5577 break; 5578 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5579 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5580 break; 5581 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5582 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5583 break; 5584 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5585 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5586 break; 5587 default: 5588 break; 5589 } 5590 return 0; 5591 } 5592 5593 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5594 struct amdgpu_irq_src *source, 5595 struct amdgpu_iv_entry *entry) 5596 { 5597 int i; 5598 u8 me_id, pipe_id, queue_id; 5599 struct amdgpu_ring *ring; 5600 5601 DRM_DEBUG("IH: CP EOP\n"); 5602 me_id = (entry->ring_id & 0x0c) >> 2; 5603 pipe_id = (entry->ring_id & 0x03) >> 0; 5604 queue_id = (entry->ring_id & 0x70) >> 4; 5605 5606 switch (me_id) { 5607 case 0: 5608 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5609 break; 5610 case 1: 5611 case 2: 5612 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5613 ring = &adev->gfx.compute_ring[i]; 5614 /* Per-queue interrupt is supported for MEC starting from VI. 5615 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
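			 * The EOP handler therefore matches me/pipe/queue against
			 * each compute ring before signalling its fence.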
5616 */ 5617 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5618 amdgpu_fence_process(ring); 5619 } 5620 break; 5621 } 5622 return 0; 5623 } 5624 5625 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5626 struct amdgpu_iv_entry *entry) 5627 { 5628 u8 me_id, pipe_id, queue_id; 5629 struct amdgpu_ring *ring; 5630 int i; 5631 5632 me_id = (entry->ring_id & 0x0c) >> 2; 5633 pipe_id = (entry->ring_id & 0x03) >> 0; 5634 queue_id = (entry->ring_id & 0x70) >> 4; 5635 5636 switch (me_id) { 5637 case 0: 5638 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5639 break; 5640 case 1: 5641 case 2: 5642 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5643 ring = &adev->gfx.compute_ring[i]; 5644 if (ring->me == me_id && ring->pipe == pipe_id && 5645 ring->queue == queue_id) 5646 drm_sched_fault(&ring->sched); 5647 } 5648 break; 5649 } 5650 } 5651 5652 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5653 struct amdgpu_irq_src *source, 5654 struct amdgpu_iv_entry *entry) 5655 { 5656 DRM_ERROR("Illegal register access in command stream\n"); 5657 gfx_v9_0_fault(adev, entry); 5658 return 0; 5659 } 5660 5661 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5662 struct amdgpu_irq_src *source, 5663 struct amdgpu_iv_entry *entry) 5664 { 5665 DRM_ERROR("Illegal instruction in command stream\n"); 5666 gfx_v9_0_fault(adev, entry); 5667 return 0; 5668 } 5669 5670 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 5671 struct ras_err_data *err_data, 5672 struct amdgpu_iv_entry *entry) 5673 { 5674 /* TODO ue will trigger an interrupt. */ 5675 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); 5676 if (adev->gfx.funcs->query_ras_error_count) 5677 adev->gfx.funcs->query_ras_error_count(adev, err_data); 5678 amdgpu_ras_reset_gpu(adev, 0); 5679 return AMDGPU_RAS_SUCCESS; 5680 } 5681 5682 static const struct { 5683 const char *name; 5684 uint32_t ip; 5685 uint32_t inst; 5686 uint32_t seg; 5687 uint32_t reg_offset; 5688 uint32_t per_se_instance; 5689 int32_t num_instance; 5690 uint32_t sec_count_mask; 5691 uint32_t ded_count_mask; 5692 } gfx_ras_edc_regs[] = { 5693 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 5694 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5695 REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, 5696 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 5697 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), 5698 REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, 5699 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5700 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, 5701 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 5702 REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, 5703 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 5704 REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), 5705 REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, 5706 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5707 REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, 5708 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 5709 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5710 REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, 5711 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 5712 REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), 5713 REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, 5714 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 5715 REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, 5716 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 
0, mmDC_EDC_RESTORE_CNT), 0, 1, 5717 REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, 5718 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 5719 REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, 5720 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5721 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), 5722 REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, 5723 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 5724 REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, 5725 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 5726 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 5727 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, 5728 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 5729 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5730 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 5731 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, 5732 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 5733 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 5734 REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, 5735 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 5736 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5737 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 5738 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, 5739 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 5740 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5741 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 5742 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, 5743 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 5744 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5745 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 5746 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, 5747 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 5748 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 5749 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 5750 REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, 5751 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, 5752 REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, 5753 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5754 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 5755 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, 5756 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5757 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, 5758 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5759 REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, 5760 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5761 REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, 5762 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, 5763 REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, 5764 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5765 REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, 5766 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, 5767 REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, 5768 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5769 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 5770 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, 5771 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5772 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 5773 REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, 5774 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5775 
REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 5776 REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, 5777 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5778 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 5779 REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, 5780 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5781 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 5782 REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, 5783 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5784 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, 5785 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5786 REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, 5787 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5788 REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, 5789 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5790 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, 5791 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5792 REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, 5793 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, 5794 REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, 5795 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5796 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, 5797 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, 5798 REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, 5799 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5800 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, 5801 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5802 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 5803 0 }, 5804 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5805 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, 5806 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 5807 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 5808 0 }, 5809 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 5810 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, 5811 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, 5812 REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, 5813 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5814 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 5815 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, 5816 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5817 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 5818 REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, 5819 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5820 REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, 5821 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5822 REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, 5823 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5824 REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, 5825 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, 5826 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 5827 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, 5828 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, 
mmTCP_EDC_CNT_NEW), 1, 16, 5829 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 5830 REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, 5831 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5832 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 5833 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, 5834 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5835 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 5836 REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) }, 5837 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, 5838 REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, 5839 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5840 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), 5841 REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, 5842 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5843 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), 5844 REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, 5845 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5846 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), 5847 REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, 5848 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5849 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), 5850 REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, 5851 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5852 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), 5853 REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, 5854 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5855 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), 5856 REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, 5857 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, 5858 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), 5859 REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, 5860 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5861 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 5862 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, 5863 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5864 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 5865 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, 5866 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5867 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 5868 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, 5869 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5870 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 5871 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, 5872 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 5873 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 5874 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, 5875 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, 5876 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 5877 REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, 5878 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5879 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 5880 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, 5881 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5882 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 5883 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, 5884 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5885 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 5886 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, 5887 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5888 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 5889 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, 5890 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", 5891 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5892 REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 5893 0 }, 5894 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5895 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5896 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5897 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, 5898 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 5899 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, 5900 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", 5901 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5902 REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5903 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, 5904 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 5905 REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, 5906 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5907 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 5908 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, 5909 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5910 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 5911 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, 5912 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5913 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 5914 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, 5915 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5916 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 5917 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, 5918 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", 5919 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5920 REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 5921 0 }, 5922 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5923 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5924 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5925 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, 5926 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 5927 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, 5928 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", 5929 SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, 5930 REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, 5931 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5932 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 5933 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, 5934 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5935 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 5936 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, 5937 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5938 
REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 5939 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, 5940 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5941 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 5942 REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, 5943 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5944 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 5945 REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, 5946 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5947 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, 5948 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5949 REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, 5950 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5951 REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, 5952 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5953 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, 5954 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, 5955 REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, 5956 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5957 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 5958 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, 5959 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5960 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 5961 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, 5962 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5963 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 5964 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, 5965 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5966 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, 5967 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5968 REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, 5969 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5970 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, 5971 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5972 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, 5973 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5974 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, 5975 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, 5976 REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, 5977 }; 5978 5979 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 5980 void *inject_if) 5981 { 5982 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 5983 int ret; 5984 struct ta_ras_trigger_error_input block_info = { 0 }; 5985 5986 if (adev->asic_type != CHIP_VEGA20) 5987 return -EINVAL; 5988 5989 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 5990 return -EINVAL; 5991 5992 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 5993 return -EPERM; 5994 5995 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 5996 info->head.type)) { 5997 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 5998 ras_gfx_subblocks[info->head.sub_block_index].name, 5999 info->head.type); 6000 return -EPERM; 6001 } 6002 6003 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6004 info->head.type)) { 6005 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6006 
ras_gfx_subblocks[info->head.sub_block_index].name, 6007 info->head.type); 6008 return -EPERM; 6009 } 6010 6011 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6012 block_info.sub_block_index = 6013 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6014 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6015 block_info.address = info->address; 6016 block_info.value = info->value; 6017 6018 mutex_lock(&adev->grbm_idx_mutex); 6019 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6020 mutex_unlock(&adev->grbm_idx_mutex); 6021 6022 return ret; 6023 } 6024 6025 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6026 void *ras_error_status) 6027 { 6028 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6029 uint32_t sec_count, ded_count; 6030 uint32_t i; 6031 uint32_t reg_value; 6032 uint32_t se_id, instance_id; 6033 6034 if (adev->asic_type != CHIP_VEGA20) 6035 return -EINVAL; 6036 6037 err_data->ue_count = 0; 6038 err_data->ce_count = 0; 6039 6040 mutex_lock(&adev->grbm_idx_mutex); 6041 for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) { 6042 for (instance_id = 0; instance_id < 256; instance_id++) { 6043 for (i = 0; 6044 i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); 6045 i++) { 6046 if (se_id != 0 && 6047 !gfx_ras_edc_regs[i].per_se_instance) 6048 continue; 6049 if (instance_id >= gfx_ras_edc_regs[i].num_instance) 6050 continue; 6051 6052 gfx_v9_0_select_se_sh(adev, se_id, 0, 6053 instance_id); 6054 6055 reg_value = RREG32( 6056 adev->reg_offset[gfx_ras_edc_regs[i].ip] 6057 [gfx_ras_edc_regs[i].inst] 6058 [gfx_ras_edc_regs[i].seg] + 6059 gfx_ras_edc_regs[i].reg_offset); 6060 sec_count = reg_value & 6061 gfx_ras_edc_regs[i].sec_count_mask; 6062 ded_count = reg_value & 6063 gfx_ras_edc_regs[i].ded_count_mask; 6064 if (sec_count) { 6065 DRM_INFO( 6066 "Instance[%d][%d]: SubBlock %s, SEC %d\n", 6067 se_id, instance_id, 6068 gfx_ras_edc_regs[i].name, 6069 sec_count); 6070 err_data->ce_count++; 6071 } 6072 6073 if (ded_count) { 6074 DRM_INFO( 6075 "Instance[%d][%d]: SubBlock %s, DED %d\n", 6076 se_id, instance_id, 6077 gfx_ras_edc_regs[i].name, 6078 ded_count); 6079 err_data->ue_count++; 6080 } 6081 } 6082 } 6083 } 6084 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6085 mutex_unlock(&adev->grbm_idx_mutex); 6086 6087 return 0; 6088 } 6089 6090 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6091 struct amdgpu_irq_src *source, 6092 struct amdgpu_iv_entry *entry) 6093 { 6094 struct ras_common_if *ras_if = adev->gfx.ras_if; 6095 struct ras_dispatch_if ih_data = { 6096 .entry = entry, 6097 }; 6098 6099 if (!ras_if) 6100 return 0; 6101 6102 ih_data.head = *ras_if; 6103 6104 DRM_ERROR("CP ECC ERROR IRQ\n"); 6105 amdgpu_ras_interrupt_dispatch(adev, &ih_data); 6106 return 0; 6107 } 6108 6109 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6110 .name = "gfx_v9_0", 6111 .early_init = gfx_v9_0_early_init, 6112 .late_init = gfx_v9_0_late_init, 6113 .sw_init = gfx_v9_0_sw_init, 6114 .sw_fini = gfx_v9_0_sw_fini, 6115 .hw_init = gfx_v9_0_hw_init, 6116 .hw_fini = gfx_v9_0_hw_fini, 6117 .suspend = gfx_v9_0_suspend, 6118 .resume = gfx_v9_0_resume, 6119 .is_idle = gfx_v9_0_is_idle, 6120 .wait_for_idle = gfx_v9_0_wait_for_idle, 6121 .soft_reset = gfx_v9_0_soft_reset, 6122 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6123 .set_powergating_state = gfx_v9_0_set_powergating_state, 6124 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6125 
}; 6126 6127 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6128 .type = AMDGPU_RING_TYPE_GFX, 6129 .align_mask = 0xff, 6130 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6131 .support_64bit_ptrs = true, 6132 .vmhub = AMDGPU_GFXHUB_0, 6133 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6134 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6135 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6136 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6137 5 + /* COND_EXEC */ 6138 7 + /* PIPELINE_SYNC */ 6139 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6140 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6141 2 + /* VM_FLUSH */ 6142 8 + /* FENCE for VM_FLUSH */ 6143 20 + /* GDS switch */ 6144 4 + /* double SWITCH_BUFFER, 6145 the first COND_EXEC jump to the place just 6146 prior to this double SWITCH_BUFFER */ 6147 5 + /* COND_EXEC */ 6148 7 + /* HDP_flush */ 6149 4 + /* VGT_flush */ 6150 14 + /* CE_META */ 6151 31 + /* DE_META */ 6152 3 + /* CNTX_CTRL */ 6153 5 + /* HDP_INVL */ 6154 8 + 8 + /* FENCE x2 */ 6155 2, /* SWITCH_BUFFER */ 6156 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6157 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6158 .emit_fence = gfx_v9_0_ring_emit_fence, 6159 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6160 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6161 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6162 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6163 .test_ring = gfx_v9_0_ring_test_ring, 6164 .test_ib = gfx_v9_0_ring_test_ib, 6165 .insert_nop = amdgpu_ring_insert_nop, 6166 .pad_ib = amdgpu_ring_generic_pad_ib, 6167 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6168 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6169 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6170 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6171 .emit_tmz = gfx_v9_0_ring_emit_tmz, 6172 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6173 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6174 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6175 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6176 }; 6177 6178 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6179 .type = AMDGPU_RING_TYPE_COMPUTE, 6180 .align_mask = 0xff, 6181 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6182 .support_64bit_ptrs = true, 6183 .vmhub = AMDGPU_GFXHUB_0, 6184 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6185 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6186 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6187 .emit_frame_size = 6188 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6189 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6190 5 + /* hdp invalidate */ 6191 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6192 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6193 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6194 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6195 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6196 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6197 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6198 .emit_fence = gfx_v9_0_ring_emit_fence, 6199 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6200 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6201 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6202 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6203 .test_ring = gfx_v9_0_ring_test_ring, 6204 .test_ib = gfx_v9_0_ring_test_ib, 6205 .insert_nop = amdgpu_ring_insert_nop, 6206 .pad_ib = amdgpu_ring_generic_pad_ib, 6207 .set_priority = gfx_v9_0_ring_set_priority_compute, 6208 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6209 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6210 .emit_reg_write_reg_wait = 
gfx_v9_0_ring_emit_reg_write_reg_wait, 6211 }; 6212 6213 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6214 .type = AMDGPU_RING_TYPE_KIQ, 6215 .align_mask = 0xff, 6216 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6217 .support_64bit_ptrs = true, 6218 .vmhub = AMDGPU_GFXHUB_0, 6219 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6220 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6221 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6222 .emit_frame_size = 6223 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6224 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6225 5 + /* hdp invalidate */ 6226 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6227 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6228 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6229 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6230 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6231 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6232 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6233 .test_ring = gfx_v9_0_ring_test_ring, 6234 .insert_nop = amdgpu_ring_insert_nop, 6235 .pad_ib = amdgpu_ring_generic_pad_ib, 6236 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6237 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6238 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6239 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6240 }; 6241 6242 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6243 { 6244 int i; 6245 6246 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6247 6248 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6249 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6250 6251 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6252 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6253 } 6254 6255 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6256 .set = gfx_v9_0_set_eop_interrupt_state, 6257 .process = gfx_v9_0_eop_irq, 6258 }; 6259 6260 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6261 .set = gfx_v9_0_set_priv_reg_fault_state, 6262 .process = gfx_v9_0_priv_reg_irq, 6263 }; 6264 6265 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6266 .set = gfx_v9_0_set_priv_inst_fault_state, 6267 .process = gfx_v9_0_priv_inst_irq, 6268 }; 6269 6270 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6271 .set = gfx_v9_0_set_cp_ecc_error_state, 6272 .process = gfx_v9_0_cp_ecc_error_irq, 6273 }; 6274 6275 6276 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 6277 { 6278 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6279 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 6280 6281 adev->gfx.priv_reg_irq.num_types = 1; 6282 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 6283 6284 adev->gfx.priv_inst_irq.num_types = 1; 6285 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 6286 6287 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 6288 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 6289 } 6290 6291 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 6292 { 6293 switch (adev->asic_type) { 6294 case CHIP_VEGA10: 6295 case CHIP_VEGA12: 6296 case CHIP_VEGA20: 6297 case CHIP_RAVEN: 6298 case CHIP_ARCTURUS: 6299 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 6300 break; 6301 default: 6302 break; 6303 } 6304 } 6305 6306 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 6307 { 6308 /* init asci gds info */ 6309 switch (adev->asic_type) { 6310 case CHIP_VEGA10: 6311 case CHIP_VEGA12: 6312 case CHIP_VEGA20: 6313 
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

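/*
 * Worked example for the two helpers above (added comment with illustrative
 * numbers, not in the original source): with max_cu_per_sh = 8,
 * amdgpu_gfx_create_bitmask() yields 0xff.  If the combined INACTIVE_CUS
 * field reads 0x03 (CU0 and CU1 disabled), the returned active-CU bitmap is
 * (~0x03) & 0xff = 0xfc, i.e. CUs 2-7 are reported as active.
 */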
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the bitmap array size 4*4, which can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
			 * is a 4x4 array, which suits Vega ASICs with their
			 * 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout is changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
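/*
 * Usage note (added comment, illustrative and not part of this file's
 * build): gfx_v9_0_ip_block is the handle that the SoC-level setup code
 * registers when assembling the per-ASIC IP block list, e.g.
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * The actual call sites live in the SoC code (soc15.c), not here.
 */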