/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS	1
#define GFX9_MEC_HPD_SIZE	4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmGCEA_PROBE_MAP	0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX	0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 112 113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 118 119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); 120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); 121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); 122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); 123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); 124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); 125 126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); 129 130 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 132 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 134 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 136 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 138 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 140 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 142 143 enum ta_ras_gfx_subblock { 144 /*CPC*/ 145 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 146 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 147 TA_RAS_BLOCK__GFX_CPC_UCODE, 148 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 149 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 150 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 151 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 152 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 153 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 154 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 155 /* CPF*/ 156 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 157 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 158 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 159 TA_RAS_BLOCK__GFX_CPF_TAG, 160 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 161 /* CPG*/ 162 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 163 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 164 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 165 TA_RAS_BLOCK__GFX_CPG_TAG, 166 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 167 /* GDS*/ 168 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 169 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 170 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 171 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 172 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 173 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 174 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 175 /* SPI*/ 176 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 177 /* SQ*/ 178 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 180 TA_RAS_BLOCK__GFX_SQ_LDS_D, 181 TA_RAS_BLOCK__GFX_SQ_LDS_I, 182 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 183 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 184 /* SQC (3 ranges)*/ 185 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 186 /* SQC range 
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

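/*
 * Each ras_gfx_subblock entry carries two bitmasks built by the macro
 * below: flags a..d are packed into hw_supported_error_type (bits 0..3)
 * and flags e..h into sw_supported_error_type (packed as g->bit0,
 * e->bit1, h->bit2, f->bit3).  The bit positions are assumed to follow
 * the amdgpu_ras error-type encoding (parity, single-correctable,
 * multi-uncorrectable, poison); see enum amdgpu_ras_error_type in
 * amdgpu_ras.h.
 */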
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

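/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE() entry names a
 * register plus an AND mask and an OR value; the masked field is cleared
 * and the OR value written in its place (an all-ones mask writes the value
 * directly) - see soc15_program_register_sequence() in soc15.c.  The
 * per-ASIC tables below are applied by gfx_v9_0_init_golden_registers().
 */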
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

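/*
 * Indirect register write through the RLC.  Under SR-IOV full access the
 * guest cannot program most GC registers directly, so gfx_v9_0_rlcg_rw()
 * hands the write to RLC firmware: the value is placed in SCRATCH_REG0,
 * the register offset with bit 31 set ("request pending") in SCRATCH_REG1,
 * and RLC_SPARE_INT is written as a doorbell.  The RLC is expected to
 * perform the write and clear bit 31, which the polling loop below waits
 * for.  GRBM_GFX_CNTL/GRBM_GFX_INDEX are mirrored to SCRATCH_REG2/3 and
 * also written directly.
 */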
static void gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
	static void *scratch_reg0;
	static void *scratch_reg1;
	static void *scratch_reg2;
	static void *scratch_reg3;
	static void *spare_int;
	static uint32_t grbm_cntl;
	static uint32_t grbm_idx;

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shouldn't call rlcg write register during runtime\n");
		return;
	}

	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else if (offset == grbm_idx)
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		uint32_t i = 0;
		uint32_t retries = 50000;

		writel(v, scratch_reg0);
		writel(offset | 0x80000000, scratch_reg1);
		writel(1, spare_int);
		for (i = 0; i < retries; i++) {
			u32 tmp;

			tmp = readl(scratch_reg1);
			if (!(tmp & 0x80000000))
				break;

			udelay(10);
		}
		if (i >= retries)
			pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
	}

}

static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)
{
	if (amdgpu_sriov_fullaccess(adev)) {
		gfx_v9_0_rlcg_rw(adev, offset, v, flag);

		return;
	}

	if (flag & AMDGPU_REGS_NO_KIQ)
		WREG32_NO_KIQ(offset, v);
	else
		WREG32(offset, v);
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0* queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

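/*
 * KIQ MAP_QUEUES: asks the Kernel Interface Queue to bind a compute ring
 * to a hardware queue slot.  The packet carries the ME/pipe/queue
 * selection, the ring's doorbell offset, the GPU address of its MQD and
 * the address of its wptr writeback slot (see the PACKET3_MAP_QUEUES_*
 * field macros).
 */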
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case CHIP_RENOIR:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case CHIP_ALDEBARAN:
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((adev->asic_type != CHIP_ARCTURUS) &&
	    (adev->asic_type != CHIP_ALDEBARAN))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

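/*
 * Record whether the CP ME/MEC firmware is new enough to support the
 * fused register write + wait operation; when these flags stay false,
 * callers are expected to fall back to separate write and wait packets.
 */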
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((adev->asic_type != CHIP_ARCTURUS) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	    (adev->gfx.mec_feature_version < 46) ||
	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
	    (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >= 0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >= 0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}

struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;
	u16 chip_device;
	u16 subsys_vendor;
	u16 subsys_device;
	u8 revision;
};

static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	{ 0, 0, 0, 0, 0 },
};

static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
{
	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;

	while (p && p->chip_device != 0) {
		if (pdev->vendor == p->chip_vendor &&
		    pdev->device == p->chip_device &&
		    pdev->subsystem_vendor == p->subsys_vendor &&
		    pdev->subsystem_device == p->subsys_device &&
		    pdev->revision == p->revision) {
			return true;
		}
		++p;
	}
	return false;
}

static bool is_raven_kicker(struct amdgpu_device *adev)
{
	if (adev->pm.fw_version >= 0x41e2b)
		return true;
	else
		return false;
}

static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
		    ((!is_raven_kicker(adev) &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case CHIP_RENOIR:
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
1326 default: 1327 break; 1328 } 1329 } 1330 1331 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1332 const char *chip_name) 1333 { 1334 char fw_name[30]; 1335 int err; 1336 struct amdgpu_firmware_info *info = NULL; 1337 const struct common_firmware_header *header = NULL; 1338 const struct gfx_firmware_header_v1_0 *cp_hdr; 1339 1340 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1341 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1342 if (err) 1343 goto out; 1344 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1345 if (err) 1346 goto out; 1347 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1348 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1349 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1350 1351 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1352 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1353 if (err) 1354 goto out; 1355 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1356 if (err) 1357 goto out; 1358 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1359 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1360 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1361 1362 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1363 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1364 if (err) 1365 goto out; 1366 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1367 if (err) 1368 goto out; 1369 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1370 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1371 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1372 1373 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1374 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1375 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1376 info->fw = adev->gfx.pfp_fw; 1377 header = (const struct common_firmware_header *)info->fw->data; 1378 adev->firmware.fw_size += 1379 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1380 1381 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1382 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1383 info->fw = adev->gfx.me_fw; 1384 header = (const struct common_firmware_header *)info->fw->data; 1385 adev->firmware.fw_size += 1386 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1387 1388 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1389 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1390 info->fw = adev->gfx.ce_fw; 1391 header = (const struct common_firmware_header *)info->fw->data; 1392 adev->firmware.fw_size += 1393 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1394 } 1395 1396 out: 1397 if (err) { 1398 dev_err(adev->dev, 1399 "gfx9: Failed to load firmware \"%s\"\n", 1400 fw_name); 1401 release_firmware(adev->gfx.pfp_fw); 1402 adev->gfx.pfp_fw = NULL; 1403 release_firmware(adev->gfx.me_fw); 1404 adev->gfx.me_fw = NULL; 1405 release_firmware(adev->gfx.ce_fw); 1406 adev->gfx.ce_fw = NULL; 1407 } 1408 return err; 1409 } 1410 1411 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1412 const char *chip_name) 1413 { 1414 char fw_name[30]; 1415 int err; 1416 struct amdgpu_firmware_info *info = NULL; 1417 const struct common_firmware_header *header = NULL; 1418 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1419 unsigned int *tmp = NULL; 1420 unsigned int i = 0; 1421 uint16_t version_major; 1422 uint16_t 
version_minor; 1423 uint32_t smu_version; 1424 1425 /* 1426 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1427 * instead of picasso_rlc.bin. 1428 * Judgment method: 1429 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1430 * or revision >= 0xD8 && revision <= 0xDF 1431 * otherwise is PCO FP5 1432 */ 1433 if (!strcmp(chip_name, "picasso") && 1434 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1435 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1436 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1437 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1438 (smu_version >= 0x41e2b)) 1439 /** 1440 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1441 */ 1442 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1443 else 1444 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1445 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1446 if (err) 1447 goto out; 1448 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1449 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1450 1451 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1452 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1453 if (version_major == 2 && version_minor == 1) 1454 adev->gfx.rlc.is_rlc_v2_1 = true; 1455 1456 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1457 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1458 adev->gfx.rlc.save_and_restore_offset = 1459 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1460 adev->gfx.rlc.clear_state_descriptor_offset = 1461 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1462 adev->gfx.rlc.avail_scratch_ram_locations = 1463 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1464 adev->gfx.rlc.reg_restore_list_size = 1465 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1466 adev->gfx.rlc.reg_list_format_start = 1467 le32_to_cpu(rlc_hdr->reg_list_format_start); 1468 adev->gfx.rlc.reg_list_format_separate_start = 1469 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1470 adev->gfx.rlc.starting_offsets_start = 1471 le32_to_cpu(rlc_hdr->starting_offsets_start); 1472 adev->gfx.rlc.reg_list_format_size_bytes = 1473 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1474 adev->gfx.rlc.reg_list_size_bytes = 1475 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1476 adev->gfx.rlc.register_list_format = 1477 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1478 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1479 if (!adev->gfx.rlc.register_list_format) { 1480 err = -ENOMEM; 1481 goto out; 1482 } 1483 1484 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1485 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1486 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) 1487 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1488 1489 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1490 1491 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1492 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1493 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) 1494 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1495 1496 if (adev->gfx.rlc.is_rlc_v2_1) 1497 gfx_v9_0_init_rlc_ext_microcode(adev); 1498 1499 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1500 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1501 
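	/*
	 * PSP-managed loading: the RLC_G image is registered here; for RLC
	 * v2.1 parts the three save/restore list blobs are registered below
	 * as well.  Each contribution to adev->firmware.fw_size is rounded
	 * up to a full page.
	 */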
info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1502 info->fw = adev->gfx.rlc_fw; 1503 header = (const struct common_firmware_header *)info->fw->data; 1504 adev->firmware.fw_size += 1505 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1506 1507 if (adev->gfx.rlc.is_rlc_v2_1 && 1508 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1509 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1510 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1511 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1512 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1513 info->fw = adev->gfx.rlc_fw; 1514 adev->firmware.fw_size += 1515 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1516 1517 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1518 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1519 info->fw = adev->gfx.rlc_fw; 1520 adev->firmware.fw_size += 1521 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1522 1523 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1524 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1525 info->fw = adev->gfx.rlc_fw; 1526 adev->firmware.fw_size += 1527 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1528 } 1529 } 1530 1531 out: 1532 if (err) { 1533 dev_err(adev->dev, 1534 "gfx9: Failed to load firmware \"%s\"\n", 1535 fw_name); 1536 release_firmware(adev->gfx.rlc_fw); 1537 adev->gfx.rlc_fw = NULL; 1538 } 1539 return err; 1540 } 1541 1542 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1543 { 1544 if (adev->asic_type == CHIP_ALDEBARAN || 1545 adev->asic_type == CHIP_ARCTURUS || 1546 adev->asic_type == CHIP_RENOIR) 1547 return false; 1548 1549 return true; 1550 } 1551 1552 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1553 const char *chip_name) 1554 { 1555 char fw_name[30]; 1556 int err; 1557 struct amdgpu_firmware_info *info = NULL; 1558 const struct common_firmware_header *header = NULL; 1559 const struct gfx_firmware_header_v1_0 *cp_hdr; 1560 1561 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1562 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1563 if (err) 1564 goto out; 1565 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1566 if (err) 1567 goto out; 1568 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1569 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1570 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1571 1572 1573 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1574 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1575 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1576 if (!err) { 1577 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1578 if (err) 1579 goto out; 1580 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1581 adev->gfx.mec2_fw->data; 1582 adev->gfx.mec2_fw_version = 1583 le32_to_cpu(cp_hdr->header.ucode_version); 1584 adev->gfx.mec2_feature_version = 1585 le32_to_cpu(cp_hdr->ucode_feature_version); 1586 } else { 1587 err = 0; 1588 adev->gfx.mec2_fw = NULL; 1589 } 1590 } 1591 1592 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1593 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1594 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1595 info->fw = adev->gfx.mec_fw; 1596 header = (const struct common_firmware_header *)info->fw->data; 1597 cp_hdr = (const struct gfx_firmware_header_v1_0 
*)info->fw->data; 1598 adev->firmware.fw_size += 1599 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1600 1601 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1602 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1603 info->fw = adev->gfx.mec_fw; 1604 adev->firmware.fw_size += 1605 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1606 1607 if (adev->gfx.mec2_fw) { 1608 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1609 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1610 info->fw = adev->gfx.mec2_fw; 1611 header = (const struct common_firmware_header *)info->fw->data; 1612 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1613 adev->firmware.fw_size += 1614 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1615 1616 /* TODO: Determine if MEC2 JT FW loading can be removed 1617 for all GFX V9 asic and above */ 1618 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1619 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1620 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1621 info->fw = adev->gfx.mec2_fw; 1622 adev->firmware.fw_size += 1623 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1624 PAGE_SIZE); 1625 } 1626 } 1627 } 1628 1629 out: 1630 gfx_v9_0_check_if_need_gfxoff(adev); 1631 gfx_v9_0_check_fw_write_wait(adev); 1632 if (err) { 1633 dev_err(adev->dev, 1634 "gfx9: Failed to load firmware \"%s\"\n", 1635 fw_name); 1636 release_firmware(adev->gfx.mec_fw); 1637 adev->gfx.mec_fw = NULL; 1638 release_firmware(adev->gfx.mec2_fw); 1639 adev->gfx.mec2_fw = NULL; 1640 } 1641 return err; 1642 } 1643 1644 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1645 { 1646 const char *chip_name; 1647 int r; 1648 1649 DRM_DEBUG("\n"); 1650 1651 switch (adev->asic_type) { 1652 case CHIP_VEGA10: 1653 chip_name = "vega10"; 1654 break; 1655 case CHIP_VEGA12: 1656 chip_name = "vega12"; 1657 break; 1658 case CHIP_VEGA20: 1659 chip_name = "vega20"; 1660 break; 1661 case CHIP_RAVEN: 1662 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1663 chip_name = "raven2"; 1664 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1665 chip_name = "picasso"; 1666 else 1667 chip_name = "raven"; 1668 break; 1669 case CHIP_ARCTURUS: 1670 chip_name = "arcturus"; 1671 break; 1672 case CHIP_RENOIR: 1673 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1674 chip_name = "renoir"; 1675 else 1676 chip_name = "green_sardine"; 1677 break; 1678 case CHIP_ALDEBARAN: 1679 chip_name = "aldebaran"; 1680 break; 1681 default: 1682 BUG(); 1683 } 1684 1685 /* No CPG in Arcturus */ 1686 if (adev->gfx.num_gfx_rings) { 1687 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1688 if (r) 1689 return r; 1690 } 1691 1692 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1693 if (r) 1694 return r; 1695 1696 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1697 if (r) 1698 return r; 1699 1700 return r; 1701 } 1702 1703 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1704 { 1705 u32 count = 0; 1706 const struct cs_section_def *sect = NULL; 1707 const struct cs_extent_def *ext = NULL; 1708 1709 /* begin clear state */ 1710 count += 2; 1711 /* context control state */ 1712 count += 3; 1713 1714 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1715 for (ext = sect->section; ext->extent != NULL; ++ext) { 1716 if (sect->id == SECT_CONTEXT) 1717 count += 2 + ext->reg_count; 1718 else 1719 return 0; 1720 } 1721 } 1722 1723 /* end clear state */ 1724 count += 2; 1725 /* clear state */ 1726 count += 2; 1727 1728 return count; 1729 
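	/*
	 * For the gfx9 clear state this works out to 2 (preamble begin) +
	 * 3 (context control) + sum over SECT_CONTEXT extents of
	 * (2 + reg_count) + 2 (preamble end) + 2 (clear state) dwords,
	 * matching the packet stream emitted by gfx_v9_0_get_csb_buffer().
	 */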
} 1730 1731 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1732 volatile u32 *buffer) 1733 { 1734 u32 count = 0, i; 1735 const struct cs_section_def *sect = NULL; 1736 const struct cs_extent_def *ext = NULL; 1737 1738 if (adev->gfx.rlc.cs_data == NULL) 1739 return; 1740 if (buffer == NULL) 1741 return; 1742 1743 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1744 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1745 1746 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1747 buffer[count++] = cpu_to_le32(0x80000000); 1748 buffer[count++] = cpu_to_le32(0x80000000); 1749 1750 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1751 for (ext = sect->section; ext->extent != NULL; ++ext) { 1752 if (sect->id == SECT_CONTEXT) { 1753 buffer[count++] = 1754 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1755 buffer[count++] = cpu_to_le32(ext->reg_index - 1756 PACKET3_SET_CONTEXT_REG_START); 1757 for (i = 0; i < ext->reg_count; i++) 1758 buffer[count++] = cpu_to_le32(ext->extent[i]); 1759 } else { 1760 return; 1761 } 1762 } 1763 } 1764 1765 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1766 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1767 1768 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1769 buffer[count++] = cpu_to_le32(0); 1770 } 1771 1772 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1773 { 1774 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1775 uint32_t pg_always_on_cu_num = 2; 1776 uint32_t always_on_cu_num; 1777 uint32_t i, j, k; 1778 uint32_t mask, cu_bitmap, counter; 1779 1780 if (adev->flags & AMD_IS_APU) 1781 always_on_cu_num = 4; 1782 else if (adev->asic_type == CHIP_VEGA12) 1783 always_on_cu_num = 8; 1784 else 1785 always_on_cu_num = 12; 1786 1787 mutex_lock(&adev->grbm_idx_mutex); 1788 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1789 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1790 mask = 1; 1791 cu_bitmap = 0; 1792 counter = 0; 1793 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1794 1795 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1796 if (cu_info->bitmap[i][j] & mask) { 1797 if (counter == pg_always_on_cu_num) 1798 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1799 if (counter < always_on_cu_num) 1800 cu_bitmap |= mask; 1801 else 1802 break; 1803 counter++; 1804 } 1805 mask <<= 1; 1806 } 1807 1808 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1809 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1810 } 1811 } 1812 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1813 mutex_unlock(&adev->grbm_idx_mutex); 1814 } 1815 1816 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1817 { 1818 uint32_t data; 1819 1820 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1821 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1822 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1823 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1824 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1825 1826 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1827 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1828 1829 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1830 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1831 1832 mutex_lock(&adev->grbm_idx_mutex); 1833 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1834 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 
0xffffffff); 1835 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1836 1837 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1838 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1839 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1840 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1841 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1842 1843 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1844 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1845 data &= 0x0000FFFF; 1846 data |= 0x00C00000; 1847 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1848 1849 /* 1850 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1851 * programmed in gfx_v9_0_init_always_on_cu_mask() 1852 */ 1853 1854 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1855 * but used for RLC_LB_CNTL configuration */ 1856 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1857 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1858 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1859 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1860 mutex_unlock(&adev->grbm_idx_mutex); 1861 1862 gfx_v9_0_init_always_on_cu_mask(adev); 1863 } 1864 1865 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1866 { 1867 uint32_t data; 1868 1869 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1870 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1871 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1872 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1873 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1874 1875 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1876 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1877 1878 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1879 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1880 1881 mutex_lock(&adev->grbm_idx_mutex); 1882 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1883 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1884 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1885 1886 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1887 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1888 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1889 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1890 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1891 1892 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1893 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1894 data &= 0x0000FFFF; 1895 data |= 0x00C00000; 1896 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1897 1898 /* 1899 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1900 * programmed in gfx_v9_0_init_always_on_cu_mask() 1901 */ 1902 1903 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1904 * but used for RLC_LB_CNTL configuration */ 1905 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1906 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1907 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1908 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1909 mutex_unlock(&adev->grbm_idx_mutex); 1910 1911 gfx_v9_0_init_always_on_cu_mask(adev); 1912 } 1913 1914 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1915 { 1916 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1917 } 1918 1919 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1920 { 1921 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1922 return 5; 1923 else 1924 return 4; 1925 } 1926 1927 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1928 { 1929 const struct cs_section_def *cs_data; 1930 int r; 1931 1932 adev->gfx.rlc.cs_data = gfx9_cs_data; 1933 1934 cs_data = adev->gfx.rlc.cs_data; 1935 1936 if (cs_data) { 1937 /* init clear state block */ 1938 r = amdgpu_gfx_rlc_init_csb(adev); 1939 if (r) 1940 return r; 1941 } 1942 1943 if (adev->flags & AMD_IS_APU) { 1944 /* TODO: double check the cp_table_size for RV */ 1945 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1946 r = amdgpu_gfx_rlc_init_cpt(adev); 1947 if (r) 1948 return r; 1949 } 1950 1951 switch (adev->asic_type) { 1952 case CHIP_RAVEN: 1953 gfx_v9_0_init_lbpw(adev); 1954 break; 1955 case CHIP_VEGA20: 1956 gfx_v9_4_init_lbpw(adev); 1957 break; 1958 default: 1959 break; 1960 } 1961 1962 /* init spm vmid with 0xf */ 1963 if (adev->gfx.rlc.funcs->update_spm_vmid) 1964 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 1965 1966 return 0; 1967 } 1968 1969 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1970 { 1971 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1972 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1973 } 1974 1975 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1976 { 1977 int r; 1978 u32 *hpd; 1979 const __le32 *fw_data; 1980 unsigned fw_size; 1981 u32 *fw; 1982 size_t mec_hpd_size; 1983 1984 const struct gfx_firmware_header_v1_0 *mec_hdr; 1985 1986 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1987 1988 /* take ownership of the relevant compute queues */ 1989 amdgpu_gfx_compute_queue_acquire(adev); 1990 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1991 if (mec_hpd_size) { 1992 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1993 AMDGPU_GEM_DOMAIN_VRAM, 1994 &adev->gfx.mec.hpd_eop_obj, 1995 &adev->gfx.mec.hpd_eop_gpu_addr, 1996 (void **)&hpd); 1997 if (r) { 1998 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1999 gfx_v9_0_mec_fini(adev); 2000 return r; 2001 } 2002 2003 memset(hpd, 0, mec_hpd_size); 2004 2005 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2006 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2007 } 2008 2009 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2010 2011 fw_data = (const __le32 *) 2012 (adev->gfx.mec_fw->data + 2013 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2014 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 2015 2016 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 2017 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2018 &adev->gfx.mec.mec_fw_obj, 2019 &adev->gfx.mec.mec_fw_gpu_addr, 2020 (void **)&fw); 2021 if (r) { 2022 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 2023 gfx_v9_0_mec_fini(adev); 2024 return r; 2025 } 2026 2027 memcpy(fw, fw_data, fw_size); 2028 2029 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 2030 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 2031 2032 return 0; 2033 } 2034 2035 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 2036 { 2037 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2038 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2039 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2040 (address << SQ_IND_INDEX__INDEX__SHIFT) | 2041 (SQ_IND_INDEX__FORCE_READ_MASK)); 2042 return 
RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2043 } 2044 2045 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 2046 uint32_t wave, uint32_t thread, 2047 uint32_t regno, uint32_t num, uint32_t *out) 2048 { 2049 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2050 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2051 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2052 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 2053 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 2054 (SQ_IND_INDEX__FORCE_READ_MASK) | 2055 (SQ_IND_INDEX__AUTO_INCR_MASK)); 2056 while (num--) 2057 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2058 } 2059 2060 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 2061 { 2062 /* type 1 wave data */ 2063 dst[(*no_fields)++] = 1; 2064 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 2065 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 2066 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 2067 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 2068 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 2069 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 2070 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 2071 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 2072 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 2073 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 2074 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 2075 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 2076 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 2077 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 2078 } 2079 2080 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 2081 uint32_t wave, uint32_t start, 2082 uint32_t size, uint32_t *dst) 2083 { 2084 wave_read_regs( 2085 adev, simd, wave, 0, 2086 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 2087 } 2088 2089 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 2090 uint32_t wave, uint32_t thread, 2091 uint32_t start, uint32_t size, 2092 uint32_t *dst) 2093 { 2094 wave_read_regs( 2095 adev, simd, wave, thread, 2096 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 2097 } 2098 2099 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 2100 u32 me, u32 pipe, u32 q, u32 vm) 2101 { 2102 soc15_grbm_select(adev, me, pipe, q, vm); 2103 } 2104 2105 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2106 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2107 .select_se_sh = &gfx_v9_0_select_se_sh, 2108 .read_wave_data = &gfx_v9_0_read_wave_data, 2109 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2110 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2111 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2112 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2113 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2114 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2115 }; 2116 2117 static const struct amdgpu_gfx_funcs gfx_v9_4_gfx_funcs = { 2118 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2119 .select_se_sh = &gfx_v9_0_select_se_sh, 2120 .read_wave_data = &gfx_v9_0_read_wave_data, 2121 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2122 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 
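	/*
	 * gfx v9.4 (Arcturus) shares the clock, wave and SE/SH helpers with
	 * gfx v9.0 but plugs in its own RAS inject/query/reset callbacks
	 * below, and additionally implements query_ras_error_status.
	 */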
2123 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2124 .ras_error_inject = &gfx_v9_4_ras_error_inject, 2125 .query_ras_error_count = &gfx_v9_4_query_ras_error_count, 2126 .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, 2127 .query_ras_error_status = &gfx_v9_4_query_ras_error_status, 2128 }; 2129 2130 static const struct amdgpu_gfx_funcs gfx_v9_4_2_gfx_funcs = { 2131 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2132 .select_se_sh = &gfx_v9_0_select_se_sh, 2133 .read_wave_data = &gfx_v9_0_read_wave_data, 2134 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2135 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2136 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2137 .ras_error_inject = &gfx_v9_4_2_ras_error_inject, 2138 .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, 2139 .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count, 2140 .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status, 2141 .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status, 2142 .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, 2143 }; 2144 2145 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2146 { 2147 u32 gb_addr_config; 2148 int err; 2149 2150 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 2151 2152 switch (adev->asic_type) { 2153 case CHIP_VEGA10: 2154 adev->gfx.config.max_hw_contexts = 8; 2155 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2156 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2157 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2158 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2159 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2160 break; 2161 case CHIP_VEGA12: 2162 adev->gfx.config.max_hw_contexts = 8; 2163 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2164 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2165 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2166 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2167 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2168 DRM_INFO("fix gfx.config for vega12\n"); 2169 break; 2170 case CHIP_VEGA20: 2171 adev->gfx.config.max_hw_contexts = 8; 2172 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2173 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2174 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2175 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2176 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2177 gb_addr_config &= ~0xf3e777ff; 2178 gb_addr_config |= 0x22014042; 2179 /* check vbios table if gpu info is not available */ 2180 err = amdgpu_atomfirmware_get_gfx_info(adev); 2181 if (err) 2182 return err; 2183 break; 2184 case CHIP_RAVEN: 2185 adev->gfx.config.max_hw_contexts = 8; 2186 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2187 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2188 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2189 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2190 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2191 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2192 else 2193 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2194 break; 2195 case CHIP_ARCTURUS: 2196 adev->gfx.funcs = &gfx_v9_4_gfx_funcs; 2197 adev->gfx.config.max_hw_contexts = 8; 2198 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2199 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2200 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2201 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2202 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2203 gb_addr_config &= ~0xf3e777ff; 2204 gb_addr_config |= 0x22014042; 2205 break; 2206 case 
CHIP_RENOIR: 2207 adev->gfx.config.max_hw_contexts = 8; 2208 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2209 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2210 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2211 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2212 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2213 gb_addr_config &= ~0xf3e777ff; 2214 gb_addr_config |= 0x22010042; 2215 break; 2216 case CHIP_ALDEBARAN: 2217 adev->gfx.funcs = &gfx_v9_4_2_gfx_funcs; 2218 adev->gfx.config.max_hw_contexts = 8; 2219 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2220 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2221 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2222 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2223 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2224 gb_addr_config &= ~0xf3e777ff; 2225 gb_addr_config |= 0x22014042; 2226 /* check vbios table if gpu info is not available */ 2227 err = amdgpu_atomfirmware_get_gfx_info(adev); 2228 if (err) 2229 return err; 2230 break; 2231 default: 2232 BUG(); 2233 break; 2234 } 2235 2236 adev->gfx.config.gb_addr_config = gb_addr_config; 2237 2238 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2239 REG_GET_FIELD( 2240 adev->gfx.config.gb_addr_config, 2241 GB_ADDR_CONFIG, 2242 NUM_PIPES); 2243 2244 adev->gfx.config.max_tile_pipes = 2245 adev->gfx.config.gb_addr_config_fields.num_pipes; 2246 2247 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2248 REG_GET_FIELD( 2249 adev->gfx.config.gb_addr_config, 2250 GB_ADDR_CONFIG, 2251 NUM_BANKS); 2252 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2253 REG_GET_FIELD( 2254 adev->gfx.config.gb_addr_config, 2255 GB_ADDR_CONFIG, 2256 MAX_COMPRESSED_FRAGS); 2257 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2258 REG_GET_FIELD( 2259 adev->gfx.config.gb_addr_config, 2260 GB_ADDR_CONFIG, 2261 NUM_RB_PER_SE); 2262 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2263 REG_GET_FIELD( 2264 adev->gfx.config.gb_addr_config, 2265 GB_ADDR_CONFIG, 2266 NUM_SHADER_ENGINES); 2267 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2268 REG_GET_FIELD( 2269 adev->gfx.config.gb_addr_config, 2270 GB_ADDR_CONFIG, 2271 PIPE_INTERLEAVE_SIZE)); 2272 2273 return 0; 2274 } 2275 2276 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2277 int mec, int pipe, int queue) 2278 { 2279 unsigned irq_type; 2280 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2281 unsigned int hw_prio; 2282 2283 ring = &adev->gfx.compute_ring[ring_id]; 2284 2285 /* mec0 is me1 */ 2286 ring->me = mec + 1; 2287 ring->pipe = pipe; 2288 ring->queue = queue; 2289 2290 ring->ring_obj = NULL; 2291 ring->use_doorbell = true; 2292 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2293 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2294 + (ring_id * GFX9_MEC_HPD_SIZE); 2295 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2296 2297 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2298 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2299 + ring->pipe; 2300 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2301 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 2302 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2303 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2304 hw_prio, NULL); 2305 } 2306 2307 static int gfx_v9_0_sw_init(void *handle) 2308 { 2309 int i, j, k, r, ring_id; 2310 struct amdgpu_ring *ring; 2311 struct amdgpu_kiq *kiq; 2312 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2313 2314 switch (adev->asic_type) { 2315 case CHIP_VEGA10: 2316 case CHIP_VEGA12: 2317 case CHIP_VEGA20: 2318 case CHIP_RAVEN: 2319 case CHIP_ARCTURUS: 2320 case CHIP_RENOIR: 2321 case CHIP_ALDEBARAN: 2322 adev->gfx.mec.num_mec = 2; 2323 break; 2324 default: 2325 adev->gfx.mec.num_mec = 1; 2326 break; 2327 } 2328 2329 adev->gfx.mec.num_pipe_per_mec = 4; 2330 adev->gfx.mec.num_queue_per_pipe = 8; 2331 2332 /* EOP Event */ 2333 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2334 if (r) 2335 return r; 2336 2337 /* Privileged reg */ 2338 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2339 &adev->gfx.priv_reg_irq); 2340 if (r) 2341 return r; 2342 2343 /* Privileged inst */ 2344 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2345 &adev->gfx.priv_inst_irq); 2346 if (r) 2347 return r; 2348 2349 /* ECC error */ 2350 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2351 &adev->gfx.cp_ecc_error_irq); 2352 if (r) 2353 return r; 2354 2355 /* FUE error */ 2356 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2357 &adev->gfx.cp_ecc_error_irq); 2358 if (r) 2359 return r; 2360 2361 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2362 2363 gfx_v9_0_scratch_init(adev); 2364 2365 r = gfx_v9_0_init_microcode(adev); 2366 if (r) { 2367 DRM_ERROR("Failed to load gfx firmware!\n"); 2368 return r; 2369 } 2370 2371 r = adev->gfx.rlc.funcs->init(adev); 2372 if (r) { 2373 DRM_ERROR("Failed to init rlc BOs!\n"); 2374 return r; 2375 } 2376 2377 r = gfx_v9_0_mec_init(adev); 2378 if (r) { 2379 DRM_ERROR("Failed to init MEC BOs!\n"); 2380 return r; 2381 } 2382 2383 /* set up the gfx ring */ 2384 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2385 ring = &adev->gfx.gfx_ring[i]; 2386 ring->ring_obj = NULL; 2387 if (!i) 2388 sprintf(ring->name, "gfx"); 2389 else 2390 sprintf(ring->name, "gfx_%d", i); 2391 ring->use_doorbell = true; 2392 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2393 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2394 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2395 AMDGPU_RING_PRIO_DEFAULT, NULL); 2396 if (r) 2397 return r; 2398 } 2399 2400 /* set up the compute queues - allocate horizontally across pipes */ 2401 ring_id = 0; 2402 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2403 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2404 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2405 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2406 continue; 2407 2408 r = gfx_v9_0_compute_ring_init(adev, 2409 ring_id, 2410 i, k, j); 2411 if (r) 2412 return r; 2413 2414 ring_id++; 2415 } 2416 } 2417 } 2418 2419 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2420 if (r) { 2421 DRM_ERROR("Failed to init KIQ BOs!\n"); 2422 return r; 2423 } 2424 2425 kiq = &adev->gfx.kiq; 2426 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2427 if (r) 2428 return r; 2429 2430 /* create MQD for all compute queues as wel 
as KIQ for SRIOV case */ 2431 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2432 if (r) 2433 return r; 2434 2435 adev->gfx.ce_ram_size = 0x8000; 2436 2437 r = gfx_v9_0_gpu_early_init(adev); 2438 if (r) 2439 return r; 2440 2441 return 0; 2442 } 2443 2444 2445 static int gfx_v9_0_sw_fini(void *handle) 2446 { 2447 int i; 2448 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2449 2450 amdgpu_gfx_ras_fini(adev); 2451 2452 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2453 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2454 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2455 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2456 2457 amdgpu_gfx_mqd_sw_fini(adev); 2458 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2459 amdgpu_gfx_kiq_fini(adev); 2460 2461 gfx_v9_0_mec_fini(adev); 2462 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2463 if (adev->flags & AMD_IS_APU) { 2464 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2465 &adev->gfx.rlc.cp_table_gpu_addr, 2466 (void **)&adev->gfx.rlc.cp_table_ptr); 2467 } 2468 gfx_v9_0_free_microcode(adev); 2469 2470 return 0; 2471 } 2472 2473 2474 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2475 { 2476 /* TODO */ 2477 } 2478 2479 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2480 u32 instance) 2481 { 2482 u32 data; 2483 2484 if (instance == 0xffffffff) 2485 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2486 else 2487 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2488 2489 if (se_num == 0xffffffff) 2490 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2491 else 2492 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2493 2494 if (sh_num == 0xffffffff) 2495 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2496 else 2497 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2498 2499 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2500 } 2501 2502 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2503 { 2504 u32 data, mask; 2505 2506 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2507 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2508 2509 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2510 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2511 2512 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2513 adev->gfx.config.max_sh_per_se); 2514 2515 return (~data) & mask; 2516 } 2517 2518 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2519 { 2520 int i, j; 2521 u32 data; 2522 u32 active_rbs = 0; 2523 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2524 adev->gfx.config.max_sh_per_se; 2525 2526 mutex_lock(&adev->grbm_idx_mutex); 2527 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2528 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2529 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2530 data = gfx_v9_0_get_rb_active_bitmap(adev); 2531 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2532 rb_bitmap_width_per_sh); 2533 } 2534 } 2535 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2536 mutex_unlock(&adev->grbm_idx_mutex); 2537 2538 adev->gfx.config.backend_enable_mask = active_rbs; 2539 adev->gfx.config.num_rbs = hweight32(active_rbs); 2540 } 2541 2542 #define DEFAULT_SH_MEM_BASES (0x6000) 2543 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2544 { 2545 int i; 2546 uint32_t sh_mem_config; 
2547 uint32_t sh_mem_bases; 2548 2549 /* 2550 * Configure apertures: 2551 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2552 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2553 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2554 */ 2555 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2556 2557 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2558 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2559 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2560 2561 mutex_lock(&adev->srbm_mutex); 2562 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2563 soc15_grbm_select(adev, 0, 0, 0, i); 2564 /* CP and shaders */ 2565 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2566 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2567 } 2568 soc15_grbm_select(adev, 0, 0, 0, 0); 2569 mutex_unlock(&adev->srbm_mutex); 2570 2571 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2572 acccess. These should be enabled by FW for target VMIDs. */ 2573 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2574 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2575 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2576 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2577 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2578 } 2579 } 2580 2581 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2582 { 2583 int vmid; 2584 2585 /* 2586 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2587 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2588 * the driver can enable them for graphics. VMID0 should maintain 2589 * access so that HWS firmware can save/restore entries. 2590 */ 2591 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2592 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2593 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2594 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2596 } 2597 } 2598 2599 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2600 { 2601 uint32_t tmp; 2602 2603 switch (adev->asic_type) { 2604 case CHIP_ARCTURUS: 2605 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2606 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2607 DISABLE_BARRIER_WAITCNT, 1); 2608 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2609 break; 2610 default: 2611 break; 2612 } 2613 } 2614 2615 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2616 { 2617 u32 tmp; 2618 int i; 2619 2620 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2621 2622 gfx_v9_0_tiling_mode_table_init(adev); 2623 2624 gfx_v9_0_setup_rb(adev); 2625 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2626 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2627 2628 /* XXX SH_MEM regs */ 2629 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2630 mutex_lock(&adev->srbm_mutex); 2631 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2632 soc15_grbm_select(adev, 0, 0, 0, i); 2633 /* CP and shaders */ 2634 if (i == 0) { 2635 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2636 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2637 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2638 !!adev->gmc.noretry); 2639 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2640 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2641 } else { 2642 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2643 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2644 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2645 
!!adev->gmc.noretry); 2646 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2647 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2648 (adev->gmc.private_aperture_start >> 48)); 2649 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2650 (adev->gmc.shared_aperture_start >> 48)); 2651 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2652 } 2653 } 2654 soc15_grbm_select(adev, 0, 0, 0, 0); 2655 2656 mutex_unlock(&adev->srbm_mutex); 2657 2658 gfx_v9_0_init_compute_vmid(adev); 2659 gfx_v9_0_init_gds_vmid(adev); 2660 gfx_v9_0_init_sq_config(adev); 2661 } 2662 2663 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2664 { 2665 u32 i, j, k; 2666 u32 mask; 2667 2668 mutex_lock(&adev->grbm_idx_mutex); 2669 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2670 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2671 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2672 for (k = 0; k < adev->usec_timeout; k++) { 2673 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2674 break; 2675 udelay(1); 2676 } 2677 if (k == adev->usec_timeout) { 2678 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2679 0xffffffff, 0xffffffff); 2680 mutex_unlock(&adev->grbm_idx_mutex); 2681 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2682 i, j); 2683 return; 2684 } 2685 } 2686 } 2687 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2688 mutex_unlock(&adev->grbm_idx_mutex); 2689 2690 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2691 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2692 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2693 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2694 for (k = 0; k < adev->usec_timeout; k++) { 2695 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2696 break; 2697 udelay(1); 2698 } 2699 } 2700 2701 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2702 bool enable) 2703 { 2704 u32 tmp; 2705 2706 /* These interrupts should be enabled to drive DS clock */ 2707 2708 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2709 2710 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2711 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2712 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2713 if(adev->gfx.num_gfx_rings) 2714 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 2715 2716 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2717 } 2718 2719 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2720 { 2721 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2722 /* csib */ 2723 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2724 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2725 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2726 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2727 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2728 adev->gfx.rlc.clear_state_size); 2729 } 2730 2731 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2732 int indirect_offset, 2733 int list_size, 2734 int *unique_indirect_regs, 2735 int unique_indirect_reg_count, 2736 int *indirect_start_offsets, 2737 int *indirect_start_offsets_count, 2738 int max_start_offsets_count) 2739 { 2740 int idx; 2741 2742 for (; indirect_offset < list_size; indirect_offset++) { 2743 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2744 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2745 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2746 2747 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2748 indirect_offset += 2; 2749 2750 /* look for the matching indice */ 2751 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2752 if (unique_indirect_regs[idx] == 2753 register_list_format[indirect_offset] || 2754 !unique_indirect_regs[idx]) 2755 break; 2756 } 2757 2758 BUG_ON(idx >= unique_indirect_reg_count); 2759 2760 if (!unique_indirect_regs[idx]) 2761 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2762 2763 indirect_offset++; 2764 } 2765 } 2766 } 2767 2768 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2769 { 2770 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2771 int unique_indirect_reg_count = 0; 2772 2773 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2774 int indirect_start_offsets_count = 0; 2775 2776 int list_size = 0; 2777 int i = 0, j = 0; 2778 u32 tmp = 0; 2779 2780 u32 *register_list_format = 2781 kmemdup(adev->gfx.rlc.register_list_format, 2782 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2783 if (!register_list_format) 2784 return -ENOMEM; 2785 2786 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2787 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2788 gfx_v9_1_parse_ind_reg_list(register_list_format, 2789 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2790 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2791 unique_indirect_regs, 2792 unique_indirect_reg_count, 2793 indirect_start_offsets, 2794 &indirect_start_offsets_count, 2795 ARRAY_SIZE(indirect_start_offsets)); 2796 2797 /* enable auto inc in case it is disabled */ 2798 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2799 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2800 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2801 2802 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2803 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2804 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2805 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2806 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2807 adev->gfx.rlc.register_restore[i]); 2808 2809 /* load indirect register */ 2810 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2811 adev->gfx.rlc.reg_list_format_start); 2812 2813 
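	/*
	 * Stream the format list into GPM scratch: the direct-register
	 * portion is written verbatim; in the indirect portion that follows,
	 * the register offset of each 0xFFFFFFFF-delimited record is
	 * replaced by its index in unique_indirect_regs, and that table
	 * itself is programmed into the RLC_SRM_INDEX_CNTL_ADDR/DATA
	 * registers at the end of this function.
	 */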
/* direct register portion */ 2814 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2815 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2816 register_list_format[i]); 2817 2818 /* indirect register portion */ 2819 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2820 if (register_list_format[i] == 0xFFFFFFFF) { 2821 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2822 continue; 2823 } 2824 2825 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2826 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2827 2828 for (j = 0; j < unique_indirect_reg_count; j++) { 2829 if (register_list_format[i] == unique_indirect_regs[j]) { 2830 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2831 break; 2832 } 2833 } 2834 2835 BUG_ON(j >= unique_indirect_reg_count); 2836 2837 i++; 2838 } 2839 2840 /* set save/restore list size */ 2841 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2842 list_size = list_size >> 1; 2843 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2844 adev->gfx.rlc.reg_restore_list_size); 2845 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2846 2847 /* write the starting offsets to RLC scratch ram */ 2848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2849 adev->gfx.rlc.starting_offsets_start); 2850 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2851 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2852 indirect_start_offsets[i]); 2853 2854 /* load unique indirect regs*/ 2855 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2856 if (unique_indirect_regs[i] != 0) { 2857 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2858 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2859 unique_indirect_regs[i] & 0x3FFFF); 2860 2861 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2862 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2863 unique_indirect_regs[i] >> 20); 2864 } 2865 } 2866 2867 kfree(register_list_format); 2868 return 0; 2869 } 2870 2871 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2872 { 2873 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2874 } 2875 2876 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2877 bool enable) 2878 { 2879 uint32_t data = 0; 2880 uint32_t default_data = 0; 2881 2882 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2883 if (enable) { 2884 /* enable GFXIP control over CGPG */ 2885 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2886 if(default_data != data) 2887 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2888 2889 /* update status */ 2890 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2891 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2892 if(default_data != data) 2893 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2894 } else { 2895 /* restore GFXIP control over GCPG */ 2896 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2897 if(default_data != data) 2898 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2899 } 2900 } 2901 2902 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2903 { 2904 uint32_t data = 0; 2905 2906 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2907 AMD_PG_SUPPORT_GFX_SMG | 2908 AMD_PG_SUPPORT_GFX_DMG)) { 2909 /* init IDLE_POLL_COUNT = 60 */ 2910 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2911 data &= 
~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2912 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2913 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2914 2915 /* init RLC PG Delay */ 2916 data = 0; 2917 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2918 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2919 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2920 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2921 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2922 2923 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2924 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2925 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2926 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2927 2928 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2929 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2930 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2931 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2932 2933 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2934 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2935 2936 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2937 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2938 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2939 if (adev->asic_type != CHIP_RENOIR) 2940 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2941 } 2942 } 2943 2944 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2945 bool enable) 2946 { 2947 uint32_t data = 0; 2948 uint32_t default_data = 0; 2949 2950 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2951 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2952 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2953 enable ? 1 : 0); 2954 if (default_data != data) 2955 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2956 } 2957 2958 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2959 bool enable) 2960 { 2961 uint32_t data = 0; 2962 uint32_t default_data = 0; 2963 2964 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2965 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2966 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2967 enable ? 1 : 0); 2968 if(default_data != data) 2969 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2970 } 2971 2972 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2973 bool enable) 2974 { 2975 uint32_t data = 0; 2976 uint32_t default_data = 0; 2977 2978 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2979 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2980 CP_PG_DISABLE, 2981 enable ? 0 : 1); 2982 if(default_data != data) 2983 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2984 } 2985 2986 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2987 bool enable) 2988 { 2989 uint32_t data, default_data; 2990 2991 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2992 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2993 GFX_POWER_GATING_ENABLE, 2994 enable ? 
1 : 0); 2995 if(default_data != data) 2996 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2997 } 2998 2999 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3000 bool enable) 3001 { 3002 uint32_t data, default_data; 3003 3004 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3005 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3006 GFX_PIPELINE_PG_ENABLE, 3007 enable ? 1 : 0); 3008 if(default_data != data) 3009 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3010 3011 if (!enable) 3012 /* read any GFX register to wake up GFX */ 3013 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3014 } 3015 3016 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3017 bool enable) 3018 { 3019 uint32_t data, default_data; 3020 3021 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3022 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3023 STATIC_PER_CU_PG_ENABLE, 3024 enable ? 1 : 0); 3025 if(default_data != data) 3026 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3027 } 3028 3029 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3030 bool enable) 3031 { 3032 uint32_t data, default_data; 3033 3034 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3035 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3036 DYN_PER_CU_PG_ENABLE, 3037 enable ? 1 : 0); 3038 if(default_data != data) 3039 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3040 } 3041 3042 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3043 { 3044 gfx_v9_0_init_csb(adev); 3045 3046 /* 3047 * Rlc save restore list is workable since v2_1. 3048 * And it's needed by gfxoff feature. 3049 */ 3050 if (adev->gfx.rlc.is_rlc_v2_1) { 3051 if (adev->asic_type == CHIP_VEGA12 || 3052 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3053 gfx_v9_1_init_rlc_save_restore_list(adev); 3054 gfx_v9_0_enable_save_restore_machine(adev); 3055 } 3056 3057 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3058 AMD_PG_SUPPORT_GFX_SMG | 3059 AMD_PG_SUPPORT_GFX_DMG | 3060 AMD_PG_SUPPORT_CP | 3061 AMD_PG_SUPPORT_GDS | 3062 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3063 WREG32(mmRLC_JUMP_TABLE_RESTORE, 3064 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3065 gfx_v9_0_init_gfx_power_gating(adev); 3066 } 3067 } 3068 3069 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3070 { 3071 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3072 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3073 gfx_v9_0_wait_for_rlc_serdes(adev); 3074 } 3075 3076 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3077 { 3078 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3079 udelay(50); 3080 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3081 udelay(50); 3082 } 3083 3084 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3085 { 3086 #ifdef AMDGPU_RLC_DEBUG_RETRY 3087 u32 rlc_ucode_ver; 3088 #endif 3089 3090 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3091 udelay(50); 3092 3093 /* carrizo do enable cp interrupt after cp inited */ 3094 if (!(adev->flags & AMD_IS_APU)) { 3095 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3096 udelay(50); 3097 } 3098 3099 #ifdef AMDGPU_RLC_DEBUG_RETRY 3100 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3101 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3102 if(rlc_ucode_ver == 0x108) { 3103 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3104 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3105 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3106 * default is 0x9C4 to create a 100us interval */ 3107 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3108 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3109 * to disable the page fault retry interrupts, default is 3110 * 0x100 (256) */ 3111 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3112 } 3113 #endif 3114 } 3115 3116 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3117 { 3118 const struct rlc_firmware_header_v2_0 *hdr; 3119 const __le32 *fw_data; 3120 unsigned i, fw_size; 3121 3122 if (!adev->gfx.rlc_fw) 3123 return -EINVAL; 3124 3125 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3126 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3127 3128 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3129 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3130 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3131 3132 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3133 RLCG_UCODE_LOADING_START_ADDRESS); 3134 for (i = 0; i < fw_size; i++) 3135 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3136 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3137 3138 return 0; 3139 } 3140 3141 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3142 { 3143 int r; 3144 3145 if (amdgpu_sriov_vf(adev)) { 3146 gfx_v9_0_init_csb(adev); 3147 return 0; 3148 } 3149 3150 adev->gfx.rlc.funcs->stop(adev); 3151 3152 /* disable CG */ 3153 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3154 3155 gfx_v9_0_init_pg(adev); 3156 3157 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3158 /* legacy rlc firmware loading */ 3159 r = gfx_v9_0_rlc_load_microcode(adev); 3160 if (r) 3161 return r; 3162 } 3163 3164 switch (adev->asic_type) { 3165 case CHIP_RAVEN: 3166 if (amdgpu_lbpw == 0) 3167 gfx_v9_0_enable_lbpw(adev, false); 3168 else 3169 gfx_v9_0_enable_lbpw(adev, true); 3170 break; 3171 case CHIP_VEGA20: 3172 if (amdgpu_lbpw > 0) 3173 gfx_v9_0_enable_lbpw(adev, true); 3174 else 3175 gfx_v9_0_enable_lbpw(adev, false); 3176 break; 3177 default: 3178 break; 3179 } 3180 3181 adev->gfx.rlc.funcs->start(adev); 3182 3183 return 0; 3184 } 3185 3186 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3187 { 3188 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3189 3190 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3191 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3192 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 3193 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3194 udelay(50); 3195 } 3196 3197 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3198 { 3199 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3200 const struct gfx_firmware_header_v1_0 *ce_hdr; 3201 const struct gfx_firmware_header_v1_0 *me_hdr; 3202 const __le32 *fw_data; 3203 unsigned i, fw_size; 3204 3205 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3206 return -EINVAL; 3207 3208 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3209 adev->gfx.pfp_fw->data; 3210 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3211 adev->gfx.ce_fw->data; 3212 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3213 adev->gfx.me_fw->data; 3214 3215 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3216 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3217 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3218 3219 gfx_v9_0_cp_gfx_enable(adev, false); 3220 3221 /* PFP */ 3222 fw_data = (const __le32 *) 3223 (adev->gfx.pfp_fw->data + 3224 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3225 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3226 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3227 for (i = 0; i < fw_size; i++) 3228 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3229 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3230 3231 /* CE */ 3232 fw_data = (const __le32 *) 3233 (adev->gfx.ce_fw->data + 3234 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3235 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3236 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3237 for (i = 0; i < fw_size; i++) 3238 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3239 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3240 3241 /* ME */ 3242 fw_data = (const __le32 *) 3243 (adev->gfx.me_fw->data + 3244 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3245 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3246 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3247 for (i = 0; i < fw_size; i++) 3248 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3249 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3250 3251 return 0; 3252 } 3253 3254 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3255 { 3256 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3257 const struct cs_section_def *sect = NULL; 3258 const struct cs_extent_def *ext = NULL; 3259 int r, i, tmp; 3260 3261 /* init the CP */ 3262 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3263 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3264 3265 gfx_v9_0_cp_gfx_enable(adev, true); 3266 3267 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3268 if (r) { 3269 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3270 return r; 3271 } 3272 3273 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3274 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3275 3276 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3277 amdgpu_ring_write(ring, 0x80000000); 3278 amdgpu_ring_write(ring, 0x80000000); 3279 3280 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3281 for (ext = sect->section; ext->extent != NULL; ++ext) { 3282 if (sect->id == SECT_CONTEXT) { 3283 amdgpu_ring_write(ring, 3284 PACKET3(PACKET3_SET_CONTEXT_REG, 3285 ext->reg_count)); 3286 amdgpu_ring_write(ring, 3287 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3288 for (i = 0; i < ext->reg_count; 
i++) 3289 amdgpu_ring_write(ring, ext->extent[i]); 3290 } 3291 } 3292 } 3293 3294 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3295 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3296 3297 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3298 amdgpu_ring_write(ring, 0); 3299 3300 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3301 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3302 amdgpu_ring_write(ring, 0x8000); 3303 amdgpu_ring_write(ring, 0x8000); 3304 3305 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3306 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3307 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3308 amdgpu_ring_write(ring, tmp); 3309 amdgpu_ring_write(ring, 0); 3310 3311 amdgpu_ring_commit(ring); 3312 3313 return 0; 3314 } 3315 3316 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3317 { 3318 struct amdgpu_ring *ring; 3319 u32 tmp; 3320 u32 rb_bufsz; 3321 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3322 3323 /* Set the write pointer delay */ 3324 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3325 3326 /* set the RB to use vmid 0 */ 3327 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3328 3329 /* Set ring buffer size */ 3330 ring = &adev->gfx.gfx_ring[0]; 3331 rb_bufsz = order_base_2(ring->ring_size / 8); 3332 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3333 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3334 #ifdef __BIG_ENDIAN 3335 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3336 #endif 3337 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3338 3339 /* Initialize the ring buffer's write pointers */ 3340 ring->wptr = 0; 3341 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3342 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3343 3344 /* set the wb address wether it's enabled or not */ 3345 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3346 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3347 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3348 3349 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3350 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3351 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3352 3353 mdelay(1); 3354 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3355 3356 rb_addr = ring->gpu_addr >> 8; 3357 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3358 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3359 3360 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3361 if (ring->use_doorbell) { 3362 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3363 DOORBELL_OFFSET, ring->doorbell_index); 3364 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3365 DOORBELL_EN, 1); 3366 } else { 3367 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3368 } 3369 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3370 3371 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3372 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3373 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3374 3375 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3376 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3377 3378 3379 /* start the ring */ 3380 gfx_v9_0_cp_gfx_start(adev); 3381 ring->sched.ready = true; 3382 3383 return 0; 3384 } 3385 3386 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3387 { 3388 if (enable) 
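/* clearing CP_MEC_CNTL releases both MEC micro engines (ME1 and ME2) from halt */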
{ 3389 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3390 } else { 3391 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3392 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3393 adev->gfx.kiq.ring.sched.ready = false; 3394 } 3395 udelay(50); 3396 } 3397 3398 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3399 { 3400 const struct gfx_firmware_header_v1_0 *mec_hdr; 3401 const __le32 *fw_data; 3402 unsigned i; 3403 u32 tmp; 3404 3405 if (!adev->gfx.mec_fw) 3406 return -EINVAL; 3407 3408 gfx_v9_0_cp_compute_enable(adev, false); 3409 3410 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3411 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3412 3413 fw_data = (const __le32 *) 3414 (adev->gfx.mec_fw->data + 3415 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3416 tmp = 0; 3417 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3418 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3419 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3420 3421 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3422 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3423 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3424 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3425 3426 /* MEC1 */ 3427 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3428 mec_hdr->jt_offset); 3429 for (i = 0; i < mec_hdr->jt_size; i++) 3430 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3431 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3432 3433 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3434 adev->gfx.mec_fw_version); 3435 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3436 3437 return 0; 3438 } 3439 3440 /* KIQ functions */ 3441 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3442 { 3443 uint32_t tmp; 3444 struct amdgpu_device *adev = ring->adev; 3445 3446 /* tell RLC which is KIQ queue */ 3447 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3448 tmp &= 0xffffff00; 3449 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3450 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3451 tmp |= 0x80; 3452 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3453 } 3454 3455 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3456 { 3457 struct amdgpu_device *adev = ring->adev; 3458 3459 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3460 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3461 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3462 mqd->cp_hqd_queue_priority = 3463 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3464 } 3465 } 3466 } 3467 3468 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3469 { 3470 struct amdgpu_device *adev = ring->adev; 3471 struct v9_mqd *mqd = ring->mqd_ptr; 3472 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3473 uint32_t tmp; 3474 3475 mqd->header = 0xC0310800; 3476 mqd->compute_pipelinestat_enable = 0x00000001; 3477 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3478 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3479 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3480 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3481 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3482 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3483 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3484 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3485 mqd->compute_misc_reserved = 0x00000003; 3486 3487 mqd->dynamic_cu_mask_addr_lo = 3488 lower_32_bits(ring->mqd_gpu_addr 3489 + offsetof(struct v9_mqd_allocation, 
dynamic_cu_mask)); 3490 mqd->dynamic_cu_mask_addr_hi = 3491 upper_32_bits(ring->mqd_gpu_addr 3492 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3493 3494 eop_base_addr = ring->eop_gpu_addr >> 8; 3495 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3496 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3497 3498 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3499 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3500 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3501 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3502 3503 mqd->cp_hqd_eop_control = tmp; 3504 3505 /* enable doorbell? */ 3506 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3507 3508 if (ring->use_doorbell) { 3509 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3510 DOORBELL_OFFSET, ring->doorbell_index); 3511 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3512 DOORBELL_EN, 1); 3513 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3514 DOORBELL_SOURCE, 0); 3515 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3516 DOORBELL_HIT, 0); 3517 } else { 3518 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3519 DOORBELL_EN, 0); 3520 } 3521 3522 mqd->cp_hqd_pq_doorbell_control = tmp; 3523 3524 /* disable the queue if it's active */ 3525 ring->wptr = 0; 3526 mqd->cp_hqd_dequeue_request = 0; 3527 mqd->cp_hqd_pq_rptr = 0; 3528 mqd->cp_hqd_pq_wptr_lo = 0; 3529 mqd->cp_hqd_pq_wptr_hi = 0; 3530 3531 /* set the pointer to the MQD */ 3532 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3533 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3534 3535 /* set MQD vmid to 0 */ 3536 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3537 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3538 mqd->cp_mqd_control = tmp; 3539 3540 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3541 hqd_gpu_addr = ring->gpu_addr >> 8; 3542 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3543 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3544 3545 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3546 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3547 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3548 (order_base_2(ring->ring_size / 4) - 1)); 3549 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3550 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3551 #ifdef __BIG_ENDIAN 3552 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3553 #endif 3554 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3555 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3556 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3557 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3558 mqd->cp_hqd_pq_control = tmp; 3559 3560 /* set the wb address whether it's enabled or not */ 3561 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3562 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3563 mqd->cp_hqd_pq_rptr_report_addr_hi = 3564 upper_32_bits(wb_gpu_addr) & 0xffff; 3565 3566 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3567 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3568 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3569 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3570 3571 tmp = 0; 3572 /* enable the doorbell if requested */ 3573 if (ring->use_doorbell) { 3574 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3575 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3576 DOORBELL_OFFSET, 
ring->doorbell_index); 3577 3578 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3579 DOORBELL_EN, 1); 3580 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3581 DOORBELL_SOURCE, 0); 3582 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3583 DOORBELL_HIT, 0); 3584 } 3585 3586 mqd->cp_hqd_pq_doorbell_control = tmp; 3587 3588 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3589 ring->wptr = 0; 3590 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3591 3592 /* set the vmid for the queue */ 3593 mqd->cp_hqd_vmid = 0; 3594 3595 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3596 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3597 mqd->cp_hqd_persistent_state = tmp; 3598 3599 /* set MIN_IB_AVAIL_SIZE */ 3600 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3601 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3602 mqd->cp_hqd_ib_control = tmp; 3603 3604 /* set static priority for a queue/ring */ 3605 gfx_v9_0_mqd_set_priority(ring, mqd); 3606 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 3607 3608 /* the map_queues packet doesn't need to activate the queue, 3609 * so only the KIQ needs to set this field. 3610 */ 3611 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3612 mqd->cp_hqd_active = 1; 3613 3614 return 0; 3615 } 3616 3617 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3618 { 3619 struct amdgpu_device *adev = ring->adev; 3620 struct v9_mqd *mqd = ring->mqd_ptr; 3621 int j; 3622 3623 /* disable wptr polling */ 3624 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3625 3626 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3627 mqd->cp_hqd_eop_base_addr_lo); 3628 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3629 mqd->cp_hqd_eop_base_addr_hi); 3630 3631 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3633 mqd->cp_hqd_eop_control); 3634 3635 /* enable doorbell? 
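 * program CP_HQD_PQ_DOORBELL_CONTROL with the value computed in gfx_v9_0_mqd_init()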
*/ 3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3637 mqd->cp_hqd_pq_doorbell_control); 3638 3639 /* disable the queue if it's active */ 3640 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3641 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3642 for (j = 0; j < adev->usec_timeout; j++) { 3643 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3644 break; 3645 udelay(1); 3646 } 3647 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3648 mqd->cp_hqd_dequeue_request); 3649 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3650 mqd->cp_hqd_pq_rptr); 3651 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3652 mqd->cp_hqd_pq_wptr_lo); 3653 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3654 mqd->cp_hqd_pq_wptr_hi); 3655 } 3656 3657 /* set the pointer to the MQD */ 3658 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3659 mqd->cp_mqd_base_addr_lo); 3660 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3661 mqd->cp_mqd_base_addr_hi); 3662 3663 /* set MQD vmid to 0 */ 3664 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3665 mqd->cp_mqd_control); 3666 3667 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3668 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3669 mqd->cp_hqd_pq_base_lo); 3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3671 mqd->cp_hqd_pq_base_hi); 3672 3673 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3675 mqd->cp_hqd_pq_control); 3676 3677 /* set the wb address whether it's enabled or not */ 3678 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3679 mqd->cp_hqd_pq_rptr_report_addr_lo); 3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3681 mqd->cp_hqd_pq_rptr_report_addr_hi); 3682 3683 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3685 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3687 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3688 3689 /* enable the doorbell if requested */ 3690 if (ring->use_doorbell) { 3691 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3692 (adev->doorbell_index.kiq * 2) << 2); 3693 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3694 (adev->doorbell_index.userqueue_end * 2) << 2); 3695 } 3696 3697 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3698 mqd->cp_hqd_pq_doorbell_control); 3699 3700 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3701 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3702 mqd->cp_hqd_pq_wptr_lo); 3703 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3704 mqd->cp_hqd_pq_wptr_hi); 3705 3706 /* set the vmid for the queue */ 3707 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3708 3709 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3710 mqd->cp_hqd_persistent_state); 3711 3712 /* activate the queue */ 3713 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3714 mqd->cp_hqd_active); 3715 3716 if (ring->use_doorbell) 3717 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3718 3719 return 0; 3720 } 3721 3722 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3723 { 3724 struct amdgpu_device *adev = ring->adev; 3725 int j; 3726 3727 /* disable the queue if it's active */ 3728 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3729 3730 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3731 3732 for (j = 0; j < adev->usec_timeout; j++) { 3733 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3734 break; 3735 udelay(1); 3736 } 3737 3738 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3739 DRM_DEBUG("KIQ 
dequeue request failed.\n"); 3740 3741 /* Manual disable if dequeue request times out */ 3742 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3743 } 3744 3745 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3746 0); 3747 } 3748 3749 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3750 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3751 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3752 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3753 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3754 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3756 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3757 3758 return 0; 3759 } 3760 3761 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3762 { 3763 struct amdgpu_device *adev = ring->adev; 3764 struct v9_mqd *mqd = ring->mqd_ptr; 3765 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3766 struct v9_mqd *tmp_mqd; 3767 3768 gfx_v9_0_kiq_setting(ring); 3769 3770 /* The GPU could be in a bad state during probe if the driver triggered a reset 3771 * after loading the SMU; in that case the MQD has not been initialized and 3772 * the driver needs to re-init it. 3773 * Check mqd->cp_hqd_pq_control since this value should not be 0. 3774 */ 3775 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3776 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { 3777 /* for the GPU_RESET case, reset the MQD to a clean status */ 3778 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3779 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3780 3781 /* reset ring buffer */ 3782 ring->wptr = 0; 3783 amdgpu_ring_clear_ring(ring); 3784 3785 mutex_lock(&adev->srbm_mutex); 3786 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3787 gfx_v9_0_kiq_init_register(ring); 3788 soc15_grbm_select(adev, 0, 0, 0, 0); 3789 mutex_unlock(&adev->srbm_mutex); 3790 } else { 3791 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3792 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3793 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3794 mutex_lock(&adev->srbm_mutex); 3795 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3796 gfx_v9_0_mqd_init(ring); 3797 gfx_v9_0_kiq_init_register(ring); 3798 soc15_grbm_select(adev, 0, 0, 0, 0); 3799 mutex_unlock(&adev->srbm_mutex); 3800 3801 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3802 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3803 } 3804 3805 return 0; 3806 } 3807 3808 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3809 { 3810 struct amdgpu_device *adev = ring->adev; 3811 struct v9_mqd *mqd = ring->mqd_ptr; 3812 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3813 struct v9_mqd *tmp_mqd; 3814 3815 /* Same as the KIQ init above: the driver needs to re-init the MQD if 3816 * mqd->cp_hqd_pq_control has not been initialized before. 3817 */ 3818 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3819 3820 if (!tmp_mqd->cp_hqd_pq_control || 3821 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 3822 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3823 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3824 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3825 mutex_lock(&adev->srbm_mutex); 3826 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3827 gfx_v9_0_mqd_init(ring); 3828 soc15_grbm_select(adev, 0, 0, 0, 0); 3829 mutex_unlock(&adev->srbm_mutex); 3830 3831 if (adev->gfx.mec.mqd_backup[mqd_idx]) 
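/* keep a CPU-side backup of the freshly initialized MQD so it can be restored after a GPU reset */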
3832 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3833 } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 3834 /* reset MQD to a clean status */ 3835 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3836 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3837 3838 /* reset ring buffer */ 3839 ring->wptr = 0; 3840 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); 3841 amdgpu_ring_clear_ring(ring); 3842 } else { 3843 amdgpu_ring_clear_ring(ring); 3844 } 3845 3846 return 0; 3847 } 3848 3849 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3850 { 3851 struct amdgpu_ring *ring; 3852 int r; 3853 3854 ring = &adev->gfx.kiq.ring; 3855 3856 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3857 if (unlikely(r != 0)) 3858 return r; 3859 3860 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3861 if (unlikely(r != 0)) 3862 return r; 3863 3864 gfx_v9_0_kiq_init_queue(ring); 3865 amdgpu_bo_kunmap(ring->mqd_obj); 3866 ring->mqd_ptr = NULL; 3867 amdgpu_bo_unreserve(ring->mqd_obj); 3868 ring->sched.ready = true; 3869 return 0; 3870 } 3871 3872 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3873 { 3874 struct amdgpu_ring *ring = NULL; 3875 int r = 0, i; 3876 3877 gfx_v9_0_cp_compute_enable(adev, true); 3878 3879 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3880 ring = &adev->gfx.compute_ring[i]; 3881 3882 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3883 if (unlikely(r != 0)) 3884 goto done; 3885 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3886 if (!r) { 3887 r = gfx_v9_0_kcq_init_queue(ring); 3888 amdgpu_bo_kunmap(ring->mqd_obj); 3889 ring->mqd_ptr = NULL; 3890 } 3891 amdgpu_bo_unreserve(ring->mqd_obj); 3892 if (r) 3893 goto done; 3894 } 3895 3896 r = amdgpu_gfx_enable_kcq(adev); 3897 done: 3898 return r; 3899 } 3900 3901 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3902 { 3903 int r, i; 3904 struct amdgpu_ring *ring; 3905 3906 if (!(adev->flags & AMD_IS_APU)) 3907 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3908 3909 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3910 if (adev->gfx.num_gfx_rings) { 3911 /* legacy firmware loading */ 3912 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3913 if (r) 3914 return r; 3915 } 3916 3917 r = gfx_v9_0_cp_compute_load_microcode(adev); 3918 if (r) 3919 return r; 3920 } 3921 3922 r = gfx_v9_0_kiq_resume(adev); 3923 if (r) 3924 return r; 3925 3926 if (adev->gfx.num_gfx_rings) { 3927 r = gfx_v9_0_cp_gfx_resume(adev); 3928 if (r) 3929 return r; 3930 } 3931 3932 r = gfx_v9_0_kcq_resume(adev); 3933 if (r) 3934 return r; 3935 3936 if (adev->gfx.num_gfx_rings) { 3937 ring = &adev->gfx.gfx_ring[0]; 3938 r = amdgpu_ring_test_helper(ring); 3939 if (r) 3940 return r; 3941 } 3942 3943 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3944 ring = &adev->gfx.compute_ring[i]; 3945 amdgpu_ring_test_helper(ring); 3946 } 3947 3948 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3949 3950 return 0; 3951 } 3952 3953 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3954 { 3955 u32 tmp; 3956 3957 if (adev->asic_type != CHIP_ARCTURUS) 3958 return; 3959 3960 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3961 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3962 adev->df.hash_status.hash_64k); 3963 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3964 adev->df.hash_status.hash_2m); 3965 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3966 adev->df.hash_status.hash_1g); 3967 WREG32_SOC15(GC, 0, 
mmTCP_ADDR_CONFIG, tmp); 3968 } 3969 3970 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3971 { 3972 if (adev->gfx.num_gfx_rings) 3973 gfx_v9_0_cp_gfx_enable(adev, enable); 3974 gfx_v9_0_cp_compute_enable(adev, enable); 3975 } 3976 3977 static int gfx_v9_0_hw_init(void *handle) 3978 { 3979 int r; 3980 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3981 3982 if (!amdgpu_sriov_vf(adev)) 3983 gfx_v9_0_init_golden_registers(adev); 3984 3985 gfx_v9_0_constants_init(adev); 3986 3987 gfx_v9_0_init_tcp_config(adev); 3988 3989 r = adev->gfx.rlc.funcs->resume(adev); 3990 if (r) 3991 return r; 3992 3993 r = gfx_v9_0_cp_resume(adev); 3994 if (r) 3995 return r; 3996 3997 if (adev->asic_type == CHIP_ALDEBARAN) 3998 gfx_v9_4_2_set_power_brake_sequence(adev); 3999 4000 return r; 4001 } 4002 4003 static int gfx_v9_0_hw_fini(void *handle) 4004 { 4005 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4006 4007 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4008 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4009 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4010 4011 /* DF freeze and kcq disable will fail */ 4012 if (!amdgpu_ras_intr_triggered()) 4013 /* disable KCQ to avoid CPC touch memory not valid anymore */ 4014 amdgpu_gfx_disable_kcq(adev); 4015 4016 if (amdgpu_sriov_vf(adev)) { 4017 gfx_v9_0_cp_gfx_enable(adev, false); 4018 /* must disable polling for SRIOV when hw finished, otherwise 4019 * CPC engine may still keep fetching WB address which is already 4020 * invalid after sw finished and trigger DMAR reading error in 4021 * hypervisor side. 4022 */ 4023 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4024 return 0; 4025 } 4026 4027 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4028 * otherwise KIQ is hanging when binding back 4029 */ 4030 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4031 mutex_lock(&adev->srbm_mutex); 4032 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 4033 adev->gfx.kiq.ring.pipe, 4034 adev->gfx.kiq.ring.queue, 0); 4035 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 4036 soc15_grbm_select(adev, 0, 0, 0, 0); 4037 mutex_unlock(&adev->srbm_mutex); 4038 } 4039 4040 gfx_v9_0_cp_enable(adev, false); 4041 4042 /* Skip suspend with A+A reset */ 4043 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { 4044 dev_dbg(adev->dev, "Device in reset. 
Skipping RLC halt\n"); 4045 return 0; 4046 } 4047 4048 adev->gfx.rlc.funcs->stop(adev); 4049 return 0; 4050 } 4051 4052 static int gfx_v9_0_suspend(void *handle) 4053 { 4054 return gfx_v9_0_hw_fini(handle); 4055 } 4056 4057 static int gfx_v9_0_resume(void *handle) 4058 { 4059 return gfx_v9_0_hw_init(handle); 4060 } 4061 4062 static bool gfx_v9_0_is_idle(void *handle) 4063 { 4064 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4065 4066 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4067 GRBM_STATUS, GUI_ACTIVE)) 4068 return false; 4069 else 4070 return true; 4071 } 4072 4073 static int gfx_v9_0_wait_for_idle(void *handle) 4074 { 4075 unsigned i; 4076 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4077 4078 for (i = 0; i < adev->usec_timeout; i++) { 4079 if (gfx_v9_0_is_idle(handle)) 4080 return 0; 4081 udelay(1); 4082 } 4083 return -ETIMEDOUT; 4084 } 4085 4086 static int gfx_v9_0_soft_reset(void *handle) 4087 { 4088 u32 grbm_soft_reset = 0; 4089 u32 tmp; 4090 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4091 4092 /* GRBM_STATUS */ 4093 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4094 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4095 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4096 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4097 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4098 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4099 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4100 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4101 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4102 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4103 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4104 } 4105 4106 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4107 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4108 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4109 } 4110 4111 /* GRBM_STATUS2 */ 4112 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4113 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4114 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4115 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4116 4117 4118 if (grbm_soft_reset) { 4119 /* stop the rlc */ 4120 adev->gfx.rlc.funcs->stop(adev); 4121 4122 if (adev->gfx.num_gfx_rings) 4123 /* Disable GFX parsing/prefetching */ 4124 gfx_v9_0_cp_gfx_enable(adev, false); 4125 4126 /* Disable MEC parsing/prefetching */ 4127 gfx_v9_0_cp_compute_enable(adev, false); 4128 4129 if (grbm_soft_reset) { 4130 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4131 tmp |= grbm_soft_reset; 4132 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4133 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4134 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4135 4136 udelay(50); 4137 4138 tmp &= ~grbm_soft_reset; 4139 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4140 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4141 } 4142 4143 /* Wait a little for things to settle down */ 4144 udelay(50); 4145 } 4146 return 0; 4147 } 4148 4149 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4150 { 4151 signed long r, cnt = 0; 4152 unsigned long flags; 4153 uint32_t seq, reg_val_offs = 0; 4154 uint64_t value = 0; 4155 struct amdgpu_kiq *kiq = &adev->gfx.kiq; 4156 struct amdgpu_ring *ring = &kiq->ring; 4157 4158 BUG_ON(!ring->funcs->emit_rreg); 4159 4160 spin_lock_irqsave(&kiq->ring_lock, flags); 4161 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4162 pr_err("critical bug! 
too many kiq readers\n"); 4163 goto failed_unlock; 4164 } 4165 amdgpu_ring_alloc(ring, 32); 4166 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4167 amdgpu_ring_write(ring, 9 | /* src: register*/ 4168 (5 << 8) | /* dst: memory */ 4169 (1 << 16) | /* count sel */ 4170 (1 << 20)); /* write confirm */ 4171 amdgpu_ring_write(ring, 0); 4172 amdgpu_ring_write(ring, 0); 4173 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4174 reg_val_offs * 4)); 4175 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4176 reg_val_offs * 4)); 4177 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4178 if (r) 4179 goto failed_undo; 4180 4181 amdgpu_ring_commit(ring); 4182 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4183 4184 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4185 4186 /* don't wait anymore for gpu reset case because this way may 4187 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4188 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4189 * never return if we keep waiting in virt_kiq_rreg, which cause 4190 * gpu_recover() hang there. 4191 * 4192 * also don't wait anymore for IRQ context 4193 * */ 4194 if (r < 1 && (amdgpu_in_reset(adev))) 4195 goto failed_kiq_read; 4196 4197 might_sleep(); 4198 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4199 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4200 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4201 } 4202 4203 if (cnt > MAX_KIQ_REG_TRY) 4204 goto failed_kiq_read; 4205 4206 mb(); 4207 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4208 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4209 amdgpu_device_wb_free(adev, reg_val_offs); 4210 return value; 4211 4212 failed_undo: 4213 amdgpu_ring_undo(ring); 4214 failed_unlock: 4215 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4216 failed_kiq_read: 4217 if (reg_val_offs) 4218 amdgpu_device_wb_free(adev, reg_val_offs); 4219 pr_err("failed to read gpu clock\n"); 4220 return ~0; 4221 } 4222 4223 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4224 { 4225 uint64_t clock; 4226 4227 amdgpu_gfx_off_ctrl(adev, false); 4228 mutex_lock(&adev->gfx.gpu_clock_mutex); 4229 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 4230 clock = gfx_v9_0_kiq_read_clock(adev); 4231 } else { 4232 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4233 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4234 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4235 } 4236 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4237 amdgpu_gfx_off_ctrl(adev, true); 4238 return clock; 4239 } 4240 4241 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4242 uint32_t vmid, 4243 uint32_t gds_base, uint32_t gds_size, 4244 uint32_t gws_base, uint32_t gws_size, 4245 uint32_t oa_base, uint32_t oa_size) 4246 { 4247 struct amdgpu_device *adev = ring->adev; 4248 4249 /* GDS Base */ 4250 gfx_v9_0_write_data_to_reg(ring, 0, false, 4251 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4252 gds_base); 4253 4254 /* GDS Size */ 4255 gfx_v9_0_write_data_to_reg(ring, 0, false, 4256 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4257 gds_size); 4258 4259 /* GWS */ 4260 gfx_v9_0_write_data_to_reg(ring, 0, false, 4261 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4262 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4263 4264 /* OA */ 4265 gfx_v9_0_write_data_to_reg(ring, 0, false, 4266 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4267 (1 << (oa_size + oa_base)) - (1 << 
oa_base)); 4268 } 4269 4270 static const u32 vgpr_init_compute_shader[] = 4271 { 4272 0xb07c0000, 0xbe8000ff, 4273 0x000000f8, 0xbf110800, 4274 0x7e000280, 0x7e020280, 4275 0x7e040280, 0x7e060280, 4276 0x7e080280, 0x7e0a0280, 4277 0x7e0c0280, 0x7e0e0280, 4278 0x80808800, 0xbe803200, 4279 0xbf84fff5, 0xbf9c0000, 4280 0xd28c0001, 0x0001007f, 4281 0xd28d0001, 0x0002027e, 4282 0x10020288, 0xb8810904, 4283 0xb7814000, 0xd1196a01, 4284 0x00000301, 0xbe800087, 4285 0xbefc00c1, 0xd89c4000, 4286 0x00020201, 0xd89cc080, 4287 0x00040401, 0x320202ff, 4288 0x00000800, 0x80808100, 4289 0xbf84fff8, 0x7e020280, 4290 0xbf810000, 0x00000000, 4291 }; 4292 4293 static const u32 sgpr_init_compute_shader[] = 4294 { 4295 0xb07c0000, 0xbe8000ff, 4296 0x0000005f, 0xbee50080, 4297 0xbe812c65, 0xbe822c65, 4298 0xbe832c65, 0xbe842c65, 4299 0xbe852c65, 0xb77c0005, 4300 0x80808500, 0xbf84fff8, 4301 0xbe800080, 0xbf810000, 4302 }; 4303 4304 static const u32 vgpr_init_compute_shader_arcturus[] = { 4305 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4306 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4307 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4308 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4309 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4310 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4311 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4312 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4313 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4314 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4315 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4316 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4317 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4318 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4319 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4320 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4321 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4322 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4323 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4324 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4325 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4326 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4327 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4328 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4329 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4330 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4331 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4332 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4333 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4334 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4335 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4336 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4337 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4338 0xd3d94063, 0x18000080, 
0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4339 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4340 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4341 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4342 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 4343 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4344 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4345 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4346 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4347 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4348 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4349 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4350 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4351 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4352 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4353 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4354 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4355 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4356 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4357 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4358 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4359 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4360 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4361 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4362 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4363 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4364 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4365 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4366 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4367 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4368 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4369 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4370 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4371 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4372 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4373 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4374 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4375 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4376 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4377 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4378 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4379 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4380 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4381 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4382 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4383 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4384 0xd3d940ed, 0x18000080, 0xd3d940ee, 
0x18000080, 0xd3d940ef, 0x18000080, 4385 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4386 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4387 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4388 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 4389 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4390 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4391 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4392 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4393 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4394 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4395 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4396 0xbf84fff8, 0xbf810000, 4397 }; 4398 4399 /* When below register arrays changed, please update gpr_reg_size, 4400 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4401 to cover all gfx9 ASICs */ 4402 static const struct soc15_reg_entry vgpr_init_regs[] = { 4403 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4404 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4405 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4414 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4415 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4416 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4417 }; 4418 4419 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4420 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4421 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4431 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4432 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4433 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4434 }; 4435 4436 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4437 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4438 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4439 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_NUM_THREAD_Y), 8 }, 4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4451 }; 4452 4453 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4468 }; 4469 4470 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4471 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4472 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4473 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4474 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4475 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4476 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4477 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4478 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4479 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4480 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4481 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4482 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4483 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4484 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4485 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4486 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4487 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4488 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4489 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4490 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4491 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4492 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4493 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4494 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4495 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4496 { SOC15_REG_ENTRY(GC, 
0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4497 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4498 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4499 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4500 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4501 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4502 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4503 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4504 }; 4505 4506 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4507 { 4508 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4509 int i, r; 4510 4511 /* only support when RAS is enabled */ 4512 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4513 return 0; 4514 4515 r = amdgpu_ring_alloc(ring, 7); 4516 if (r) { 4517 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4518 ring->name, r); 4519 return r; 4520 } 4521 4522 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4523 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4524 4525 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4526 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4527 PACKET3_DMA_DATA_DST_SEL(1) | 4528 PACKET3_DMA_DATA_SRC_SEL(2) | 4529 PACKET3_DMA_DATA_ENGINE(0))); 4530 amdgpu_ring_write(ring, 0); 4531 amdgpu_ring_write(ring, 0); 4532 amdgpu_ring_write(ring, 0); 4533 amdgpu_ring_write(ring, 0); 4534 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4535 adev->gds.gds_size); 4536 4537 amdgpu_ring_commit(ring); 4538 4539 for (i = 0; i < adev->usec_timeout; i++) { 4540 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4541 break; 4542 udelay(1); 4543 } 4544 4545 if (i >= adev->usec_timeout) 4546 r = -ETIMEDOUT; 4547 4548 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4549 4550 return r; 4551 } 4552 4553 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4554 { 4555 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4556 struct amdgpu_ib ib; 4557 struct dma_fence *f = NULL; 4558 int r, i; 4559 unsigned total_size, vgpr_offset, sgpr_offset; 4560 u64 gpu_addr; 4561 4562 int compute_dim_x = adev->gfx.config.max_shader_engines * 4563 adev->gfx.config.max_cu_per_sh * 4564 adev->gfx.config.max_sh_per_se; 4565 int sgpr_work_group_size = 5; 4566 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4567 int vgpr_init_shader_size; 4568 const u32 *vgpr_init_shader_ptr; 4569 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4570 4571 /* only support when RAS is enabled */ 4572 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4573 return 0; 4574 4575 /* bail if the compute ring is not ready */ 4576 if (!ring->sched.ready) 4577 return 0; 4578 4579 if (adev->asic_type == CHIP_ARCTURUS || 4580 adev->asic_type == CHIP_ALDEBARAN) { 4581 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4582 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4583 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4584 } else { 4585 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4586 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4587 vgpr_init_regs_ptr = vgpr_init_regs; 4588 } 4589 4590 total_size = 4591 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4592 total_size += 4593 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4594 total_size += 4595 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4596 total_size = ALIGN(total_size, 256); 4597 vgpr_offset = total_size; 4598 total_size += ALIGN(vgpr_init_shader_size, 256); 4599 sgpr_offset = total_size; 4600 
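/* the SGPR init shader is placed right after the 256-byte aligned VGPR shader within the same indirect buffer */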
total_size += sizeof(sgpr_init_compute_shader); 4601 4602 /* allocate an indirect buffer to put the commands in */ 4603 memset(&ib, 0, sizeof(ib)); 4604 r = amdgpu_ib_get(adev, NULL, total_size, 4605 AMDGPU_IB_POOL_DIRECT, &ib); 4606 if (r) { 4607 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4608 return r; 4609 } 4610 4611 /* load the compute shaders */ 4612 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4613 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4614 4615 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4616 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4617 4618 /* init the ib length to 0 */ 4619 ib.length_dw = 0; 4620 4621 /* VGPR */ 4622 /* write the register state for the compute dispatch */ 4623 for (i = 0; i < gpr_reg_size; i++) { 4624 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4625 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4626 - PACKET3_SET_SH_REG_START; 4627 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4628 } 4629 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4630 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4632 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4633 - PACKET3_SET_SH_REG_START; 4634 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4635 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4636 4637 /* write dispatch packet */ 4638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4639 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4640 ib.ptr[ib.length_dw++] = 1; /* y */ 4641 ib.ptr[ib.length_dw++] = 1; /* z */ 4642 ib.ptr[ib.length_dw++] = 4643 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4644 4645 /* write CS partial flush packet */ 4646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4647 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4648 4649 /* SGPR1 */ 4650 /* write the register state for the compute dispatch */ 4651 for (i = 0; i < gpr_reg_size; i++) { 4652 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4653 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4654 - PACKET3_SET_SH_REG_START; 4655 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4656 } 4657 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4658 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4659 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4660 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4661 - PACKET3_SET_SH_REG_START; 4662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4664 4665 /* write dispatch packet */ 4666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4667 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4668 ib.ptr[ib.length_dw++] = 1; /* y */ 4669 ib.ptr[ib.length_dw++] = 1; /* z */ 4670 ib.ptr[ib.length_dw++] = 4671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4672 4673 /* write CS partial flush packet */ 4674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4676 4677 /* SGPR2 */ 4678 /* write the register state for the compute dispatch */ 4679 for (i = 0; i < gpr_reg_size; i++) { 4680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4681 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4682 - 
				PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
							- PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v9_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->asic_type == CHIP_ARCTURUS ||
	    adev->asic_type == CHIP_ALDEBARAN)
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	gfx_v9_0_set_kiq_pm4_funcs(adev);
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v9_0_ecc_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/*
	 * Temporary workaround: on several boards the CP firmware fails to
	 * update the read pointer while CPDMA is writing the GDS clearing
	 * operation during the suspend/resume sequence, so only perform
	 * this operation during cold boot.
4756 */ 4757 if ((!adev->in_suspend) && 4758 (adev->gds.gds_size)) { 4759 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4760 if (r) 4761 return r; 4762 } 4763 4764 /* requires IBs so do in late init after IB pool is initialized */ 4765 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4766 if (r) 4767 return r; 4768 4769 r = amdgpu_gfx_ras_late_init(adev); 4770 if (r) 4771 return r; 4772 4773 if (adev->gfx.funcs->enable_watchdog_timer) 4774 adev->gfx.funcs->enable_watchdog_timer(adev); 4775 4776 return 0; 4777 } 4778 4779 static int gfx_v9_0_late_init(void *handle) 4780 { 4781 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4782 int r; 4783 4784 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4785 if (r) 4786 return r; 4787 4788 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4789 if (r) 4790 return r; 4791 4792 r = gfx_v9_0_ecc_late_init(handle); 4793 if (r) 4794 return r; 4795 4796 return 0; 4797 } 4798 4799 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4800 { 4801 uint32_t rlc_setting; 4802 4803 /* if RLC is not enabled, do nothing */ 4804 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4805 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4806 return false; 4807 4808 return true; 4809 } 4810 4811 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4812 { 4813 uint32_t data; 4814 unsigned i; 4815 4816 data = RLC_SAFE_MODE__CMD_MASK; 4817 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4818 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4819 4820 /* wait for RLC_SAFE_MODE */ 4821 for (i = 0; i < adev->usec_timeout; i++) { 4822 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4823 break; 4824 udelay(1); 4825 } 4826 } 4827 4828 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4829 { 4830 uint32_t data; 4831 4832 data = RLC_SAFE_MODE__CMD_MASK; 4833 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4834 } 4835 4836 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4837 bool enable) 4838 { 4839 amdgpu_gfx_rlc_enter_safe_mode(adev); 4840 4841 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4842 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4843 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4844 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4845 } else { 4846 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4847 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4848 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4849 } 4850 4851 amdgpu_gfx_rlc_exit_safe_mode(adev); 4852 } 4853 4854 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4855 bool enable) 4856 { 4857 /* TODO: double check if we need to perform under safe mode */ 4858 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4859 4860 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4861 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4862 else 4863 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4864 4865 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4866 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4867 else 4868 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4869 4870 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4871 } 4872 4873 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4874 bool enable) 4875 { 4876 uint32_t data, def; 4877 4878 amdgpu_gfx_rlc_enter_safe_mode(adev); 4879 4880 /* It is disabled by HW by default */ 4881 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4882 /* 1 - 
RLC_CGTT_MGCG_OVERRIDE */ 4883 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4884 4885 if (adev->asic_type != CHIP_VEGA12) 4886 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4887 4888 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4889 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4890 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4891 4892 /* only for Vega10 & Raven1 */ 4893 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4894 4895 if (def != data) 4896 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4897 4898 /* MGLS is a global flag to control all MGLS in GFX */ 4899 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4900 /* 2 - RLC memory Light sleep */ 4901 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4902 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4903 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4904 if (def != data) 4905 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4906 } 4907 /* 3 - CP memory Light sleep */ 4908 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4909 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4910 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4911 if (def != data) 4912 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4913 } 4914 } 4915 } else { 4916 /* 1 - MGCG_OVERRIDE */ 4917 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4918 4919 if (adev->asic_type != CHIP_VEGA12) 4920 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4921 4922 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4923 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4924 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4925 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4926 4927 if (def != data) 4928 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4929 4930 /* 2 - disable MGLS in RLC */ 4931 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4932 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4933 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4934 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4935 } 4936 4937 /* 3 - disable MGLS in CP */ 4938 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4939 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4940 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4941 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4942 } 4943 } 4944 4945 amdgpu_gfx_rlc_exit_safe_mode(adev); 4946 } 4947 4948 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4949 bool enable) 4950 { 4951 uint32_t data, def; 4952 4953 if (!adev->gfx.num_gfx_rings) 4954 return; 4955 4956 amdgpu_gfx_rlc_enter_safe_mode(adev); 4957 4958 /* Enable 3D CGCG/CGLS */ 4959 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4960 /* write cmd to clear cgcg/cgls ov */ 4961 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4962 /* unset CGCG override */ 4963 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4964 /* update CGCG and CGLS override bits */ 4965 if (def != data) 4966 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4967 4968 /* enable 3Dcgcg FSM(0x0000363f) */ 4969 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4970 4971 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4972 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4973 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4974 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4975 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4976 if (def != data) 4977 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4978 
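		/*
		 * While 3D CGCG is active, also reprogram the CP RB wptr poll
		 * interval (the same 0x00900100 value as the coarse-grain path
		 * below), presumably so that aggressive polling does not keep
		 * defeating the clock gating.
		 */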
4979 /* set IDLE_POLL_COUNT(0x00900100) */ 4980 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4981 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4982 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4983 if (def != data) 4984 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4985 } else { 4986 /* Disable CGCG/CGLS */ 4987 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4988 /* disable cgcg, cgls should be disabled */ 4989 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4990 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4991 /* disable cgcg and cgls in FSM */ 4992 if (def != data) 4993 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4994 } 4995 4996 amdgpu_gfx_rlc_exit_safe_mode(adev); 4997 } 4998 4999 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5000 bool enable) 5001 { 5002 uint32_t def, data; 5003 5004 amdgpu_gfx_rlc_enter_safe_mode(adev); 5005 5006 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5007 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5008 /* unset CGCG override */ 5009 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5010 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5011 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5012 else 5013 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5014 /* update CGCG and CGLS override bits */ 5015 if (def != data) 5016 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5017 5018 /* enable cgcg FSM(0x0000363F) */ 5019 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5020 5021 if (adev->asic_type == CHIP_ARCTURUS) 5022 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5023 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5024 else 5025 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5026 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5027 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5028 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5029 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5030 if (def != data) 5031 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5032 5033 /* set IDLE_POLL_COUNT(0x00900100) */ 5034 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5035 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5036 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5037 if (def != data) 5038 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5039 } else { 5040 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5041 /* reset CGCG/CGLS bits */ 5042 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5043 /* disable cgcg and cgls in FSM */ 5044 if (def != data) 5045 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5046 } 5047 5048 amdgpu_gfx_rlc_exit_safe_mode(adev); 5049 } 5050 5051 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5052 bool enable) 5053 { 5054 if (enable) { 5055 /* CGCG/CGLS should be enabled after MGCG/MGLS 5056 * === MGCG + MGLS === 5057 */ 5058 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5059 /* === CGCG /CGLS for GFX 3D Only === */ 5060 gfx_v9_0_update_3d_clock_gating(adev, enable); 5061 /* === CGCG + CGLS === */ 5062 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5063 } else { 5064 /* CGCG/CGLS should be disabled before MGCG/MGLS 5065 * === CGCG + CGLS === 5066 */ 5067 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5068 /* === CGCG /CGLS for GFX 3D Only === */ 5069 gfx_v9_0_update_3d_clock_gating(adev, enable); 5070 /* === MGCG + MGLS === */ 
5071 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5072 } 5073 return 0; 5074 } 5075 5076 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5077 { 5078 u32 reg, data; 5079 5080 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5081 if (amdgpu_sriov_is_pp_one_vf(adev)) 5082 data = RREG32_NO_KIQ(reg); 5083 else 5084 data = RREG32(reg); 5085 5086 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5087 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5088 5089 if (amdgpu_sriov_is_pp_one_vf(adev)) 5090 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5091 else 5092 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5093 } 5094 5095 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5096 uint32_t offset, 5097 struct soc15_reg_rlcg *entries, int arr_size) 5098 { 5099 int i; 5100 uint32_t reg; 5101 5102 if (!entries) 5103 return false; 5104 5105 for (i = 0; i < arr_size; i++) { 5106 const struct soc15_reg_rlcg *entry; 5107 5108 entry = &entries[i]; 5109 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5110 if (offset == reg) 5111 return true; 5112 } 5113 5114 return false; 5115 } 5116 5117 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5118 { 5119 return gfx_v9_0_check_rlcg_range(adev, offset, 5120 (void *)rlcg_access_gc_9_0, 5121 ARRAY_SIZE(rlcg_access_gc_9_0)); 5122 } 5123 5124 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5125 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5126 .set_safe_mode = gfx_v9_0_set_safe_mode, 5127 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5128 .init = gfx_v9_0_rlc_init, 5129 .get_csb_size = gfx_v9_0_get_csb_size, 5130 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5131 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5132 .resume = gfx_v9_0_rlc_resume, 5133 .stop = gfx_v9_0_rlc_stop, 5134 .reset = gfx_v9_0_rlc_reset, 5135 .start = gfx_v9_0_rlc_start, 5136 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5137 .rlcg_wreg = gfx_v9_0_rlcg_wreg, 5138 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5139 }; 5140 5141 static int gfx_v9_0_set_powergating_state(void *handle, 5142 enum amd_powergating_state state) 5143 { 5144 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5145 bool enable = (state == AMD_PG_STATE_GATE); 5146 5147 switch (adev->asic_type) { 5148 case CHIP_RAVEN: 5149 case CHIP_RENOIR: 5150 if (!enable) 5151 amdgpu_gfx_off_ctrl(adev, false); 5152 5153 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5154 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5155 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5156 } else { 5157 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5158 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5159 } 5160 5161 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5162 gfx_v9_0_enable_cp_power_gating(adev, true); 5163 else 5164 gfx_v9_0_enable_cp_power_gating(adev, false); 5165 5166 /* update gfx cgpg state */ 5167 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5168 5169 /* update mgcg state */ 5170 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5171 5172 if (enable) 5173 amdgpu_gfx_off_ctrl(adev, true); 5174 break; 5175 case CHIP_VEGA12: 5176 amdgpu_gfx_off_ctrl(adev, enable); 5177 break; 5178 default: 5179 break; 5180 } 5181 5182 return 0; 5183 } 5184 5185 static int gfx_v9_0_set_clockgating_state(void *handle, 5186 enum amd_clockgating_state state) 5187 { 5188 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 
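	/*
	 * Nothing to program for an SR-IOV VF; clock gating is expected to be
	 * handled on the host side.
	 */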
5189 5190 if (amdgpu_sriov_vf(adev)) 5191 return 0; 5192 5193 switch (adev->asic_type) { 5194 case CHIP_VEGA10: 5195 case CHIP_VEGA12: 5196 case CHIP_VEGA20: 5197 case CHIP_RAVEN: 5198 case CHIP_ARCTURUS: 5199 case CHIP_RENOIR: 5200 case CHIP_ALDEBARAN: 5201 gfx_v9_0_update_gfx_clock_gating(adev, 5202 state == AMD_CG_STATE_GATE); 5203 break; 5204 default: 5205 break; 5206 } 5207 return 0; 5208 } 5209 5210 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 5211 { 5212 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5213 int data; 5214 5215 if (amdgpu_sriov_vf(adev)) 5216 *flags = 0; 5217 5218 /* AMD_CG_SUPPORT_GFX_MGCG */ 5219 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5220 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5221 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5222 5223 /* AMD_CG_SUPPORT_GFX_CGCG */ 5224 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5225 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5226 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5227 5228 /* AMD_CG_SUPPORT_GFX_CGLS */ 5229 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5230 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5231 5232 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5233 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5234 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5235 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5236 5237 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5238 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5239 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5240 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5241 5242 if (adev->asic_type != CHIP_ARCTURUS) { 5243 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5244 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5245 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5246 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5247 5248 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5249 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5250 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5251 } 5252 } 5253 5254 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5255 { 5256 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 5257 } 5258 5259 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5260 { 5261 struct amdgpu_device *adev = ring->adev; 5262 u64 wptr; 5263 5264 /* XXX check if swapping is necessary on BE */ 5265 if (ring->use_doorbell) { 5266 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 5267 } else { 5268 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5269 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5270 } 5271 5272 return wptr; 5273 } 5274 5275 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5276 { 5277 struct amdgpu_device *adev = ring->adev; 5278 5279 if (ring->use_doorbell) { 5280 /* XXX check if swapping is necessary on BE */ 5281 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5282 WDOORBELL64(ring->doorbell_index, ring->wptr); 5283 } else { 5284 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5285 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5286 } 5287 } 5288 5289 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5290 { 5291 struct amdgpu_device *adev = ring->adev; 5292 u32 ref_and_mask, reg_mem_engine; 5293 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5294 5295 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5296 switch (ring->me) { 5297 case 1: 5298 ref_and_mask = 
nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5299 break; 5300 case 2: 5301 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5302 break; 5303 default: 5304 return; 5305 } 5306 reg_mem_engine = 0; 5307 } else { 5308 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5309 reg_mem_engine = 1; /* pfp */ 5310 } 5311 5312 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5313 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5314 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5315 ref_and_mask, ref_and_mask, 0x20); 5316 } 5317 5318 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5319 struct amdgpu_job *job, 5320 struct amdgpu_ib *ib, 5321 uint32_t flags) 5322 { 5323 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5324 u32 header, control = 0; 5325 5326 if (ib->flags & AMDGPU_IB_FLAG_CE) 5327 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5328 else 5329 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5330 5331 control |= ib->length_dw | (vmid << 24); 5332 5333 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5334 control |= INDIRECT_BUFFER_PRE_ENB(1); 5335 5336 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5337 gfx_v9_0_ring_emit_de_meta(ring); 5338 } 5339 5340 amdgpu_ring_write(ring, header); 5341 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5342 amdgpu_ring_write(ring, 5343 #ifdef __BIG_ENDIAN 5344 (2 << 0) | 5345 #endif 5346 lower_32_bits(ib->gpu_addr)); 5347 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5348 amdgpu_ring_write(ring, control); 5349 } 5350 5351 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5352 struct amdgpu_job *job, 5353 struct amdgpu_ib *ib, 5354 uint32_t flags) 5355 { 5356 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5357 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5358 5359 /* Currently, there is a high possibility to get wave ID mismatch 5360 * between ME and GDS, leading to a hw deadlock, because ME generates 5361 * different wave IDs than the GDS expects. This situation happens 5362 * randomly when at least 5 compute pipes use GDS ordered append. 5363 * The wave IDs generated by ME are also wrong after suspend/resume. 5364 * Those are probably bugs somewhere else in the kernel driver. 5365 * 5366 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5367 * GDS to 0 for this ring (me/pipe). 5368 */ 5369 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5370 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5371 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5372 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5373 } 5374 5375 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5376 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5377 amdgpu_ring_write(ring, 5378 #ifdef __BIG_ENDIAN 5379 (2 << 0) | 5380 #endif 5381 lower_32_bits(ib->gpu_addr)); 5382 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5383 amdgpu_ring_write(ring, control); 5384 } 5385 5386 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5387 u64 seq, unsigned flags) 5388 { 5389 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5390 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5391 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5392 5393 /* RELEASE_MEM - flush caches, send int */ 5394 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5395 amdgpu_ring_write(ring, ((writeback ? 
(EOP_TC_WB_ACTION_EN | 5396 EOP_TC_NC_ACTION_EN) : 5397 (EOP_TCL1_ACTION_EN | 5398 EOP_TC_ACTION_EN | 5399 EOP_TC_WB_ACTION_EN | 5400 EOP_TC_MD_ACTION_EN)) | 5401 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5402 EVENT_INDEX(5))); 5403 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5404 5405 /* 5406 * the address should be Qword aligned if 64bit write, Dword 5407 * aligned if only send 32bit data low (discard data high) 5408 */ 5409 if (write64bit) 5410 BUG_ON(addr & 0x7); 5411 else 5412 BUG_ON(addr & 0x3); 5413 amdgpu_ring_write(ring, lower_32_bits(addr)); 5414 amdgpu_ring_write(ring, upper_32_bits(addr)); 5415 amdgpu_ring_write(ring, lower_32_bits(seq)); 5416 amdgpu_ring_write(ring, upper_32_bits(seq)); 5417 amdgpu_ring_write(ring, 0); 5418 } 5419 5420 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5421 { 5422 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5423 uint32_t seq = ring->fence_drv.sync_seq; 5424 uint64_t addr = ring->fence_drv.gpu_addr; 5425 5426 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5427 lower_32_bits(addr), upper_32_bits(addr), 5428 seq, 0xffffffff, 4); 5429 } 5430 5431 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5432 unsigned vmid, uint64_t pd_addr) 5433 { 5434 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5435 5436 /* compute doesn't have PFP */ 5437 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5438 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5439 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5440 amdgpu_ring_write(ring, 0x0); 5441 } 5442 } 5443 5444 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5445 { 5446 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5447 } 5448 5449 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5450 { 5451 u64 wptr; 5452 5453 /* XXX check if swapping is necessary on BE */ 5454 if (ring->use_doorbell) 5455 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5456 else 5457 BUG(); 5458 return wptr; 5459 } 5460 5461 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5462 { 5463 struct amdgpu_device *adev = ring->adev; 5464 5465 /* XXX check if swapping is necessary on BE */ 5466 if (ring->use_doorbell) { 5467 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5468 WDOORBELL64(ring->doorbell_index, ring->wptr); 5469 } else{ 5470 BUG(); /* only DOORBELL method supported on gfx9 now */ 5471 } 5472 } 5473 5474 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5475 u64 seq, unsigned int flags) 5476 { 5477 struct amdgpu_device *adev = ring->adev; 5478 5479 /* we only allocate 32bit for each seq wb address */ 5480 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5481 5482 /* write fence seq to the "addr" */ 5483 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5484 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5485 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5486 amdgpu_ring_write(ring, lower_32_bits(addr)); 5487 amdgpu_ring_write(ring, upper_32_bits(addr)); 5488 amdgpu_ring_write(ring, lower_32_bits(seq)); 5489 5490 if (flags & AMDGPU_FENCE_FLAG_INT) { 5491 /* set register to trigger INT */ 5492 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5493 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5494 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5495 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5496 amdgpu_ring_write(ring, 0); 5497 
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	struct v9_ce_ib_state ce_payload = {0};
	uint64_t csa_addr;
	int cnt;

	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
	csa_addr = amdgpu_csa_vaddr(ring->adev);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
}

static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	struct v9_de_ib_state de_payload = {0};
	uint64_t csa_addr, gds_addr;
	int cnt;

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}

static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
					  bool secure)
{
	uint32_t v = secure ? FRAME_TMZ : 0;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}

static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, even though no context switch happens.
5579 */ 5580 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5581 dw2 |= 0x10000000; 5582 } 5583 5584 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5585 amdgpu_ring_write(ring, dw2); 5586 amdgpu_ring_write(ring, 0); 5587 } 5588 5589 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5590 { 5591 unsigned ret; 5592 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5593 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5594 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5595 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5596 ret = ring->wptr & ring->buf_mask; 5597 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5598 return ret; 5599 } 5600 5601 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5602 { 5603 unsigned cur; 5604 BUG_ON(offset > ring->buf_mask); 5605 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5606 5607 cur = (ring->wptr & ring->buf_mask) - 1; 5608 if (likely(cur > offset)) 5609 ring->ring[offset] = cur - offset; 5610 else 5611 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5612 } 5613 5614 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5615 uint32_t reg_val_offs) 5616 { 5617 struct amdgpu_device *adev = ring->adev; 5618 5619 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5620 amdgpu_ring_write(ring, 0 | /* src: register*/ 5621 (5 << 8) | /* dst: memory */ 5622 (1 << 20)); /* write confirm */ 5623 amdgpu_ring_write(ring, reg); 5624 amdgpu_ring_write(ring, 0); 5625 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5626 reg_val_offs * 4)); 5627 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5628 reg_val_offs * 4)); 5629 } 5630 5631 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5632 uint32_t val) 5633 { 5634 uint32_t cmd = 0; 5635 5636 switch (ring->funcs->type) { 5637 case AMDGPU_RING_TYPE_GFX: 5638 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5639 break; 5640 case AMDGPU_RING_TYPE_KIQ: 5641 cmd = (1 << 16); /* no inc addr */ 5642 break; 5643 default: 5644 cmd = WR_CONFIRM; 5645 break; 5646 } 5647 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5648 amdgpu_ring_write(ring, cmd); 5649 amdgpu_ring_write(ring, reg); 5650 amdgpu_ring_write(ring, 0); 5651 amdgpu_ring_write(ring, val); 5652 } 5653 5654 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5655 uint32_t val, uint32_t mask) 5656 { 5657 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5658 } 5659 5660 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5661 uint32_t reg0, uint32_t reg1, 5662 uint32_t ref, uint32_t mask) 5663 { 5664 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5665 struct amdgpu_device *adev = ring->adev; 5666 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5667 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5668 5669 if (fw_version_ok) 5670 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5671 ref, mask, 0x20); 5672 else 5673 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5674 ref, mask); 5675 } 5676 5677 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5678 { 5679 struct amdgpu_device *adev = ring->adev; 5680 uint32_t value = 0; 5681 5682 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5683 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5684 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5685 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5686 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5687 } 5688 5689 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5690 enum amdgpu_interrupt_state state) 5691 { 5692 switch (state) { 5693 case AMDGPU_IRQ_STATE_DISABLE: 5694 case AMDGPU_IRQ_STATE_ENABLE: 5695 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5696 TIME_STAMP_INT_ENABLE, 5697 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5698 break; 5699 default: 5700 break; 5701 } 5702 } 5703 5704 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5705 int me, int pipe, 5706 enum amdgpu_interrupt_state state) 5707 { 5708 u32 mec_int_cntl, mec_int_cntl_reg; 5709 5710 /* 5711 * amdgpu controls only the first MEC. That's why this function only 5712 * handles the setting of interrupts for this specific MEC. All other 5713 * pipes' interrupts are set by amdkfd. 5714 */ 5715 5716 if (me == 1) { 5717 switch (pipe) { 5718 case 0: 5719 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5720 break; 5721 case 1: 5722 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5723 break; 5724 case 2: 5725 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5726 break; 5727 case 3: 5728 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5729 break; 5730 default: 5731 DRM_DEBUG("invalid pipe %d\n", pipe); 5732 return; 5733 } 5734 } else { 5735 DRM_DEBUG("invalid me %d\n", me); 5736 return; 5737 } 5738 5739 switch (state) { 5740 case AMDGPU_IRQ_STATE_DISABLE: 5741 mec_int_cntl = RREG32(mec_int_cntl_reg); 5742 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5743 TIME_STAMP_INT_ENABLE, 0); 5744 WREG32(mec_int_cntl_reg, mec_int_cntl); 5745 break; 5746 case AMDGPU_IRQ_STATE_ENABLE: 5747 mec_int_cntl = RREG32(mec_int_cntl_reg); 5748 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5749 TIME_STAMP_INT_ENABLE, 1); 5750 WREG32(mec_int_cntl_reg, mec_int_cntl); 5751 break; 5752 default: 5753 break; 5754 } 5755 } 5756 5757 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5758 struct amdgpu_irq_src *source, 5759 unsigned type, 5760 enum amdgpu_interrupt_state state) 5761 { 5762 switch (state) { 5763 case AMDGPU_IRQ_STATE_DISABLE: 5764 case AMDGPU_IRQ_STATE_ENABLE: 5765 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5766 PRIV_REG_INT_ENABLE, 5767 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5768 break; 5769 default: 5770 break; 5771 } 5772 5773 return 0; 5774 } 5775 5776 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5777 struct amdgpu_irq_src *source, 5778 unsigned type, 5779 enum amdgpu_interrupt_state state) 5780 { 5781 switch (state) { 5782 case AMDGPU_IRQ_STATE_DISABLE: 5783 case AMDGPU_IRQ_STATE_ENABLE: 5784 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5785 PRIV_INSTR_INT_ENABLE, 5786 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5787 break; 5788 default: 5789 break; 5790 } 5791 5792 return 0; 5793 } 5794 5795 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5796 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5797 CP_ECC_ERROR_INT_ENABLE, 1) 5798 5799 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5800 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5801 CP_ECC_ERROR_INT_ENABLE, 0) 5802 5803 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5804 struct amdgpu_irq_src *source, 5805 unsigned type, 5806 enum amdgpu_interrupt_state state) 5807 { 5808 switch (state) { 5809 case AMDGPU_IRQ_STATE_DISABLE: 5810 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5811 CP_ECC_ERROR_INT_ENABLE, 0); 5812 DISABLE_ECC_ON_ME_PIPE(1, 0); 5813 DISABLE_ECC_ON_ME_PIPE(1, 1); 5814 DISABLE_ECC_ON_ME_PIPE(1, 2); 5815 DISABLE_ECC_ON_ME_PIPE(1, 3); 5816 break; 5817 5818 case AMDGPU_IRQ_STATE_ENABLE: 5819 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5820 CP_ECC_ERROR_INT_ENABLE, 1); 5821 ENABLE_ECC_ON_ME_PIPE(1, 0); 5822 ENABLE_ECC_ON_ME_PIPE(1, 1); 5823 ENABLE_ECC_ON_ME_PIPE(1, 2); 5824 ENABLE_ECC_ON_ME_PIPE(1, 3); 5825 break; 5826 default: 5827 break; 5828 } 5829 5830 return 0; 5831 } 5832 5833 5834 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5835 struct amdgpu_irq_src *src, 5836 unsigned type, 5837 enum amdgpu_interrupt_state state) 5838 { 5839 switch (type) { 5840 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5841 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5842 break; 5843 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5844 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5845 break; 5846 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5847 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5848 break; 5849 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5850 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5851 break; 5852 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5853 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5854 break; 5855 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5856 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5857 break; 5858 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5859 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5860 break; 5861 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5862 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5863 break; 5864 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5865 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5866 break; 5867 default: 5868 break; 5869 } 5870 return 0; 5871 } 5872 5873 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5874 struct amdgpu_irq_src *source, 5875 struct amdgpu_iv_entry *entry) 5876 { 5877 int i; 5878 u8 me_id, pipe_id, queue_id; 5879 struct amdgpu_ring *ring; 5880 5881 DRM_DEBUG("IH: CP EOP\n"); 5882 me_id = (entry->ring_id & 0x0c) >> 2; 5883 pipe_id = (entry->ring_id & 0x03) >> 0; 5884 queue_id = (entry->ring_id & 0x70) >> 4; 5885 5886 switch (me_id) { 5887 case 0: 5888 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5889 break; 5890 case 1: 5891 case 2: 5892 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5893 ring = &adev->gfx.compute_ring[i]; 5894 /* Per-queue interrupt is supported for MEC starting from VI. 5895 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
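			 * Hence match this entry's me/pipe/queue against every
			 * compute ring and only signal the fence of the ring
			 * that owns the interrupt.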
5896 */ 5897 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5898 amdgpu_fence_process(ring); 5899 } 5900 break; 5901 } 5902 return 0; 5903 } 5904 5905 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5906 struct amdgpu_iv_entry *entry) 5907 { 5908 u8 me_id, pipe_id, queue_id; 5909 struct amdgpu_ring *ring; 5910 int i; 5911 5912 me_id = (entry->ring_id & 0x0c) >> 2; 5913 pipe_id = (entry->ring_id & 0x03) >> 0; 5914 queue_id = (entry->ring_id & 0x70) >> 4; 5915 5916 switch (me_id) { 5917 case 0: 5918 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5919 break; 5920 case 1: 5921 case 2: 5922 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5923 ring = &adev->gfx.compute_ring[i]; 5924 if (ring->me == me_id && ring->pipe == pipe_id && 5925 ring->queue == queue_id) 5926 drm_sched_fault(&ring->sched); 5927 } 5928 break; 5929 } 5930 } 5931 5932 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5933 struct amdgpu_irq_src *source, 5934 struct amdgpu_iv_entry *entry) 5935 { 5936 DRM_ERROR("Illegal register access in command stream\n"); 5937 gfx_v9_0_fault(adev, entry); 5938 return 0; 5939 } 5940 5941 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5942 struct amdgpu_irq_src *source, 5943 struct amdgpu_iv_entry *entry) 5944 { 5945 DRM_ERROR("Illegal instruction in command stream\n"); 5946 gfx_v9_0_fault(adev, entry); 5947 return 0; 5948 } 5949 5950 5951 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 5952 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5953 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5954 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5955 }, 5956 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5957 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5958 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5959 }, 5960 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5961 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5962 0, 0 5963 }, 5964 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5965 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5966 0, 0 5967 }, 5968 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5969 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5970 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5971 }, 5972 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5973 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5974 0, 0 5975 }, 5976 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5977 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5978 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5979 }, 5980 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5981 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5982 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5983 }, 5984 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5985 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5986 0, 0 5987 }, 5988 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5989 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5990 0, 0 5991 }, 5992 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5993 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5994 0, 0 5995 }, 5996 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5997 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5998 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5999 }, 6000 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6001 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6002 0, 0 6003 }, 6004 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, 
mmGDS_EDC_OA_PHY_CNT), 6005 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6006 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6007 }, 6008 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6009 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6010 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6011 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6012 }, 6013 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6014 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6015 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6016 0, 0 6017 }, 6018 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6019 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6020 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6021 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6022 }, 6023 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6024 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6025 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6026 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6027 }, 6028 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6029 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6030 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6031 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6032 }, 6033 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6034 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6035 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6036 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6037 }, 6038 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6039 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6040 0, 0 6041 }, 6042 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6043 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6044 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6045 }, 6046 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6047 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6048 0, 0 6049 }, 6050 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6051 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6052 0, 0 6053 }, 6054 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6055 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6056 0, 0 6057 }, 6058 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6059 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6060 0, 0 6061 }, 6062 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6063 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6064 0, 0 6065 }, 6066 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6067 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6068 0, 0 6069 }, 6070 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6071 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6072 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6073 }, 6074 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6075 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6076 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6077 }, 6078 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6079 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6080 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6081 }, 6082 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6083 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6084 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6085 }, 6086 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6087 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6088 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6089 }, 6090 
{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6091 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6092 0, 0 6093 }, 6094 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6095 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6096 0, 0 6097 }, 6098 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6099 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6100 0, 0 6101 }, 6102 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6103 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6104 0, 0 6105 }, 6106 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6107 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6108 0, 0 6109 }, 6110 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6111 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6112 0, 0 6113 }, 6114 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6115 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6116 0, 0 6117 }, 6118 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6119 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6120 0, 0 6121 }, 6122 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6123 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6124 0, 0 6125 }, 6126 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6127 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6128 0, 0 6129 }, 6130 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6131 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6132 0, 0 6133 }, 6134 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6135 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6136 0, 0 6137 }, 6138 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6139 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6140 0, 0 6141 }, 6142 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6143 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6144 0, 0 6145 }, 6146 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6147 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6148 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6149 }, 6150 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6151 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6152 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6153 }, 6154 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6155 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6156 0, 0 6157 }, 6158 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6159 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6160 0, 0 6161 }, 6162 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6163 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6164 0, 0 6165 }, 6166 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6167 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6168 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6169 }, 6170 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6171 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6172 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6173 }, 6174 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6175 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6176 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6177 }, 6178 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, 
mmTD_EDC_CNT), 6179 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6180 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6181 }, 6182 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6183 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6184 0, 0 6185 }, 6186 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6187 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6188 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6189 }, 6190 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6191 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6192 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6193 }, 6194 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6195 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6196 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6197 }, 6198 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6199 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6200 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6201 }, 6202 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6203 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6204 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6205 }, 6206 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6207 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6208 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6209 }, 6210 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6211 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6212 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6213 }, 6214 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6215 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6216 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6217 }, 6218 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6219 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6220 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6221 }, 6222 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6223 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6224 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6225 }, 6226 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6227 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6228 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6229 }, 6230 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6231 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6232 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6233 }, 6234 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6235 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6236 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6237 }, 6238 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6239 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6240 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6241 }, 6242 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6243 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6244 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6245 }, 6246 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6247 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6248 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6249 }, 6250 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6251 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6252 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 6253 }, 6254 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6255 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6256 0, 0 6257 }, 6258 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6259 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6260 0, 0 6261 }, 6262 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6263 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6264 0, 0 6265 }, 6266 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6267 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6268 0, 0 6269 }, 6270 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6271 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6272 0, 0 6273 }, 6274 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6275 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6276 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6277 }, 6278 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6279 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6280 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6281 }, 6282 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6283 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6284 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6285 }, 6286 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6287 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6288 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6289 }, 6290 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6291 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6292 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6293 }, 6294 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6295 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6296 0, 0 6297 }, 6298 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6299 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6300 0, 0 6301 }, 6302 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6303 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6304 0, 0 6305 }, 6306 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6307 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6308 0, 0 6309 }, 6310 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6311 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6312 0, 0 6313 }, 6314 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6315 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6316 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6317 }, 6318 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6319 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6320 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6321 }, 6322 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6323 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6324 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6325 }, 6326 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6327 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6328 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6329 }, 6330 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
	},
	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
	  0, 0
	},
	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
	},
	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
	},
	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
	  0, 0
	},
	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
	  0, 0
	}
};

static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	int ret;
	struct ta_ras_trigger_error_input block_info = { 0 };

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
		return -EINVAL;

	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
		return -EPERM;

	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index =
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6427 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6428 block_info.address = info->address; 6429 block_info.value = info->value; 6430 6431 mutex_lock(&adev->grbm_idx_mutex); 6432 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6433 mutex_unlock(&adev->grbm_idx_mutex); 6434 6435 return ret; 6436 } 6437 6438 static const char *vml2_mems[] = { 6439 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6440 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6441 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6442 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6443 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6444 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6445 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6446 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6447 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6448 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6449 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6450 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6451 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6452 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6453 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6454 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6455 }; 6456 6457 static const char *vml2_walker_mems[] = { 6458 "UTC_VML2_CACHE_PDE0_MEM0", 6459 "UTC_VML2_CACHE_PDE0_MEM1", 6460 "UTC_VML2_CACHE_PDE1_MEM0", 6461 "UTC_VML2_CACHE_PDE1_MEM1", 6462 "UTC_VML2_CACHE_PDE2_MEM0", 6463 "UTC_VML2_CACHE_PDE2_MEM1", 6464 "UTC_VML2_RDIF_LOG_FIFO", 6465 }; 6466 6467 static const char *atc_l2_cache_2m_mems[] = { 6468 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6469 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6470 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6471 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6472 }; 6473 6474 static const char *atc_l2_cache_4k_mems[] = { 6475 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6476 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6477 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6478 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6479 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6480 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6481 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6482 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6483 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6484 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6485 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6486 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6487 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6488 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6489 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6490 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6491 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6492 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6493 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6494 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6495 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6496 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6497 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6498 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6499 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6500 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6501 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6502 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6503 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6504 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6505 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6506 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6507 }; 6508 6509 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6510 struct ras_err_data *err_data) 6511 { 6512 uint32_t i, data; 6513 uint32_t sec_count, ded_count; 6514 6515 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6516 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6517 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6518 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6519 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6520 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
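	/* The remaining EDC counters are reset below; each VML2, VML2-walker
	 * and ATC L2 memory instance is then selected by writing its index
	 * and read back, accumulating SEC (correctable) and DED
	 * (uncorrectable) counts into err_data.
	 */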
6521 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6522 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6523 6524 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6525 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6526 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6527 6528 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6529 if (sec_count) { 6530 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6531 "SEC %d\n", i, vml2_mems[i], sec_count); 6532 err_data->ce_count += sec_count; 6533 } 6534 6535 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6536 if (ded_count) { 6537 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6538 "DED %d\n", i, vml2_mems[i], ded_count); 6539 err_data->ue_count += ded_count; 6540 } 6541 } 6542 6543 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6544 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6545 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6546 6547 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6548 SEC_COUNT); 6549 if (sec_count) { 6550 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6551 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6552 err_data->ce_count += sec_count; 6553 } 6554 6555 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6556 DED_COUNT); 6557 if (ded_count) { 6558 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6559 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6560 err_data->ue_count += ded_count; 6561 } 6562 } 6563 6564 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6565 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6566 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6567 6568 sec_count = (data & 0x00006000L) >> 0xd; 6569 if (sec_count) { 6570 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6571 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6572 sec_count); 6573 err_data->ce_count += sec_count; 6574 } 6575 } 6576 6577 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6578 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6579 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6580 6581 sec_count = (data & 0x00006000L) >> 0xd; 6582 if (sec_count) { 6583 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6584 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6585 sec_count); 6586 err_data->ce_count += sec_count; 6587 } 6588 6589 ded_count = (data & 0x00018000L) >> 0xf; 6590 if (ded_count) { 6591 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6592 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6593 ded_count); 6594 err_data->ue_count += ded_count; 6595 } 6596 } 6597 6598 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6599 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6600 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6601 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6602 6603 return 0; 6604 } 6605 6606 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6607 const struct soc15_reg_entry *reg, 6608 uint32_t se_id, uint32_t inst_id, uint32_t value, 6609 uint32_t *sec_count, uint32_t *ded_count) 6610 { 6611 uint32_t i; 6612 uint32_t sec_cnt, ded_cnt; 6613 6614 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6615 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6616 gfx_v9_0_ras_fields[i].seg != reg->seg || 6617 gfx_v9_0_ras_fields[i].inst != reg->inst) 6618 continue; 6619 6620 sec_cnt = (value & 6621 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6622 gfx_v9_0_ras_fields[i].sec_count_shift; 6623 if (sec_cnt) { 6624 dev_info(adev->dev, "GFX SubBlock %s, " 6625 "Instance[%d][%d], 
SEC %d\n", 6626 gfx_v9_0_ras_fields[i].name, 6627 se_id, inst_id, 6628 sec_cnt); 6629 *sec_count += sec_cnt; 6630 } 6631 6632 ded_cnt = (value & 6633 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6634 gfx_v9_0_ras_fields[i].ded_count_shift; 6635 if (ded_cnt) { 6636 dev_info(adev->dev, "GFX SubBlock %s, " 6637 "Instance[%d][%d], DED %d\n", 6638 gfx_v9_0_ras_fields[i].name, 6639 se_id, inst_id, 6640 ded_cnt); 6641 *ded_count += ded_cnt; 6642 } 6643 } 6644 6645 return 0; 6646 } 6647 6648 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6649 { 6650 int i, j, k; 6651 6652 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6653 return; 6654 6655 /* read back registers to clear the counters */ 6656 mutex_lock(&adev->grbm_idx_mutex); 6657 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6658 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6659 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6660 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 6661 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6662 } 6663 } 6664 } 6665 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6666 mutex_unlock(&adev->grbm_idx_mutex); 6667 6668 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6669 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6670 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6671 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6672 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6673 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6674 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6675 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6676 6677 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6678 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6679 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6680 } 6681 6682 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6683 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6684 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6685 } 6686 6687 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6688 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6689 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6690 } 6691 6692 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6693 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6694 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6695 } 6696 6697 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6698 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6699 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6700 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6701 } 6702 6703 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6704 void *ras_error_status) 6705 { 6706 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6707 uint32_t sec_count = 0, ded_count = 0; 6708 uint32_t i, j, k; 6709 uint32_t reg_value; 6710 6711 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6712 return -EINVAL; 6713 6714 err_data->ue_count = 0; 6715 err_data->ce_count = 0; 6716 6717 mutex_lock(&adev->grbm_idx_mutex); 6718 6719 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6720 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6721 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6722 gfx_v9_0_select_se_sh(adev, j, 0, k); 6723 reg_value = 6724 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6725 if (reg_value) 6726 gfx_v9_0_ras_error_count(adev, 6727 &gfx_v9_0_edc_counter_regs[i], 6728 j, k, reg_value, 6729 
						 &sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);

	return 0;
}

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);    /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);      /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);             /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);    /* POLL_INTERVAL */
}

static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves.  Writing 0x1f (only the low 5 bits
	 * set) makes sure gfx gets only around 25% of the gpu resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
6815 */ 6816 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6817 if (i != ring->pipe) 6818 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 6819 6820 } 6821 } 6822 6823 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6824 .name = "gfx_v9_0", 6825 .early_init = gfx_v9_0_early_init, 6826 .late_init = gfx_v9_0_late_init, 6827 .sw_init = gfx_v9_0_sw_init, 6828 .sw_fini = gfx_v9_0_sw_fini, 6829 .hw_init = gfx_v9_0_hw_init, 6830 .hw_fini = gfx_v9_0_hw_fini, 6831 .suspend = gfx_v9_0_suspend, 6832 .resume = gfx_v9_0_resume, 6833 .is_idle = gfx_v9_0_is_idle, 6834 .wait_for_idle = gfx_v9_0_wait_for_idle, 6835 .soft_reset = gfx_v9_0_soft_reset, 6836 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6837 .set_powergating_state = gfx_v9_0_set_powergating_state, 6838 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6839 }; 6840 6841 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6842 .type = AMDGPU_RING_TYPE_GFX, 6843 .align_mask = 0xff, 6844 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6845 .support_64bit_ptrs = true, 6846 .vmhub = AMDGPU_GFXHUB_0, 6847 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6848 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6849 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6850 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6851 5 + /* COND_EXEC */ 6852 7 + /* PIPELINE_SYNC */ 6853 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6854 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6855 2 + /* VM_FLUSH */ 6856 8 + /* FENCE for VM_FLUSH */ 6857 20 + /* GDS switch */ 6858 4 + /* double SWITCH_BUFFER, 6859 the first COND_EXEC jump to the place just 6860 prior to this double SWITCH_BUFFER */ 6861 5 + /* COND_EXEC */ 6862 7 + /* HDP_flush */ 6863 4 + /* VGT_flush */ 6864 14 + /* CE_META */ 6865 31 + /* DE_META */ 6866 3 + /* CNTX_CTRL */ 6867 5 + /* HDP_INVL */ 6868 8 + 8 + /* FENCE x2 */ 6869 2 + /* SWITCH_BUFFER */ 6870 7, /* gfx_v9_0_emit_mem_sync */ 6871 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6872 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6873 .emit_fence = gfx_v9_0_ring_emit_fence, 6874 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6875 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6876 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6877 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6878 .test_ring = gfx_v9_0_ring_test_ring, 6879 .test_ib = gfx_v9_0_ring_test_ib, 6880 .insert_nop = amdgpu_ring_insert_nop, 6881 .pad_ib = amdgpu_ring_generic_pad_ib, 6882 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6883 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6884 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6885 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6886 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6887 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6888 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6889 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6890 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6891 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6892 }; 6893 6894 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6895 .type = AMDGPU_RING_TYPE_COMPUTE, 6896 .align_mask = 0xff, 6897 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6898 .support_64bit_ptrs = true, 6899 .vmhub = AMDGPU_GFXHUB_0, 6900 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6901 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6902 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6903 .emit_frame_size = 6904 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6905 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6906 5 + /* hdp invalidate */ 6907 7 + /* 
gfx_v9_0_ring_emit_pipeline_sync */ 6908 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6909 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6910 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6911 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6912 7 + /* gfx_v9_0_emit_mem_sync */ 6913 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 6914 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 6915 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6916 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6917 .emit_fence = gfx_v9_0_ring_emit_fence, 6918 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6919 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6920 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6921 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6922 .test_ring = gfx_v9_0_ring_test_ring, 6923 .test_ib = gfx_v9_0_ring_test_ib, 6924 .insert_nop = amdgpu_ring_insert_nop, 6925 .pad_ib = amdgpu_ring_generic_pad_ib, 6926 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6927 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6928 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6929 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6930 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 6931 }; 6932 6933 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6934 .type = AMDGPU_RING_TYPE_KIQ, 6935 .align_mask = 0xff, 6936 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6937 .support_64bit_ptrs = true, 6938 .vmhub = AMDGPU_GFXHUB_0, 6939 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6940 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6941 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6942 .emit_frame_size = 6943 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6944 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6945 5 + /* hdp invalidate */ 6946 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6947 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6948 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6949 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6950 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6951 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6952 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6953 .test_ring = gfx_v9_0_ring_test_ring, 6954 .insert_nop = amdgpu_ring_insert_nop, 6955 .pad_ib = amdgpu_ring_generic_pad_ib, 6956 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6957 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6958 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6959 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6960 }; 6961 6962 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6963 { 6964 int i; 6965 6966 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6967 6968 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6969 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6970 6971 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6972 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6973 } 6974 6975 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6976 .set = gfx_v9_0_set_eop_interrupt_state, 6977 .process = gfx_v9_0_eop_irq, 6978 }; 6979 6980 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6981 .set = gfx_v9_0_set_priv_reg_fault_state, 6982 .process = gfx_v9_0_priv_reg_irq, 6983 }; 6984 6985 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6986 .set = gfx_v9_0_set_priv_inst_fault_state, 6987 .process = gfx_v9_0_priv_inst_irq, 6988 }; 6989 6990 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6991 .set = gfx_v9_0_set_cp_ecc_error_state, 6992 
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
	case CHIP_RENOIR:
	case CHIP_ALDEBARAN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gds.gds_size = 0x1000;
		break;
	case CHIP_ALDEBARAN:
		/* aldebaran removed all the GDS internal memory; the kernel
		 * only supports GWS opcodes such as barrier, semaphore, etc.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case CHIP_ARCTURUS:
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case CHIP_ALDEBARAN:
		/* deprecated for Aldebaran, not used at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which suits Vega ASICs
			 * with their 4*2 SE/SH layout.
			 * Arcturus changes the SE/SH layout to 8*1.
			 * To minimize the impact, we map it onto the existing
			 * bitmap array as below:
			 * SE4,SH0 --> bitmap[0][1]
			 * SE5,SH0 --> bitmap[1][1]
			 * SE6,SH0 --> bitmap[2][1]
			 * SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
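
/*
 * Reference note (illustrative only, not part of this file): the IP block
 * version above is what the SoC-level setup code registers so that the
 * amd_ip_funcs and ring/irq callbacks defined here are wired into the
 * device init/fini sequence, typically along the lines of:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 */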