1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/kernel.h> 26 #include <linux/firmware.h> 27 #include <linux/module.h> 28 #include <linux/pci.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_gfx.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "amdgpu_atomfirmware.h" 35 #include "amdgpu_pm.h" 36 37 #include "gc/gc_9_0_offset.h" 38 #include "gc/gc_9_0_sh_mask.h" 39 40 #include "vega10_enum.h" 41 42 #include "soc15_common.h" 43 #include "clearstate_gfx9.h" 44 #include "v9_structs.h" 45 46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 47 48 #include "amdgpu_ras.h" 49 50 #include "gfx_v9_4.h" 51 #include "gfx_v9_0.h" 52 #include "gfx_v9_4_2.h" 53 54 #include "asic_reg/pwr/pwr_10_0_offset.h" 55 #include "asic_reg/pwr/pwr_10_0_sh_mask.h" 56 #include "asic_reg/gc/gc_9_0_default.h" 57 58 #define GFX9_NUM_GFX_RINGS 1 59 #define GFX9_MEC_HPD_SIZE 4096 60 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 61 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 62 63 #define mmGCEA_PROBE_MAP 0x070c 64 #define mmGCEA_PROBE_MAP_BASE_IDX 0 65 66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 68 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 72 73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 75 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 79 80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 82 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 86 87 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 89 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 90 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 93 94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 99 
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 101 102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 112 113 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 114 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 115 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 118 119 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); 120 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); 121 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); 122 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); 123 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); 124 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); 125 126 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 128 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); 129 130 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 131 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 132 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 133 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 134 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 135 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 136 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 137 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 138 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 139 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 140 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 141 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 142 143 enum ta_ras_gfx_subblock { 144 /*CPC*/ 145 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 146 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 147 TA_RAS_BLOCK__GFX_CPC_UCODE, 148 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 149 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 150 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 151 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 152 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 153 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 154 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 155 /* CPF*/ 156 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 157 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 158 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 159 TA_RAS_BLOCK__GFX_CPF_TAG, 160 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 161 /* CPG*/ 162 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 163 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 164 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 165 TA_RAS_BLOCK__GFX_CPG_TAG, 166 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 167 /* GDS*/ 168 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 169 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 170 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 171 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 172 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 173 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 174 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 175 /* SPI*/ 176 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 177 /* SQ*/ 178 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 179 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 180 TA_RAS_BLOCK__GFX_SQ_LDS_D, 181 TA_RAS_BLOCK__GFX_SQ_LDS_I, 182 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 183 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 184 /* SQC (3 ranges)*/ 185 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 186 /* SQC range 
0*/ 187 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 188 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 189 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 190 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 191 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 192 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 193 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 194 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 195 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 196 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 197 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 198 /* SQC range 1*/ 199 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 200 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 201 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 202 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 203 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 204 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 205 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 206 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 207 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 208 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 209 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 210 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 211 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 212 /* SQC range 2*/ 213 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 214 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 215 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 216 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 217 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 218 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 219 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 220 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 221 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 222 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 223 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 224 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 225 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 226 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 227 /* TA*/ 228 TA_RAS_BLOCK__GFX_TA_INDEX_START, 229 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 230 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 231 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 232 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 233 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 234 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 235 /* TCA*/ 236 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 237 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 238 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 239 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 240 /* TCC (5 sub-ranges)*/ 241 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 242 /* TCC range 0*/ 243 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 244 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 245 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 246 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 247 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 248 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 249 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 250 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 251 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 252 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 253 /* TCC range 1*/ 254 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 255 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 256 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 257 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 258 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 259 /* TCC range 2*/ 260 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 261 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 262 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 263 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 264 
TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 265 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 266 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 267 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 268 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 269 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 270 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 271 /* TCC range 3*/ 272 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 273 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 274 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 275 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 276 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 277 /* TCC range 4*/ 278 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 279 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 280 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 281 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 282 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 283 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 284 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 285 /* TCI*/ 286 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 287 /* TCP*/ 288 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 289 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 290 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 291 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 292 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 293 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 294 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 295 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 296 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 297 /* TD*/ 298 TA_RAS_BLOCK__GFX_TD_INDEX_START, 299 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 300 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 301 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 302 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 303 /* EA (3 sub-ranges)*/ 304 TA_RAS_BLOCK__GFX_EA_INDEX_START, 305 /* EA range 0*/ 306 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 307 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 308 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 309 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 310 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 311 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 312 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 313 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 314 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 315 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 316 /* EA range 1*/ 317 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 318 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 319 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 320 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 321 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 322 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 323 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 324 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 325 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 326 /* EA range 2*/ 327 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 328 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 329 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 330 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 331 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 332 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 333 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 334 /* UTC VM L2 bank*/ 335 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 336 /* UTC VM walker*/ 337 TA_RAS_BLOCK__UTC_VML2_WALKER, 338 /* UTC ATC L2 2MB cache*/ 339 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 340 /* UTC ATC L2 4KB cache*/ 341 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 342 TA_RAS_BLOCK__GFX_MAX 343 }; 344 345 struct ras_gfx_subblock { 346 unsigned char *name; 347 int ta_subblock; 348 int hw_supported_error_type; 349 int sw_supported_error_type; 350 }; 351 352 #define 
AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 353 [AMDGPU_RAS_BLOCK__##subblock] = { \ 354 #subblock, \ 355 TA_RAS_BLOCK__##subblock, \ 356 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 357 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 358 } 359 360 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 361 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 362 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 363 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 364 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 365 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 366 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 367 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 368 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 369 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 370 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 371 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 372 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 373 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 374 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 375 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 376 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 377 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 378 0), 379 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 380 0), 381 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 382 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 383 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 384 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 385 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 386 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 387 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 388 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 389 0, 0), 390 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 391 0), 392 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 393 0, 0), 394 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 395 0), 396 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 397 0, 0), 398 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 399 0), 400 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 401 1), 402 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 403 0, 0, 0), 404 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 405 0), 406 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 407 0), 408 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 409 0), 410 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 411 0), 412 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 413 0), 414 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 415 0, 0), 416 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 417 0), 418 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 419 0), 420 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 421 0, 0, 0), 422 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 
423 0), 424 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 425 0), 426 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 427 0), 428 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 429 0), 430 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 431 0), 432 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 433 0, 0), 434 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 435 0), 436 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 437 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 438 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 439 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 440 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 441 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 442 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 443 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 444 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 445 1), 446 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 447 1), 448 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 449 1), 450 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 451 0), 452 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 453 0), 454 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 455 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 456 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 457 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 458 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 459 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 460 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 461 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 462 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 463 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 464 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 465 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 466 0), 467 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 468 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 469 0), 470 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 471 0, 0), 472 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 473 0), 474 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 475 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 476 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 477 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 478 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 479 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 480 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 481 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 482 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 483 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 484 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 485 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 486 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 
0, 1, 1, 1, 0, 0, 0, 0), 487 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 488 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 489 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 490 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 491 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 492 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 493 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 494 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 495 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 496 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 497 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 498 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 499 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 500 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 501 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 502 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 508 }; 509 510 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 511 { 512 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 513 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 514 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 515 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 516 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 517 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 518 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 519 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 520 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 521 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), 522 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), 523 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 524 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 525 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 526 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 527 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 528 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 529 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 530 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 531 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 532 }; 533 534 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 535 { 536 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 537 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 538 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 539 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 540 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 541 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 542 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 543 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 544 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 545 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 546 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 547 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 548 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 549 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 550 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 551 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 552 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 553 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 554 }; 555 556 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 557 { 558 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 559 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 560 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 561 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 562 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 563 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 564 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 565 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 566 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 567 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 568 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 569 }; 570 571 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 572 { 573 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 574 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 575 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 576 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 577 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 578 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 579 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 580 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 581 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 582 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 583 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 584 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 585 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 586 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 587 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 588 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 589 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 590 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 591 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 592 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 593 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 594 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 595 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 596 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 597 }; 598 599 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 600 { 601 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 602 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 603 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 604 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 605 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 606 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 607 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 608 }; 609 610 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 611 { 612 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 613 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 614 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 615 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 616 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 617 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 618 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 619 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 620 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 621 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 622 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 623 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 624 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 625 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 626 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 627 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 628 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 629 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 630 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 631 }; 632 633 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 634 { 635 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 636 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 637 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 638 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 639 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 640 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 641 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 642 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 643 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 
0x010b0000), 644 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 645 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 646 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 647 }; 648 649 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 650 { 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 654 }; 655 656 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 657 { 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 671 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 672 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 673 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 674 }; 675 676 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 677 { 678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 691 }; 692 693 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 694 { 695 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 696 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 700 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), 702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), 703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), 704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), 705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000) 706 }; 707 708 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = { 709 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)}, 710 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)}, 711 }; 712 713 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 714 { 715 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 716 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 717 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 718 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 719 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 720 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 721 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 722 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 723 }; 724 725 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 726 { 727 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 728 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 729 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 730 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 731 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 732 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 733 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 734 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 735 }; 736 737 static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) 738 { 739 static void *scratch_reg0; 740 static void *scratch_reg1; 741 static void *scratch_reg2; 742 static void *scratch_reg3; 743 static void *spare_int; 744 static uint32_t grbm_cntl; 745 static uint32_t grbm_idx; 746 747 scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; 748 scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; 749 scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; 750 scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; 751 spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; 752 753 grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; 754 grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; 755 756 if (amdgpu_sriov_runtime(adev)) { 757 pr_err("shouldn't call rlcg write register during runtime\n"); 758 return; 759 } 760 761 if (offset == grbm_cntl || offset == grbm_idx) { 762 if (offset == grbm_cntl) 763 writel(v, scratch_reg2); 764 else if (offset == grbm_idx) 765 writel(v, scratch_reg3); 766 767 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); 768 } else { 769 uint32_t i = 0; 770 uint32_t retries = 50000; 771 772 writel(v, scratch_reg0); 773 writel(offset | 0x80000000, scratch_reg1); 774 writel(1, spare_int); 775 for (i = 0; i < retries; i++) { 776 u32 tmp; 777 778 tmp = readl(scratch_reg1); 779 if (!(tmp & 0x80000000)) 780 break; 781 782 udelay(10); 783 } 784 if (i >= 
retries) 785 pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset); 786 } 787 788 } 789 790 static void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, 791 u32 v, u32 acc_flags, u32 hwip) 792 { 793 if (amdgpu_sriov_fullaccess(adev)) { 794 gfx_v9_0_rlcg_w(adev, offset, v, acc_flags); 795 796 return; 797 } 798 799 if (acc_flags & AMDGPU_REGS_NO_KIQ) 800 WREG32_NO_KIQ(offset, v); 801 else 802 WREG32(offset, v); 803 } 804 805 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 806 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 807 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 808 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 809 810 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 811 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 812 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 813 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 814 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 815 struct amdgpu_cu_info *cu_info); 816 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 817 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); 818 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 819 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 820 void *ras_error_status); 821 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 822 void *inject_if); 823 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); 824 825 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, 826 uint64_t queue_mask) 827 { 828 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 829 amdgpu_ring_write(kiq_ring, 830 PACKET3_SET_RESOURCES_VMID_MASK(0) | 831 /* vmid_mask:0* queue_type:0 (KIQ) */ 832 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); 833 amdgpu_ring_write(kiq_ring, 834 lower_32_bits(queue_mask)); /* queue mask lo */ 835 amdgpu_ring_write(kiq_ring, 836 upper_32_bits(queue_mask)); /* queue mask hi */ 837 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 838 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 839 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 840 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 841 } 842 843 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, 844 struct amdgpu_ring *ring) 845 { 846 struct amdgpu_device *adev = kiq_ring->adev; 847 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 848 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 849 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 850 851 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 852 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 853 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 854 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 855 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 856 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 857 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 858 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | 859 /*queue_type: normal compute queue */ 860 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | 861 /* alloc format: all_on_one_pipe */ 862 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | 863 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 864 /* num_queues: must be 1 */ 865 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 866 amdgpu_ring_write(kiq_ring, 867 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 868 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 869 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 870 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 871 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 872 } 873 874 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 875 struct amdgpu_ring *ring, 876 enum amdgpu_unmap_queues_action action, 877 u64 gpu_addr, u64 seq) 878 { 879 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 880 881 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 882 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 883 PACKET3_UNMAP_QUEUES_ACTION(action) | 884 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 885 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 886 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 887 amdgpu_ring_write(kiq_ring, 888 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 889 890 if (action == PREEMPT_QUEUES_NO_UNMAP) { 891 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 892 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 893 amdgpu_ring_write(kiq_ring, seq); 894 } else { 895 amdgpu_ring_write(kiq_ring, 0); 896 amdgpu_ring_write(kiq_ring, 0); 897 amdgpu_ring_write(kiq_ring, 0); 898 } 899 } 900 901 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, 902 struct amdgpu_ring *ring, 903 u64 addr, 904 u64 seq) 905 { 906 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 907 908 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 909 amdgpu_ring_write(kiq_ring, 910 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 911 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 912 PACKET3_QUERY_STATUS_COMMAND(2)); 913 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 914 amdgpu_ring_write(kiq_ring, 915 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 916 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 917 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 918 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 919 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 920 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 921 } 922 923 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 924 uint16_t pasid, uint32_t flush_type, 925 bool all_hub) 926 { 927 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 928 amdgpu_ring_write(kiq_ring, 929 PACKET3_INVALIDATE_TLBS_DST_SEL(1) | 930 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 931 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 932 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 933 } 934 935 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { 936 .kiq_set_resources = gfx_v9_0_kiq_set_resources, 937 .kiq_map_queues = gfx_v9_0_kiq_map_queues, 938 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, 939 .kiq_query_status = gfx_v9_0_kiq_query_status, 940 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, 941 .set_resources_size = 8, 942 .map_queues_size = 7, 943 .unmap_queues_size = 6, 944 .query_status_size = 7, 945 .invalidate_tlbs_size = 2, 946 }; 947 948 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 949 { 950 adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; 951 } 952 953 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 954 { 955 switch (adev->asic_type) { 956 case CHIP_VEGA10: 957 soc15_program_register_sequence(adev, 958 golden_settings_gc_9_0, 959 ARRAY_SIZE(golden_settings_gc_9_0)); 960 soc15_program_register_sequence(adev, 961 golden_settings_gc_9_0_vg10, 962 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 963 break; 964 case CHIP_VEGA12: 965 soc15_program_register_sequence(adev, 966 golden_settings_gc_9_2_1, 967 ARRAY_SIZE(golden_settings_gc_9_2_1)); 968 soc15_program_register_sequence(adev, 969 golden_settings_gc_9_2_1_vg12, 970 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 971 break; 972 case CHIP_VEGA20: 973 soc15_program_register_sequence(adev, 974 golden_settings_gc_9_0, 975 ARRAY_SIZE(golden_settings_gc_9_0)); 976 soc15_program_register_sequence(adev, 977 golden_settings_gc_9_0_vg20, 978 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 979 break; 980 case CHIP_ARCTURUS: 981 soc15_program_register_sequence(adev, 982 golden_settings_gc_9_4_1_arct, 983 ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); 984 break; 985 case CHIP_RAVEN: 986 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 987 ARRAY_SIZE(golden_settings_gc_9_1)); 988 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 989 soc15_program_register_sequence(adev, 990 golden_settings_gc_9_1_rv2, 991 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 992 else 993 soc15_program_register_sequence(adev, 994 golden_settings_gc_9_1_rv1, 995 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 996 break; 997 case CHIP_RENOIR: 998 soc15_program_register_sequence(adev, 999 golden_settings_gc_9_1_rn, 1000 ARRAY_SIZE(golden_settings_gc_9_1_rn)); 1001 return; /* for renoir, don't need common goldensetting */ 1002 case CHIP_ALDEBARAN: 1003 gfx_v9_4_2_init_golden_registers(adev, 1004 adev->smuio.funcs->get_die_id(adev)); 1005 break; 1006 default: 1007 
break; 1008 } 1009 1010 if ((adev->asic_type != CHIP_ARCTURUS) && 1011 (adev->asic_type != CHIP_ALDEBARAN)) 1012 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 1013 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 1014 } 1015 1016 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) 1017 { 1018 adev->gfx.scratch.num_reg = 8; 1019 adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1020 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 1021 } 1022 1023 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 1024 bool wc, uint32_t reg, uint32_t val) 1025 { 1026 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 1027 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 1028 WRITE_DATA_DST_SEL(0) | 1029 (wc ? WR_CONFIRM : 0)); 1030 amdgpu_ring_write(ring, reg); 1031 amdgpu_ring_write(ring, 0); 1032 amdgpu_ring_write(ring, val); 1033 } 1034 1035 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 1036 int mem_space, int opt, uint32_t addr0, 1037 uint32_t addr1, uint32_t ref, uint32_t mask, 1038 uint32_t inv) 1039 { 1040 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 1041 amdgpu_ring_write(ring, 1042 /* memory (1) or register (0) */ 1043 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 1044 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 1045 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 1046 WAIT_REG_MEM_ENGINE(eng_sel))); 1047 1048 if (mem_space) 1049 BUG_ON(addr0 & 0x3); /* Dword align */ 1050 amdgpu_ring_write(ring, addr0); 1051 amdgpu_ring_write(ring, addr1); 1052 amdgpu_ring_write(ring, ref); 1053 amdgpu_ring_write(ring, mask); 1054 amdgpu_ring_write(ring, inv); /* poll interval */ 1055 } 1056 1057 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 1058 { 1059 struct amdgpu_device *adev = ring->adev; 1060 uint32_t scratch; 1061 uint32_t tmp = 0; 1062 unsigned i; 1063 int r; 1064 1065 r = amdgpu_gfx_scratch_get(adev, &scratch); 1066 if (r) 1067 return r; 1068 1069 WREG32(scratch, 0xCAFEDEAD); 1070 r = amdgpu_ring_alloc(ring, 3); 1071 if (r) 1072 goto error_free_scratch; 1073 1074 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1075 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 1076 amdgpu_ring_write(ring, 0xDEADBEEF); 1077 amdgpu_ring_commit(ring); 1078 1079 for (i = 0; i < adev->usec_timeout; i++) { 1080 tmp = RREG32(scratch); 1081 if (tmp == 0xDEADBEEF) 1082 break; 1083 udelay(1); 1084 } 1085 1086 if (i >= adev->usec_timeout) 1087 r = -ETIMEDOUT; 1088 1089 error_free_scratch: 1090 amdgpu_gfx_scratch_free(adev, scratch); 1091 return r; 1092 } 1093 1094 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1095 { 1096 struct amdgpu_device *adev = ring->adev; 1097 struct amdgpu_ib ib; 1098 struct dma_fence *f = NULL; 1099 1100 unsigned index; 1101 uint64_t gpu_addr; 1102 uint32_t tmp; 1103 long r; 1104 1105 r = amdgpu_device_wb_get(adev, &index); 1106 if (r) 1107 return r; 1108 1109 gpu_addr = adev->wb.gpu_addr + (index * 4); 1110 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 1111 memset(&ib, 0, sizeof(ib)); 1112 r = amdgpu_ib_get(adev, NULL, 16, 1113 AMDGPU_IB_POOL_DIRECT, &ib); 1114 if (r) 1115 goto err1; 1116 1117 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 1118 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1119 ib.ptr[2] = lower_32_bits(gpu_addr); 1120 ib.ptr[3] = upper_32_bits(gpu_addr); 1121 ib.ptr[4] = 0xDEADBEEF; 1122 ib.length_dw = 5; 1123 1124 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1125 if (r) 
1126 goto err2; 1127 1128 r = dma_fence_wait_timeout(f, false, timeout); 1129 if (r == 0) { 1130 r = -ETIMEDOUT; 1131 goto err2; 1132 } else if (r < 0) { 1133 goto err2; 1134 } 1135 1136 tmp = adev->wb.wb[index]; 1137 if (tmp == 0xDEADBEEF) 1138 r = 0; 1139 else 1140 r = -EINVAL; 1141 1142 err2: 1143 amdgpu_ib_free(adev, &ib, NULL); 1144 dma_fence_put(f); 1145 err1: 1146 amdgpu_device_wb_free(adev, index); 1147 return r; 1148 } 1149 1150 1151 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 1152 { 1153 release_firmware(adev->gfx.pfp_fw); 1154 adev->gfx.pfp_fw = NULL; 1155 release_firmware(adev->gfx.me_fw); 1156 adev->gfx.me_fw = NULL; 1157 release_firmware(adev->gfx.ce_fw); 1158 adev->gfx.ce_fw = NULL; 1159 release_firmware(adev->gfx.rlc_fw); 1160 adev->gfx.rlc_fw = NULL; 1161 release_firmware(adev->gfx.mec_fw); 1162 adev->gfx.mec_fw = NULL; 1163 release_firmware(adev->gfx.mec2_fw); 1164 adev->gfx.mec2_fw = NULL; 1165 1166 kfree(adev->gfx.rlc.register_list_format); 1167 } 1168 1169 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) 1170 { 1171 const struct rlc_firmware_header_v2_1 *rlc_hdr; 1172 1173 rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 1174 adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver); 1175 adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver); 1176 adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes); 1177 adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes); 1178 adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver); 1179 adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver); 1180 adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes); 1181 adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes); 1182 adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver); 1183 adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver); 1184 adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes); 1185 adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes); 1186 adev->gfx.rlc.reg_list_format_direct_reg_list_length = 1187 le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); 1188 } 1189 1190 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 1191 { 1192 adev->gfx.me_fw_write_wait = false; 1193 adev->gfx.mec_fw_write_wait = false; 1194 1195 if ((adev->asic_type != CHIP_ARCTURUS) && 1196 ((adev->gfx.mec_fw_version < 0x000001a5) || 1197 (adev->gfx.mec_feature_version < 46) || 1198 (adev->gfx.pfp_fw_version < 0x000000b7) || 1199 (adev->gfx.pfp_feature_version < 46))) 1200 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1201 1202 switch (adev->asic_type) { 1203 case CHIP_VEGA10: 1204 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1205 (adev->gfx.me_feature_version >= 42) && 1206 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1207 (adev->gfx.pfp_feature_version >= 42)) 1208 adev->gfx.me_fw_write_wait = true; 1209 1210 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1211 (adev->gfx.mec_feature_version >= 42)) 1212 adev->gfx.mec_fw_write_wait = true; 1213 break; 1214 case 
CHIP_VEGA12: 1215 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1216 (adev->gfx.me_feature_version >= 44) && 1217 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1218 (adev->gfx.pfp_feature_version >= 44)) 1219 adev->gfx.me_fw_write_wait = true; 1220 1221 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1222 (adev->gfx.mec_feature_version >= 44)) 1223 adev->gfx.mec_fw_write_wait = true; 1224 break; 1225 case CHIP_VEGA20: 1226 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1227 (adev->gfx.me_feature_version >= 44) && 1228 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1229 (adev->gfx.pfp_feature_version >= 44)) 1230 adev->gfx.me_fw_write_wait = true; 1231 1232 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1233 (adev->gfx.mec_feature_version >= 44)) 1234 adev->gfx.mec_fw_write_wait = true; 1235 break; 1236 case CHIP_RAVEN: 1237 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1238 (adev->gfx.me_feature_version >= 42) && 1239 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1240 (adev->gfx.pfp_feature_version >= 42)) 1241 adev->gfx.me_fw_write_wait = true; 1242 1243 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1244 (adev->gfx.mec_feature_version >= 42)) 1245 adev->gfx.mec_fw_write_wait = true; 1246 break; 1247 default: 1248 adev->gfx.me_fw_write_wait = true; 1249 adev->gfx.mec_fw_write_wait = true; 1250 break; 1251 } 1252 } 1253 1254 struct amdgpu_gfxoff_quirk { 1255 u16 chip_vendor; 1256 u16 chip_device; 1257 u16 subsys_vendor; 1258 u16 subsys_device; 1259 u8 revision; 1260 }; 1261 1262 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1263 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1264 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1265 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1266 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1267 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1268 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1269 { 0, 0, 0, 0, 0 }, 1270 }; 1271 1272 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1273 { 1274 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1275 1276 while (p && p->chip_device != 0) { 1277 if (pdev->vendor == p->chip_vendor && 1278 pdev->device == p->chip_device && 1279 pdev->subsystem_vendor == p->subsys_vendor && 1280 pdev->subsystem_device == p->subsys_device && 1281 pdev->revision == p->revision) { 1282 return true; 1283 } 1284 ++p; 1285 } 1286 return false; 1287 } 1288 1289 static bool is_raven_kicker(struct amdgpu_device *adev) 1290 { 1291 if (adev->pm.fw_version >= 0x41e2b) 1292 return true; 1293 else 1294 return false; 1295 } 1296 1297 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1298 { 1299 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1300 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1301 1302 switch (adev->asic_type) { 1303 case CHIP_VEGA10: 1304 case CHIP_VEGA12: 1305 case CHIP_VEGA20: 1306 break; 1307 case CHIP_RAVEN: 1308 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1309 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1310 ((!is_raven_kicker(adev) && 1311 adev->gfx.rlc_fw_version < 531) || 1312 (adev->gfx.rlc_feature_version < 1) || 1313 !adev->gfx.rlc.is_rlc_v2_1)) 1314 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1315 1316 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1317 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1318 AMD_PG_SUPPORT_CP | 1319 AMD_PG_SUPPORT_RLC_SMU_HS; 1320 break; 1321 case CHIP_RENOIR: 1322 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1323 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1324 AMD_PG_SUPPORT_CP | 1325 AMD_PG_SUPPORT_RLC_SMU_HS; 
1326 break; 1327 default: 1328 break; 1329 } 1330 } 1331 1332 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1333 const char *chip_name) 1334 { 1335 char fw_name[30]; 1336 int err; 1337 struct amdgpu_firmware_info *info = NULL; 1338 const struct common_firmware_header *header = NULL; 1339 const struct gfx_firmware_header_v1_0 *cp_hdr; 1340 1341 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1342 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1343 if (err) 1344 goto out; 1345 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1346 if (err) 1347 goto out; 1348 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1349 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1350 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1351 1352 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1353 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1354 if (err) 1355 goto out; 1356 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1357 if (err) 1358 goto out; 1359 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1360 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1361 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1362 1363 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1364 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1365 if (err) 1366 goto out; 1367 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1368 if (err) 1369 goto out; 1370 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1371 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1372 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1373 1374 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1375 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1376 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1377 info->fw = adev->gfx.pfp_fw; 1378 header = (const struct common_firmware_header *)info->fw->data; 1379 adev->firmware.fw_size += 1380 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1381 1382 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1383 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1384 info->fw = adev->gfx.me_fw; 1385 header = (const struct common_firmware_header *)info->fw->data; 1386 adev->firmware.fw_size += 1387 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1388 1389 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1390 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1391 info->fw = adev->gfx.ce_fw; 1392 header = (const struct common_firmware_header *)info->fw->data; 1393 adev->firmware.fw_size += 1394 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1395 } 1396 1397 out: 1398 if (err) { 1399 dev_err(adev->dev, 1400 "gfx9: Failed to load firmware \"%s\"\n", 1401 fw_name); 1402 release_firmware(adev->gfx.pfp_fw); 1403 adev->gfx.pfp_fw = NULL; 1404 release_firmware(adev->gfx.me_fw); 1405 adev->gfx.me_fw = NULL; 1406 release_firmware(adev->gfx.ce_fw); 1407 adev->gfx.ce_fw = NULL; 1408 } 1409 return err; 1410 } 1411 1412 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1413 const char *chip_name) 1414 { 1415 char fw_name[30]; 1416 int err; 1417 struct amdgpu_firmware_info *info = NULL; 1418 const struct common_firmware_header *header = NULL; 1419 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1420 unsigned int *tmp = NULL; 1421 unsigned int i = 0; 1422 uint16_t version_major; 1423 
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 * or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") &&
		 (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU
		 * version directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);

	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1502 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1503 info->fw = adev->gfx.rlc_fw; 1504 header = (const struct common_firmware_header *)info->fw->data; 1505 adev->firmware.fw_size += 1506 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1507 1508 if (adev->gfx.rlc.is_rlc_v2_1 && 1509 adev->gfx.rlc.save_restore_list_cntl_size_bytes && 1510 adev->gfx.rlc.save_restore_list_gpm_size_bytes && 1511 adev->gfx.rlc.save_restore_list_srm_size_bytes) { 1512 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL]; 1513 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL; 1514 info->fw = adev->gfx.rlc_fw; 1515 adev->firmware.fw_size += 1516 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE); 1517 1518 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM]; 1519 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM; 1520 info->fw = adev->gfx.rlc_fw; 1521 adev->firmware.fw_size += 1522 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE); 1523 1524 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM]; 1525 info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM; 1526 info->fw = adev->gfx.rlc_fw; 1527 adev->firmware.fw_size += 1528 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); 1529 } 1530 } 1531 1532 out: 1533 if (err) { 1534 dev_err(adev->dev, 1535 "gfx9: Failed to load firmware \"%s\"\n", 1536 fw_name); 1537 release_firmware(adev->gfx.rlc_fw); 1538 adev->gfx.rlc_fw = NULL; 1539 } 1540 return err; 1541 } 1542 1543 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1544 { 1545 if (adev->asic_type == CHIP_ALDEBARAN || 1546 adev->asic_type == CHIP_ARCTURUS || 1547 adev->asic_type == CHIP_RENOIR) 1548 return false; 1549 1550 return true; 1551 } 1552 1553 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1554 const char *chip_name) 1555 { 1556 char fw_name[30]; 1557 int err; 1558 struct amdgpu_firmware_info *info = NULL; 1559 const struct common_firmware_header *header = NULL; 1560 const struct gfx_firmware_header_v1_0 *cp_hdr; 1561 1562 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1563 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1564 if (err) 1565 goto out; 1566 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1567 if (err) 1568 goto out; 1569 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1570 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1571 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1572 1573 1574 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1575 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1576 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1577 if (!err) { 1578 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1579 if (err) 1580 goto out; 1581 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1582 adev->gfx.mec2_fw->data; 1583 adev->gfx.mec2_fw_version = 1584 le32_to_cpu(cp_hdr->header.ucode_version); 1585 adev->gfx.mec2_feature_version = 1586 le32_to_cpu(cp_hdr->ucode_feature_version); 1587 } else { 1588 err = 0; 1589 adev->gfx.mec2_fw = NULL; 1590 } 1591 } else { 1592 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1593 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1594 } 1595 1596 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1597 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1598 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1599 info->fw = 
adev->gfx.mec_fw; 1600 header = (const struct common_firmware_header *)info->fw->data; 1601 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1602 adev->firmware.fw_size += 1603 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1604 1605 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; 1606 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; 1607 info->fw = adev->gfx.mec_fw; 1608 adev->firmware.fw_size += 1609 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1610 1611 if (adev->gfx.mec2_fw) { 1612 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1613 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1614 info->fw = adev->gfx.mec2_fw; 1615 header = (const struct common_firmware_header *)info->fw->data; 1616 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; 1617 adev->firmware.fw_size += 1618 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); 1619 1620 /* TODO: Determine if MEC2 JT FW loading can be removed 1621 for all GFX V9 asic and above */ 1622 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1623 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; 1624 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; 1625 info->fw = adev->gfx.mec2_fw; 1626 adev->firmware.fw_size += 1627 ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, 1628 PAGE_SIZE); 1629 } 1630 } 1631 } 1632 1633 out: 1634 gfx_v9_0_check_if_need_gfxoff(adev); 1635 gfx_v9_0_check_fw_write_wait(adev); 1636 if (err) { 1637 dev_err(adev->dev, 1638 "gfx9: Failed to load firmware \"%s\"\n", 1639 fw_name); 1640 release_firmware(adev->gfx.mec_fw); 1641 adev->gfx.mec_fw = NULL; 1642 release_firmware(adev->gfx.mec2_fw); 1643 adev->gfx.mec2_fw = NULL; 1644 } 1645 return err; 1646 } 1647 1648 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1649 { 1650 const char *chip_name; 1651 int r; 1652 1653 DRM_DEBUG("\n"); 1654 1655 switch (adev->asic_type) { 1656 case CHIP_VEGA10: 1657 chip_name = "vega10"; 1658 break; 1659 case CHIP_VEGA12: 1660 chip_name = "vega12"; 1661 break; 1662 case CHIP_VEGA20: 1663 chip_name = "vega20"; 1664 break; 1665 case CHIP_RAVEN: 1666 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1667 chip_name = "raven2"; 1668 else if (adev->apu_flags & AMD_APU_IS_PICASSO) 1669 chip_name = "picasso"; 1670 else 1671 chip_name = "raven"; 1672 break; 1673 case CHIP_ARCTURUS: 1674 chip_name = "arcturus"; 1675 break; 1676 case CHIP_RENOIR: 1677 if (adev->apu_flags & AMD_APU_IS_RENOIR) 1678 chip_name = "renoir"; 1679 else 1680 chip_name = "green_sardine"; 1681 break; 1682 case CHIP_ALDEBARAN: 1683 chip_name = "aldebaran"; 1684 break; 1685 default: 1686 BUG(); 1687 } 1688 1689 /* No CPG in Arcturus */ 1690 if (adev->gfx.num_gfx_rings) { 1691 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); 1692 if (r) 1693 return r; 1694 } 1695 1696 r = gfx_v9_0_init_rlc_microcode(adev, chip_name); 1697 if (r) 1698 return r; 1699 1700 r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); 1701 if (r) 1702 return r; 1703 1704 return r; 1705 } 1706 1707 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1708 { 1709 u32 count = 0; 1710 const struct cs_section_def *sect = NULL; 1711 const struct cs_extent_def *ext = NULL; 1712 1713 /* begin clear state */ 1714 count += 2; 1715 /* context control state */ 1716 count += 3; 1717 1718 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1719 for (ext = sect->section; ext->extent != NULL; ++ext) { 1720 if (sect->id == SECT_CONTEXT) 1721 count += 2 + ext->reg_count; 1722 else 1723 
return 0; 1724 } 1725 } 1726 1727 /* end clear state */ 1728 count += 2; 1729 /* clear state */ 1730 count += 2; 1731 1732 return count; 1733 } 1734 1735 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1736 volatile u32 *buffer) 1737 { 1738 u32 count = 0, i; 1739 const struct cs_section_def *sect = NULL; 1740 const struct cs_extent_def *ext = NULL; 1741 1742 if (adev->gfx.rlc.cs_data == NULL) 1743 return; 1744 if (buffer == NULL) 1745 return; 1746 1747 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1748 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1749 1750 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1751 buffer[count++] = cpu_to_le32(0x80000000); 1752 buffer[count++] = cpu_to_le32(0x80000000); 1753 1754 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1755 for (ext = sect->section; ext->extent != NULL; ++ext) { 1756 if (sect->id == SECT_CONTEXT) { 1757 buffer[count++] = 1758 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1759 buffer[count++] = cpu_to_le32(ext->reg_index - 1760 PACKET3_SET_CONTEXT_REG_START); 1761 for (i = 0; i < ext->reg_count; i++) 1762 buffer[count++] = cpu_to_le32(ext->extent[i]); 1763 } else { 1764 return; 1765 } 1766 } 1767 } 1768 1769 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1770 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1771 1772 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1773 buffer[count++] = cpu_to_le32(0); 1774 } 1775 1776 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1777 { 1778 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1779 uint32_t pg_always_on_cu_num = 2; 1780 uint32_t always_on_cu_num; 1781 uint32_t i, j, k; 1782 uint32_t mask, cu_bitmap, counter; 1783 1784 if (adev->flags & AMD_IS_APU) 1785 always_on_cu_num = 4; 1786 else if (adev->asic_type == CHIP_VEGA12) 1787 always_on_cu_num = 8; 1788 else 1789 always_on_cu_num = 12; 1790 1791 mutex_lock(&adev->grbm_idx_mutex); 1792 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1793 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1794 mask = 1; 1795 cu_bitmap = 0; 1796 counter = 0; 1797 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1798 1799 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1800 if (cu_info->bitmap[i][j] & mask) { 1801 if (counter == pg_always_on_cu_num) 1802 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1803 if (counter < always_on_cu_num) 1804 cu_bitmap |= mask; 1805 else 1806 break; 1807 counter++; 1808 } 1809 mask <<= 1; 1810 } 1811 1812 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1813 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1814 } 1815 } 1816 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1817 mutex_unlock(&adev->grbm_idx_mutex); 1818 } 1819 1820 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1821 { 1822 uint32_t data; 1823 1824 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1825 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1826 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1827 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1828 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1829 1830 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1831 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1832 1833 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1834 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1835 1836 mutex_lock(&adev->grbm_idx_mutex); 
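	/* The LBPW registers below are per-SE/SH, so they are programmed with
	 * GRBM_GFX_INDEX in broadcast mode while grbm_idx_mutex is held. */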
1837 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1838 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1839 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1840 1841 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1842 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1843 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1844 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1845 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1846 1847 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1848 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1849 data &= 0x0000FFFF; 1850 data |= 0x00C00000; 1851 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1852 1853 /* 1854 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1855 * programmed in gfx_v9_0_init_always_on_cu_mask() 1856 */ 1857 1858 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1859 * but used for RLC_LB_CNTL configuration */ 1860 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1861 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1862 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1863 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1864 mutex_unlock(&adev->grbm_idx_mutex); 1865 1866 gfx_v9_0_init_always_on_cu_mask(adev); 1867 } 1868 1869 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1870 { 1871 uint32_t data; 1872 1873 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1874 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1875 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1876 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1877 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1878 1879 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1880 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1881 1882 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1883 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1884 1885 mutex_lock(&adev->grbm_idx_mutex); 1886 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1887 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1888 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1889 1890 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1891 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1892 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1893 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1894 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1895 1896 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1897 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1898 data &= 0x0000FFFF; 1899 data |= 0x00C00000; 1900 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1901 1902 /* 1903 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1904 * programmed in gfx_v9_0_init_always_on_cu_mask() 1905 */ 1906 1907 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1908 * but used for RLC_LB_CNTL configuration */ 1909 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1910 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1911 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1912 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1913 mutex_unlock(&adev->grbm_idx_mutex); 1914 1915 gfx_v9_0_init_always_on_cu_mask(adev); 1916 } 1917 1918 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1919 { 1920 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); 1921 } 1922 1923 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1924 { 1925 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1926 return 5; 1927 else 1928 return 4; 1929 } 1930 1931 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1932 { 1933 const struct cs_section_def *cs_data; 1934 int r; 1935 1936 adev->gfx.rlc.cs_data = gfx9_cs_data; 1937 1938 cs_data = adev->gfx.rlc.cs_data; 1939 1940 if (cs_data) { 1941 /* init clear state block */ 1942 r = amdgpu_gfx_rlc_init_csb(adev); 1943 if (r) 1944 return r; 1945 } 1946 1947 if (adev->flags & AMD_IS_APU) { 1948 /* TODO: double check the cp_table_size for RV */ 1949 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1950 r = amdgpu_gfx_rlc_init_cpt(adev); 1951 if (r) 1952 return r; 1953 } 1954 1955 switch (adev->asic_type) { 1956 case CHIP_RAVEN: 1957 gfx_v9_0_init_lbpw(adev); 1958 break; 1959 case CHIP_VEGA20: 1960 gfx_v9_4_init_lbpw(adev); 1961 break; 1962 default: 1963 break; 1964 } 1965 1966 /* init spm vmid with 0xf */ 1967 if (adev->gfx.rlc.funcs->update_spm_vmid) 1968 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); 1969 1970 return 0; 1971 } 1972 1973 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1974 { 1975 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1976 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1977 } 1978 1979 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1980 { 1981 int r; 1982 u32 *hpd; 1983 const __le32 *fw_data; 1984 unsigned fw_size; 1985 u32 *fw; 1986 size_t mec_hpd_size; 1987 1988 const struct gfx_firmware_header_v1_0 *mec_hdr; 1989 1990 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1991 1992 /* take ownership of the relevant compute queues */ 1993 amdgpu_gfx_compute_queue_acquire(adev); 1994 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1995 if (mec_hpd_size) { 1996 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1997 AMDGPU_GEM_DOMAIN_VRAM, 1998 &adev->gfx.mec.hpd_eop_obj, 1999 &adev->gfx.mec.hpd_eop_gpu_addr, 2000 (void **)&hpd); 2001 if (r) { 2002 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 2003 gfx_v9_0_mec_fini(adev); 2004 return r; 2005 } 2006 2007 memset(hpd, 0, mec_hpd_size); 2008 2009 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 2010 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 2011 } 2012 2013 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2014 2015 fw_data = (const __le32 *) 2016 (adev->gfx.mec_fw->data + 2017 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2018 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 2019 2020 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 2021 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2022 &adev->gfx.mec.mec_fw_obj, 2023 &adev->gfx.mec.mec_fw_gpu_addr, 2024 (void **)&fw); 2025 if (r) { 2026 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 2027 gfx_v9_0_mec_fini(adev); 2028 return r; 2029 } 2030 2031 memcpy(fw, fw_data, fw_size); 2032 2033 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 2034 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 2035 2036 return 0; 2037 } 2038 2039 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 2040 { 2041 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2042 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2043 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2044 (address << SQ_IND_INDEX__INDEX__SHIFT) | 2045 (SQ_IND_INDEX__FORCE_READ_MASK)); 2046 return 
RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2047 } 2048 2049 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 2050 uint32_t wave, uint32_t thread, 2051 uint32_t regno, uint32_t num, uint32_t *out) 2052 { 2053 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 2054 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 2055 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 2056 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 2057 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 2058 (SQ_IND_INDEX__FORCE_READ_MASK) | 2059 (SQ_IND_INDEX__AUTO_INCR_MASK)); 2060 while (num--) 2061 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 2062 } 2063 2064 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 2065 { 2066 /* type 1 wave data */ 2067 dst[(*no_fields)++] = 1; 2068 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 2069 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 2070 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 2071 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 2072 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 2073 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 2074 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 2075 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 2076 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 2077 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 2078 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 2079 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 2080 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 2081 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 2082 } 2083 2084 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 2085 uint32_t wave, uint32_t start, 2086 uint32_t size, uint32_t *dst) 2087 { 2088 wave_read_regs( 2089 adev, simd, wave, 0, 2090 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 2091 } 2092 2093 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, 2094 uint32_t wave, uint32_t thread, 2095 uint32_t start, uint32_t size, 2096 uint32_t *dst) 2097 { 2098 wave_read_regs( 2099 adev, simd, wave, thread, 2100 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 2101 } 2102 2103 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 2104 u32 me, u32 pipe, u32 q, u32 vm) 2105 { 2106 soc15_grbm_select(adev, me, pipe, q, vm); 2107 } 2108 2109 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2110 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2111 .select_se_sh = &gfx_v9_0_select_se_sh, 2112 .read_wave_data = &gfx_v9_0_read_wave_data, 2113 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2114 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2115 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2116 }; 2117 2118 static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = { 2119 .ras_late_init = amdgpu_gfx_ras_late_init, 2120 .ras_fini = amdgpu_gfx_ras_fini, 2121 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2122 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2123 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2124 }; 2125 2126 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2127 { 2128 u32 gb_addr_config; 2129 int err; 2130 2131 adev->gfx.funcs = 
&gfx_v9_0_gfx_funcs; 2132 2133 switch (adev->asic_type) { 2134 case CHIP_VEGA10: 2135 adev->gfx.config.max_hw_contexts = 8; 2136 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2137 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2138 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2139 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2140 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2141 break; 2142 case CHIP_VEGA12: 2143 adev->gfx.config.max_hw_contexts = 8; 2144 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2145 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2146 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2147 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2148 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2149 DRM_INFO("fix gfx.config for vega12\n"); 2150 break; 2151 case CHIP_VEGA20: 2152 adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs; 2153 adev->gfx.config.max_hw_contexts = 8; 2154 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2155 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2156 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2157 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2158 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2159 gb_addr_config &= ~0xf3e777ff; 2160 gb_addr_config |= 0x22014042; 2161 /* check vbios table if gpu info is not available */ 2162 err = amdgpu_atomfirmware_get_gfx_info(adev); 2163 if (err) 2164 return err; 2165 break; 2166 case CHIP_RAVEN: 2167 adev->gfx.config.max_hw_contexts = 8; 2168 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2169 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2170 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2171 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2172 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2173 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2174 else 2175 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2176 break; 2177 case CHIP_ARCTURUS: 2178 adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs; 2179 adev->gfx.config.max_hw_contexts = 8; 2180 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2181 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2182 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2183 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2184 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2185 gb_addr_config &= ~0xf3e777ff; 2186 gb_addr_config |= 0x22014042; 2187 break; 2188 case CHIP_RENOIR: 2189 adev->gfx.config.max_hw_contexts = 8; 2190 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2191 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2192 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2193 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2194 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2195 gb_addr_config &= ~0xf3e777ff; 2196 gb_addr_config |= 0x22010042; 2197 break; 2198 case CHIP_ALDEBARAN: 2199 adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs; 2200 adev->gfx.config.max_hw_contexts = 8; 2201 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2202 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2203 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2204 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2205 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2206 gb_addr_config &= ~0xf3e777ff; 2207 gb_addr_config |= 0x22014042; 2208 /* check vbios table if gpu info is not available */ 2209 err = amdgpu_atomfirmware_get_gfx_info(adev); 2210 if (err) 2211 return err; 2212 break; 2213 default: 2214 BUG(); 2215 break; 2216 } 2217 2218 adev->gfx.config.gb_addr_config = gb_addr_config; 2219 2220 
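	/* Cache the individual GB_ADDR_CONFIG fields as their decoded
	 * power-of-two values for later tiling and addressing setup. */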
	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}

static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int hw_prio;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX9_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2283 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 2284 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2285 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2286 hw_prio, NULL); 2287 } 2288 2289 static int gfx_v9_0_sw_init(void *handle) 2290 { 2291 int i, j, k, r, ring_id; 2292 struct amdgpu_ring *ring; 2293 struct amdgpu_kiq *kiq; 2294 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2295 2296 switch (adev->asic_type) { 2297 case CHIP_VEGA10: 2298 case CHIP_VEGA12: 2299 case CHIP_VEGA20: 2300 case CHIP_RAVEN: 2301 case CHIP_ARCTURUS: 2302 case CHIP_RENOIR: 2303 case CHIP_ALDEBARAN: 2304 adev->gfx.mec.num_mec = 2; 2305 break; 2306 default: 2307 adev->gfx.mec.num_mec = 1; 2308 break; 2309 } 2310 2311 adev->gfx.mec.num_pipe_per_mec = 4; 2312 adev->gfx.mec.num_queue_per_pipe = 8; 2313 2314 /* EOP Event */ 2315 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2316 if (r) 2317 return r; 2318 2319 /* Privileged reg */ 2320 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2321 &adev->gfx.priv_reg_irq); 2322 if (r) 2323 return r; 2324 2325 /* Privileged inst */ 2326 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2327 &adev->gfx.priv_inst_irq); 2328 if (r) 2329 return r; 2330 2331 /* ECC error */ 2332 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2333 &adev->gfx.cp_ecc_error_irq); 2334 if (r) 2335 return r; 2336 2337 /* FUE error */ 2338 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2339 &adev->gfx.cp_ecc_error_irq); 2340 if (r) 2341 return r; 2342 2343 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2344 2345 gfx_v9_0_scratch_init(adev); 2346 2347 r = gfx_v9_0_init_microcode(adev); 2348 if (r) { 2349 DRM_ERROR("Failed to load gfx firmware!\n"); 2350 return r; 2351 } 2352 2353 r = adev->gfx.rlc.funcs->init(adev); 2354 if (r) { 2355 DRM_ERROR("Failed to init rlc BOs!\n"); 2356 return r; 2357 } 2358 2359 r = gfx_v9_0_mec_init(adev); 2360 if (r) { 2361 DRM_ERROR("Failed to init MEC BOs!\n"); 2362 return r; 2363 } 2364 2365 /* set up the gfx ring */ 2366 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2367 ring = &adev->gfx.gfx_ring[i]; 2368 ring->ring_obj = NULL; 2369 if (!i) 2370 sprintf(ring->name, "gfx"); 2371 else 2372 sprintf(ring->name, "gfx_%d", i); 2373 ring->use_doorbell = true; 2374 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2375 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2376 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2377 AMDGPU_RING_PRIO_DEFAULT, NULL); 2378 if (r) 2379 return r; 2380 } 2381 2382 /* set up the compute queues - allocate horizontally across pipes */ 2383 ring_id = 0; 2384 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2385 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2386 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2387 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2388 continue; 2389 2390 r = gfx_v9_0_compute_ring_init(adev, 2391 ring_id, 2392 i, k, j); 2393 if (r) 2394 return r; 2395 2396 ring_id++; 2397 } 2398 } 2399 } 2400 2401 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 2402 if (r) { 2403 DRM_ERROR("Failed to init KIQ BOs!\n"); 2404 return r; 2405 } 2406 2407 kiq = &adev->gfx.kiq; 2408 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2409 if (r) 2410 return r; 2411 2412 /* create MQD for all compute queues as wel 
as KIQ for SRIOV case */ 2413 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 2414 if (r) 2415 return r; 2416 2417 adev->gfx.ce_ram_size = 0x8000; 2418 2419 r = gfx_v9_0_gpu_early_init(adev); 2420 if (r) 2421 return r; 2422 2423 return 0; 2424 } 2425 2426 2427 static int gfx_v9_0_sw_fini(void *handle) 2428 { 2429 int i; 2430 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2431 2432 if (adev->gfx.ras_funcs && 2433 adev->gfx.ras_funcs->ras_fini) 2434 adev->gfx.ras_funcs->ras_fini(adev); 2435 2436 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2437 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2438 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2439 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2440 2441 amdgpu_gfx_mqd_sw_fini(adev); 2442 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); 2443 amdgpu_gfx_kiq_fini(adev); 2444 2445 gfx_v9_0_mec_fini(adev); 2446 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 2447 if (adev->flags & AMD_IS_APU) { 2448 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2449 &adev->gfx.rlc.cp_table_gpu_addr, 2450 (void **)&adev->gfx.rlc.cp_table_ptr); 2451 } 2452 gfx_v9_0_free_microcode(adev); 2453 2454 return 0; 2455 } 2456 2457 2458 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2459 { 2460 /* TODO */ 2461 } 2462 2463 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2464 u32 instance) 2465 { 2466 u32 data; 2467 2468 if (instance == 0xffffffff) 2469 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2470 else 2471 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2472 2473 if (se_num == 0xffffffff) 2474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2475 else 2476 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2477 2478 if (sh_num == 0xffffffff) 2479 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2480 else 2481 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2482 2483 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2484 } 2485 2486 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2487 { 2488 u32 data, mask; 2489 2490 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2491 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2492 2493 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2494 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2495 2496 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2497 adev->gfx.config.max_sh_per_se); 2498 2499 return (~data) & mask; 2500 } 2501 2502 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2503 { 2504 int i, j; 2505 u32 data; 2506 u32 active_rbs = 0; 2507 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2508 adev->gfx.config.max_sh_per_se; 2509 2510 mutex_lock(&adev->grbm_idx_mutex); 2511 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2512 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2513 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2514 data = gfx_v9_0_get_rb_active_bitmap(adev); 2515 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2516 rb_bitmap_width_per_sh); 2517 } 2518 } 2519 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2520 mutex_unlock(&adev->grbm_idx_mutex); 2521 2522 adev->gfx.config.backend_enable_mask = active_rbs; 2523 adev->gfx.config.num_rbs = hweight32(active_rbs); 2524 } 2525 2526 #define DEFAULT_SH_MEM_BASES (0x6000) 2527 static void 
gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}

static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
2574 */ 2575 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2576 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2577 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2578 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2579 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2580 } 2581 } 2582 2583 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2584 { 2585 uint32_t tmp; 2586 2587 switch (adev->asic_type) { 2588 case CHIP_ARCTURUS: 2589 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2590 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, 2591 DISABLE_BARRIER_WAITCNT, 1); 2592 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2593 break; 2594 default: 2595 break; 2596 } 2597 } 2598 2599 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2600 { 2601 u32 tmp; 2602 int i; 2603 2604 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2605 2606 gfx_v9_0_tiling_mode_table_init(adev); 2607 2608 gfx_v9_0_setup_rb(adev); 2609 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2610 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2611 2612 /* XXX SH_MEM regs */ 2613 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2614 mutex_lock(&adev->srbm_mutex); 2615 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { 2616 soc15_grbm_select(adev, 0, 0, 0, i); 2617 /* CP and shaders */ 2618 if (i == 0) { 2619 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2620 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2621 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2622 !!adev->gmc.noretry); 2623 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2624 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2625 } else { 2626 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2627 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2628 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2629 !!adev->gmc.noretry); 2630 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2631 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2632 (adev->gmc.private_aperture_start >> 48)); 2633 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2634 (adev->gmc.shared_aperture_start >> 48)); 2635 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2636 } 2637 } 2638 soc15_grbm_select(adev, 0, 0, 0, 0); 2639 2640 mutex_unlock(&adev->srbm_mutex); 2641 2642 gfx_v9_0_init_compute_vmid(adev); 2643 gfx_v9_0_init_gds_vmid(adev); 2644 gfx_v9_0_init_sq_config(adev); 2645 } 2646 2647 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2648 { 2649 u32 i, j, k; 2650 u32 mask; 2651 2652 mutex_lock(&adev->grbm_idx_mutex); 2653 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2654 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2655 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 2656 for (k = 0; k < adev->usec_timeout; k++) { 2657 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2658 break; 2659 udelay(1); 2660 } 2661 if (k == adev->usec_timeout) { 2662 gfx_v9_0_select_se_sh(adev, 0xffffffff, 2663 0xffffffff, 0xffffffff); 2664 mutex_unlock(&adev->grbm_idx_mutex); 2665 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2666 i, j); 2667 return; 2668 } 2669 } 2670 } 2671 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2672 mutex_unlock(&adev->grbm_idx_mutex); 2673 2674 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2675 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2676 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2677 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2678 for (k = 0; k < adev->usec_timeout; k++) { 2679 if 
((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2680 break; 2681 udelay(1); 2682 } 2683 } 2684 2685 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2686 bool enable) 2687 { 2688 u32 tmp; 2689 2690 /* These interrupts should be enabled to drive DS clock */ 2691 2692 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2693 2694 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2695 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2696 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2697 if(adev->gfx.num_gfx_rings) 2698 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2699 2700 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2701 } 2702 2703 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2704 { 2705 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2706 /* csib */ 2707 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2708 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2709 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2710 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2711 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2712 adev->gfx.rlc.clear_state_size); 2713 } 2714 2715 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2716 int indirect_offset, 2717 int list_size, 2718 int *unique_indirect_regs, 2719 int unique_indirect_reg_count, 2720 int *indirect_start_offsets, 2721 int *indirect_start_offsets_count, 2722 int max_start_offsets_count) 2723 { 2724 int idx; 2725 2726 for (; indirect_offset < list_size; indirect_offset++) { 2727 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2728 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2729 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2730 2731 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2732 indirect_offset += 2; 2733 2734 /* look for the matching indice */ 2735 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2736 if (unique_indirect_regs[idx] == 2737 register_list_format[indirect_offset] || 2738 !unique_indirect_regs[idx]) 2739 break; 2740 } 2741 2742 BUG_ON(idx >= unique_indirect_reg_count); 2743 2744 if (!unique_indirect_regs[idx]) 2745 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2746 2747 indirect_offset++; 2748 } 2749 } 2750 } 2751 2752 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2753 { 2754 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2755 int unique_indirect_reg_count = 0; 2756 2757 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2758 int indirect_start_offsets_count = 0; 2759 2760 int list_size = 0; 2761 int i = 0, j = 0; 2762 u32 tmp = 0; 2763 2764 u32 *register_list_format = 2765 kmemdup(adev->gfx.rlc.register_list_format, 2766 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2767 if (!register_list_format) 2768 return -ENOMEM; 2769 2770 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2771 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2772 gfx_v9_1_parse_ind_reg_list(register_list_format, 2773 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2774 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2775 unique_indirect_regs, 2776 unique_indirect_reg_count, 2777 indirect_start_offsets, 2778 &indirect_start_offsets_count, 2779 
ARRAY_SIZE(indirect_start_offsets)); 2780 2781 /* enable auto inc in case it is disabled */ 2782 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2783 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2784 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2785 2786 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2787 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2788 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2789 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2790 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2791 adev->gfx.rlc.register_restore[i]); 2792 2793 /* load indirect register */ 2794 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2795 adev->gfx.rlc.reg_list_format_start); 2796 2797 /* direct register portion */ 2798 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2799 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2800 register_list_format[i]); 2801 2802 /* indirect register portion */ 2803 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2804 if (register_list_format[i] == 0xFFFFFFFF) { 2805 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2806 continue; 2807 } 2808 2809 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2810 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2811 2812 for (j = 0; j < unique_indirect_reg_count; j++) { 2813 if (register_list_format[i] == unique_indirect_regs[j]) { 2814 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2815 break; 2816 } 2817 } 2818 2819 BUG_ON(j >= unique_indirect_reg_count); 2820 2821 i++; 2822 } 2823 2824 /* set save/restore list size */ 2825 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2826 list_size = list_size >> 1; 2827 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2828 adev->gfx.rlc.reg_restore_list_size); 2829 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2830 2831 /* write the starting offsets to RLC scratch ram */ 2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2833 adev->gfx.rlc.starting_offsets_start); 2834 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2835 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2836 indirect_start_offsets[i]); 2837 2838 /* load unique indirect regs*/ 2839 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2840 if (unique_indirect_regs[i] != 0) { 2841 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2842 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2843 unique_indirect_regs[i] & 0x3FFFF); 2844 2845 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2846 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2847 unique_indirect_regs[i] >> 20); 2848 } 2849 } 2850 2851 kfree(register_list_format); 2852 return 0; 2853 } 2854 2855 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2856 { 2857 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2858 } 2859 2860 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2861 bool enable) 2862 { 2863 uint32_t data = 0; 2864 uint32_t default_data = 0; 2865 2866 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2867 if (enable) { 2868 /* enable GFXIP control over CGPG */ 2869 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2870 if(default_data != data) 2871 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2872 2873 /* update status */ 2874 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2875 data |= 
(2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2876 if(default_data != data) 2877 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2878 } else { 2879 /* restore GFXIP control over GCPG */ 2880 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2881 if(default_data != data) 2882 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2883 } 2884 } 2885 2886 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2887 { 2888 uint32_t data = 0; 2889 2890 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2891 AMD_PG_SUPPORT_GFX_SMG | 2892 AMD_PG_SUPPORT_GFX_DMG)) { 2893 /* init IDLE_POLL_COUNT = 60 */ 2894 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2895 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2896 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2897 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2898 2899 /* init RLC PG Delay */ 2900 data = 0; 2901 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2902 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2903 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2904 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2905 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2906 2907 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2908 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2909 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2910 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2911 2912 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2913 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2914 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2915 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2916 2917 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2918 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2919 2920 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2921 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2922 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2923 if (adev->asic_type != CHIP_RENOIR) 2924 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2925 } 2926 } 2927 2928 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2929 bool enable) 2930 { 2931 uint32_t data = 0; 2932 uint32_t default_data = 0; 2933 2934 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2935 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2936 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2937 enable ? 1 : 0); 2938 if (default_data != data) 2939 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2940 } 2941 2942 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2943 bool enable) 2944 { 2945 uint32_t data = 0; 2946 uint32_t default_data = 0; 2947 2948 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2949 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2950 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2951 enable ? 1 : 0); 2952 if(default_data != data) 2953 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2954 } 2955 2956 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2957 bool enable) 2958 { 2959 uint32_t data = 0; 2960 uint32_t default_data = 0; 2961 2962 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2963 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2964 CP_PG_DISABLE, 2965 enable ? 
0 : 1); 2966 if(default_data != data) 2967 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2968 } 2969 2970 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2971 bool enable) 2972 { 2973 uint32_t data, default_data; 2974 2975 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2976 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2977 GFX_POWER_GATING_ENABLE, 2978 enable ? 1 : 0); 2979 if(default_data != data) 2980 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2981 } 2982 2983 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2984 bool enable) 2985 { 2986 uint32_t data, default_data; 2987 2988 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2989 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2990 GFX_PIPELINE_PG_ENABLE, 2991 enable ? 1 : 0); 2992 if(default_data != data) 2993 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2994 2995 if (!enable) 2996 /* read any GFX register to wake up GFX */ 2997 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2998 } 2999 3000 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3001 bool enable) 3002 { 3003 uint32_t data, default_data; 3004 3005 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3006 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3007 STATIC_PER_CU_PG_ENABLE, 3008 enable ? 1 : 0); 3009 if(default_data != data) 3010 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3011 } 3012 3013 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3014 bool enable) 3015 { 3016 uint32_t data, default_data; 3017 3018 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3019 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3020 DYN_PER_CU_PG_ENABLE, 3021 enable ? 1 : 0); 3022 if(default_data != data) 3023 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3024 } 3025 3026 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3027 { 3028 gfx_v9_0_init_csb(adev); 3029 3030 /* 3031 * Rlc save restore list is workable since v2_1. 3032 * And it's needed by gfxoff feature. 
3033 */ 3034 if (adev->gfx.rlc.is_rlc_v2_1) { 3035 if (adev->asic_type == CHIP_VEGA12 || 3036 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3037 gfx_v9_1_init_rlc_save_restore_list(adev); 3038 gfx_v9_0_enable_save_restore_machine(adev); 3039 } 3040 3041 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3042 AMD_PG_SUPPORT_GFX_SMG | 3043 AMD_PG_SUPPORT_GFX_DMG | 3044 AMD_PG_SUPPORT_CP | 3045 AMD_PG_SUPPORT_GDS | 3046 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3047 WREG32(mmRLC_JUMP_TABLE_RESTORE, 3048 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3049 gfx_v9_0_init_gfx_power_gating(adev); 3050 } 3051 } 3052 3053 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3054 { 3055 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3056 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3057 gfx_v9_0_wait_for_rlc_serdes(adev); 3058 } 3059 3060 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3061 { 3062 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3063 udelay(50); 3064 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3065 udelay(50); 3066 } 3067 3068 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3069 { 3070 #ifdef AMDGPU_RLC_DEBUG_RETRY 3071 u32 rlc_ucode_ver; 3072 #endif 3073 3074 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3075 udelay(50); 3076 3077 /* carrizo do enable cp interrupt after cp inited */ 3078 if (!(adev->flags & AMD_IS_APU)) { 3079 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3080 udelay(50); 3081 } 3082 3083 #ifdef AMDGPU_RLC_DEBUG_RETRY 3084 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3085 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3086 if(rlc_ucode_ver == 0x108) { 3087 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3088 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3089 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3090 * default is 0x9C4 to create a 100us interval */ 3091 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3092 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3093 * to disable the page fault retry interrupts, default is 3094 * 0x100 (256) */ 3095 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3096 } 3097 #endif 3098 } 3099 3100 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3101 { 3102 const struct rlc_firmware_header_v2_0 *hdr; 3103 const __le32 *fw_data; 3104 unsigned i, fw_size; 3105 3106 if (!adev->gfx.rlc_fw) 3107 return -EINVAL; 3108 3109 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3110 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3111 3112 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3113 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3114 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3115 3116 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3117 RLCG_UCODE_LOADING_START_ADDRESS); 3118 for (i = 0; i < fw_size; i++) 3119 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3120 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3121 3122 return 0; 3123 } 3124 3125 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3126 { 3127 int r; 3128 3129 if (amdgpu_sriov_vf(adev)) { 3130 gfx_v9_0_init_csb(adev); 3131 return 0; 3132 } 3133 3134 adev->gfx.rlc.funcs->stop(adev); 3135 3136 /* disable CG */ 3137 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3138 3139 gfx_v9_0_init_pg(adev); 3140 3141 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3142 /* legacy rlc firmware loading */ 3143 r = gfx_v9_0_rlc_load_microcode(adev); 3144 if (r) 3145 return r; 3146 } 3147 
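	/*
	 * Note (added for clarity, not in the original source): the switch
	 * below applies the LBPW (load-balancing per watt) policy from the
	 * amdgpu_lbpw module option; Raven leaves LBPW disabled unless it is
	 * explicitly enabled, while Vega20 enables it unless it is explicitly
	 * disabled.
	 */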
3148 switch (adev->asic_type) { 3149 case CHIP_RAVEN: 3150 if (amdgpu_lbpw == 0) 3151 gfx_v9_0_enable_lbpw(adev, false); 3152 else 3153 gfx_v9_0_enable_lbpw(adev, true); 3154 break; 3155 case CHIP_VEGA20: 3156 if (amdgpu_lbpw > 0) 3157 gfx_v9_0_enable_lbpw(adev, true); 3158 else 3159 gfx_v9_0_enable_lbpw(adev, false); 3160 break; 3161 default: 3162 break; 3163 } 3164 3165 adev->gfx.rlc.funcs->start(adev); 3166 3167 return 0; 3168 } 3169 3170 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3171 { 3172 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3173 3174 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3175 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3176 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3177 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3178 udelay(50); 3179 } 3180 3181 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3182 { 3183 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3184 const struct gfx_firmware_header_v1_0 *ce_hdr; 3185 const struct gfx_firmware_header_v1_0 *me_hdr; 3186 const __le32 *fw_data; 3187 unsigned i, fw_size; 3188 3189 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3190 return -EINVAL; 3191 3192 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3193 adev->gfx.pfp_fw->data; 3194 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3195 adev->gfx.ce_fw->data; 3196 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3197 adev->gfx.me_fw->data; 3198 3199 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3200 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3201 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3202 3203 gfx_v9_0_cp_gfx_enable(adev, false); 3204 3205 /* PFP */ 3206 fw_data = (const __le32 *) 3207 (adev->gfx.pfp_fw->data + 3208 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3209 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3210 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3211 for (i = 0; i < fw_size; i++) 3212 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3213 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3214 3215 /* CE */ 3216 fw_data = (const __le32 *) 3217 (adev->gfx.ce_fw->data + 3218 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3219 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3220 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3221 for (i = 0; i < fw_size; i++) 3222 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3223 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3224 3225 /* ME */ 3226 fw_data = (const __le32 *) 3227 (adev->gfx.me_fw->data + 3228 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3229 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3230 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3231 for (i = 0; i < fw_size; i++) 3232 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3233 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3234 3235 return 0; 3236 } 3237 3238 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3239 { 3240 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3241 const struct cs_section_def *sect = NULL; 3242 const struct cs_extent_def *ext = NULL; 3243 int r, i, tmp; 3244 3245 /* init the CP */ 3246 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3247 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3248 3249 gfx_v9_0_cp_gfx_enable(adev, true); 3250 3251 r = amdgpu_ring_alloc(ring, 
				 gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3347 DOORBELL_OFFSET, ring->doorbell_index); 3348 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3349 DOORBELL_EN, 1); 3350 } else { 3351 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3352 } 3353 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3354 3355 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3356 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3357 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3358 3359 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3360 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3361 3362 3363 /* start the ring */ 3364 gfx_v9_0_cp_gfx_start(adev); 3365 ring->sched.ready = true; 3366 3367 return 0; 3368 } 3369 3370 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3371 { 3372 if (enable) { 3373 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3374 } else { 3375 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3376 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3377 adev->gfx.kiq.ring.sched.ready = false; 3378 } 3379 udelay(50); 3380 } 3381 3382 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3383 { 3384 const struct gfx_firmware_header_v1_0 *mec_hdr; 3385 const __le32 *fw_data; 3386 unsigned i; 3387 u32 tmp; 3388 3389 if (!adev->gfx.mec_fw) 3390 return -EINVAL; 3391 3392 gfx_v9_0_cp_compute_enable(adev, false); 3393 3394 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3395 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3396 3397 fw_data = (const __le32 *) 3398 (adev->gfx.mec_fw->data + 3399 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3400 tmp = 0; 3401 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3402 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3403 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3404 3405 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3406 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3407 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3408 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3409 3410 /* MEC1 */ 3411 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3412 mec_hdr->jt_offset); 3413 for (i = 0; i < mec_hdr->jt_size; i++) 3414 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3415 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3416 3417 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3418 adev->gfx.mec_fw_version); 3419 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
	 */

	return 0;
}

/* KIQ functions */
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
}

static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
			mqd->cp_hqd_queue_priority =
				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
		}
	}
}

static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	mqd->dynamic_cu_mask_addr_lo =
		lower_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi =
		upper_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell?
*/ 3490 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3491 3492 if (ring->use_doorbell) { 3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3494 DOORBELL_OFFSET, ring->doorbell_index); 3495 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3496 DOORBELL_EN, 1); 3497 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3498 DOORBELL_SOURCE, 0); 3499 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3500 DOORBELL_HIT, 0); 3501 } else { 3502 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3503 DOORBELL_EN, 0); 3504 } 3505 3506 mqd->cp_hqd_pq_doorbell_control = tmp; 3507 3508 /* disable the queue if it's active */ 3509 ring->wptr = 0; 3510 mqd->cp_hqd_dequeue_request = 0; 3511 mqd->cp_hqd_pq_rptr = 0; 3512 mqd->cp_hqd_pq_wptr_lo = 0; 3513 mqd->cp_hqd_pq_wptr_hi = 0; 3514 3515 /* set the pointer to the MQD */ 3516 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3517 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3518 3519 /* set MQD vmid to 0 */ 3520 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3521 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3522 mqd->cp_mqd_control = tmp; 3523 3524 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3525 hqd_gpu_addr = ring->gpu_addr >> 8; 3526 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3527 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3528 3529 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3530 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3531 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3532 (order_base_2(ring->ring_size / 4) - 1)); 3533 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3534 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 3535 #ifdef __BIG_ENDIAN 3536 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3537 #endif 3538 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3539 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3540 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3541 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3542 mqd->cp_hqd_pq_control = tmp; 3543 3544 /* set the wb address whether it's enabled or not */ 3545 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 3546 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3547 mqd->cp_hqd_pq_rptr_report_addr_hi = 3548 upper_32_bits(wb_gpu_addr) & 0xffff; 3549 3550 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3551 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 3552 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3553 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3554 3555 tmp = 0; 3556 /* enable the doorbell if requested */ 3557 if (ring->use_doorbell) { 3558 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3559 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3560 DOORBELL_OFFSET, ring->doorbell_index); 3561 3562 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3563 DOORBELL_EN, 1); 3564 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3565 DOORBELL_SOURCE, 0); 3566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3567 DOORBELL_HIT, 0); 3568 } 3569 3570 mqd->cp_hqd_pq_doorbell_control = tmp; 3571 3572 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3573 ring->wptr = 0; 3574 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3575 3576 /* set the vmid for the queue */ 3577 mqd->cp_hqd_vmid = 0; 3578 3579 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3580 
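	/* read/modify/write: preserve the current CP_HQD_PERSISTENT_STATE
	 * contents and update only the PRELOAD_SIZE field below.
	 */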
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3581 mqd->cp_hqd_persistent_state = tmp; 3582 3583 /* set MIN_IB_AVAIL_SIZE */ 3584 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3585 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3586 mqd->cp_hqd_ib_control = tmp; 3587 3588 /* set static priority for a queue/ring */ 3589 gfx_v9_0_mqd_set_priority(ring, mqd); 3590 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 3591 3592 /* map_queues packet doesn't need activate the queue, 3593 * so only kiq need set this field. 3594 */ 3595 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3596 mqd->cp_hqd_active = 1; 3597 3598 return 0; 3599 } 3600 3601 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3602 { 3603 struct amdgpu_device *adev = ring->adev; 3604 struct v9_mqd *mqd = ring->mqd_ptr; 3605 int j; 3606 3607 /* disable wptr polling */ 3608 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3609 3610 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3611 mqd->cp_hqd_eop_base_addr_lo); 3612 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3613 mqd->cp_hqd_eop_base_addr_hi); 3614 3615 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3616 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3617 mqd->cp_hqd_eop_control); 3618 3619 /* enable doorbell? */ 3620 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3621 mqd->cp_hqd_pq_doorbell_control); 3622 3623 /* disable the queue if it's active */ 3624 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3625 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3626 for (j = 0; j < adev->usec_timeout; j++) { 3627 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3628 break; 3629 udelay(1); 3630 } 3631 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3632 mqd->cp_hqd_dequeue_request); 3633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3634 mqd->cp_hqd_pq_rptr); 3635 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3636 mqd->cp_hqd_pq_wptr_lo); 3637 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3638 mqd->cp_hqd_pq_wptr_hi); 3639 } 3640 3641 /* set the pointer to the MQD */ 3642 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3643 mqd->cp_mqd_base_addr_lo); 3644 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3645 mqd->cp_mqd_base_addr_hi); 3646 3647 /* set MQD vmid to 0 */ 3648 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3649 mqd->cp_mqd_control); 3650 3651 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3652 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3653 mqd->cp_hqd_pq_base_lo); 3654 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3655 mqd->cp_hqd_pq_base_hi); 3656 3657 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3658 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3659 mqd->cp_hqd_pq_control); 3660 3661 /* set the wb address whether it's enabled or not */ 3662 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3663 mqd->cp_hqd_pq_rptr_report_addr_lo); 3664 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3665 mqd->cp_hqd_pq_rptr_report_addr_hi); 3666 3667 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3668 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3669 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3671 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3672 3673 /* enable the doorbell if requested */ 3674 if (ring->use_doorbell) { 3675 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3676 (adev->doorbell_index.kiq * 2) << 2); 3677 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3678 
(adev->doorbell_index.userqueue_end * 2) << 2); 3679 } 3680 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3682 mqd->cp_hqd_pq_doorbell_control); 3683 3684 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3686 mqd->cp_hqd_pq_wptr_lo); 3687 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3688 mqd->cp_hqd_pq_wptr_hi); 3689 3690 /* set the vmid for the queue */ 3691 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3692 3693 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3694 mqd->cp_hqd_persistent_state); 3695 3696 /* activate the queue */ 3697 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3698 mqd->cp_hqd_active); 3699 3700 if (ring->use_doorbell) 3701 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3702 3703 return 0; 3704 } 3705 3706 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3707 { 3708 struct amdgpu_device *adev = ring->adev; 3709 int j; 3710 3711 /* disable the queue if it's active */ 3712 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3713 3714 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3715 3716 for (j = 0; j < adev->usec_timeout; j++) { 3717 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3718 break; 3719 udelay(1); 3720 } 3721 3722 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3723 DRM_DEBUG("KIQ dequeue request failed.\n"); 3724 3725 /* Manual disable if dequeue request times out */ 3726 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3727 } 3728 3729 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3730 0); 3731 } 3732 3733 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3734 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3735 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3736 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3737 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3738 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3739 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3740 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3741 3742 return 0; 3743 } 3744 3745 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3746 { 3747 struct amdgpu_device *adev = ring->adev; 3748 struct v9_mqd *mqd = ring->mqd_ptr; 3749 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 3750 struct v9_mqd *tmp_mqd; 3751 3752 gfx_v9_0_kiq_setting(ring); 3753 3754 /* GPU could be in bad state during probe, driver trigger the reset 3755 * after load the SMU, in this case , the mqd is not be initialized. 3756 * driver need to re-init the mqd. 
	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
	 */
	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
		/* for the GPU_RESET case, reset the MQD to a clean state */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	}

	return 0;
}

static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];
	struct v9_mqd *tmp_mqd;

	/* As with the KIQ init above, the driver needs to re-init the MQD if
	 * mqd->cp_hqd_pq_control has not been initialized before.
	 */
	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];

	if (!tmp_mqd->cp_hqd_pq_control ||
	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v9_0_mqd_init(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	} else if (amdgpu_in_reset(adev)) { /* for the GPU_RESET case */
		/* reset the MQD to a clean state */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (unlikely(r != 0))
		return r;

	gfx_v9_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
	ring->sched.ready = true;
	return 0;
}

static int gfx_v9_0_kcq_resume(struct
amdgpu_device *adev) 3857 { 3858 struct amdgpu_ring *ring = NULL; 3859 int r = 0, i; 3860 3861 gfx_v9_0_cp_compute_enable(adev, true); 3862 3863 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3864 ring = &adev->gfx.compute_ring[i]; 3865 3866 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3867 if (unlikely(r != 0)) 3868 goto done; 3869 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3870 if (!r) { 3871 r = gfx_v9_0_kcq_init_queue(ring); 3872 amdgpu_bo_kunmap(ring->mqd_obj); 3873 ring->mqd_ptr = NULL; 3874 } 3875 amdgpu_bo_unreserve(ring->mqd_obj); 3876 if (r) 3877 goto done; 3878 } 3879 3880 r = amdgpu_gfx_enable_kcq(adev); 3881 done: 3882 return r; 3883 } 3884 3885 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3886 { 3887 int r, i; 3888 struct amdgpu_ring *ring; 3889 3890 if (!(adev->flags & AMD_IS_APU)) 3891 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3892 3893 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3894 if (adev->gfx.num_gfx_rings) { 3895 /* legacy firmware loading */ 3896 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3897 if (r) 3898 return r; 3899 } 3900 3901 r = gfx_v9_0_cp_compute_load_microcode(adev); 3902 if (r) 3903 return r; 3904 } 3905 3906 r = gfx_v9_0_kiq_resume(adev); 3907 if (r) 3908 return r; 3909 3910 if (adev->gfx.num_gfx_rings) { 3911 r = gfx_v9_0_cp_gfx_resume(adev); 3912 if (r) 3913 return r; 3914 } 3915 3916 r = gfx_v9_0_kcq_resume(adev); 3917 if (r) 3918 return r; 3919 3920 if (adev->gfx.num_gfx_rings) { 3921 ring = &adev->gfx.gfx_ring[0]; 3922 r = amdgpu_ring_test_helper(ring); 3923 if (r) 3924 return r; 3925 } 3926 3927 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3928 ring = &adev->gfx.compute_ring[i]; 3929 amdgpu_ring_test_helper(ring); 3930 } 3931 3932 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3933 3934 return 0; 3935 } 3936 3937 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3938 { 3939 u32 tmp; 3940 3941 if (adev->asic_type != CHIP_ARCTURUS && 3942 adev->asic_type != CHIP_ALDEBARAN) 3943 return; 3944 3945 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3946 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3947 adev->df.hash_status.hash_64k); 3948 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3949 adev->df.hash_status.hash_2m); 3950 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3951 adev->df.hash_status.hash_1g); 3952 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3953 } 3954 3955 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3956 { 3957 if (adev->gfx.num_gfx_rings) 3958 gfx_v9_0_cp_gfx_enable(adev, enable); 3959 gfx_v9_0_cp_compute_enable(adev, enable); 3960 } 3961 3962 static int gfx_v9_0_hw_init(void *handle) 3963 { 3964 int r; 3965 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3966 3967 if (!amdgpu_sriov_vf(adev)) 3968 gfx_v9_0_init_golden_registers(adev); 3969 3970 gfx_v9_0_constants_init(adev); 3971 3972 gfx_v9_0_init_tcp_config(adev); 3973 3974 r = adev->gfx.rlc.funcs->resume(adev); 3975 if (r) 3976 return r; 3977 3978 r = gfx_v9_0_cp_resume(adev); 3979 if (r) 3980 return r; 3981 3982 if (adev->asic_type == CHIP_ALDEBARAN) 3983 gfx_v9_4_2_set_power_brake_sequence(adev); 3984 3985 return r; 3986 } 3987 3988 static int gfx_v9_0_hw_fini(void *handle) 3989 { 3990 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3991 3992 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3993 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3994 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3995 3996 /* DF freeze and 
kcq disable will fail */ 3997 if (!amdgpu_ras_intr_triggered()) 3998 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3999 amdgpu_gfx_disable_kcq(adev); 4000 4001 if (amdgpu_sriov_vf(adev)) { 4002 gfx_v9_0_cp_gfx_enable(adev, false); 4003 /* must disable polling for SRIOV when hw finished, otherwise 4004 * CPC engine may still keep fetching WB address which is already 4005 * invalid after sw finished and trigger DMAR reading error in 4006 * hypervisor side. 4007 */ 4008 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4009 return 0; 4010 } 4011 4012 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4013 * otherwise KIQ is hanging when binding back 4014 */ 4015 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4016 mutex_lock(&adev->srbm_mutex); 4017 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 4018 adev->gfx.kiq.ring.pipe, 4019 adev->gfx.kiq.ring.queue, 0); 4020 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 4021 soc15_grbm_select(adev, 0, 0, 0, 0); 4022 mutex_unlock(&adev->srbm_mutex); 4023 } 4024 4025 gfx_v9_0_cp_enable(adev, false); 4026 4027 /* Skip suspend with A+A reset */ 4028 if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { 4029 dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); 4030 return 0; 4031 } 4032 4033 adev->gfx.rlc.funcs->stop(adev); 4034 return 0; 4035 } 4036 4037 static int gfx_v9_0_suspend(void *handle) 4038 { 4039 return gfx_v9_0_hw_fini(handle); 4040 } 4041 4042 static int gfx_v9_0_resume(void *handle) 4043 { 4044 return gfx_v9_0_hw_init(handle); 4045 } 4046 4047 static bool gfx_v9_0_is_idle(void *handle) 4048 { 4049 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4050 4051 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4052 GRBM_STATUS, GUI_ACTIVE)) 4053 return false; 4054 else 4055 return true; 4056 } 4057 4058 static int gfx_v9_0_wait_for_idle(void *handle) 4059 { 4060 unsigned i; 4061 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4062 4063 for (i = 0; i < adev->usec_timeout; i++) { 4064 if (gfx_v9_0_is_idle(handle)) 4065 return 0; 4066 udelay(1); 4067 } 4068 return -ETIMEDOUT; 4069 } 4070 4071 static int gfx_v9_0_soft_reset(void *handle) 4072 { 4073 u32 grbm_soft_reset = 0; 4074 u32 tmp; 4075 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4076 4077 /* GRBM_STATUS */ 4078 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4079 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4080 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4081 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4082 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4083 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4084 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4085 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4086 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4087 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4088 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4089 } 4090 4091 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4092 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4093 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4094 } 4095 4096 /* GRBM_STATUS2 */ 4097 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4098 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4099 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4100 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4101 4102 4103 if (grbm_soft_reset) { 4104 /* stop the rlc */ 4105 adev->gfx.rlc.funcs->stop(adev); 4106 4107 if (adev->gfx.num_gfx_rings) 4108 /* 
 Disable GFX parsing/prefetching */
			gfx_v9_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v9_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}

static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0;
	uint64_t value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 9 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 16) |	/* count sel */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't wait any longer in the GPU-reset case, because doing so may
	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
	 * path can be triggered from TTM, and ttm_bo_lock_delayed_workqueue()
	 * would never return if we kept waiting here, which would hang
	 * gpu_recover().
4176 * 4177 * also don't wait anymore for IRQ context 4178 * */ 4179 if (r < 1 && (amdgpu_in_reset(adev))) 4180 goto failed_kiq_read; 4181 4182 might_sleep(); 4183 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4184 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4185 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4186 } 4187 4188 if (cnt > MAX_KIQ_REG_TRY) 4189 goto failed_kiq_read; 4190 4191 mb(); 4192 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4193 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4194 amdgpu_device_wb_free(adev, reg_val_offs); 4195 return value; 4196 4197 failed_undo: 4198 amdgpu_ring_undo(ring); 4199 failed_unlock: 4200 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4201 failed_kiq_read: 4202 if (reg_val_offs) 4203 amdgpu_device_wb_free(adev, reg_val_offs); 4204 pr_err("failed to read gpu clock\n"); 4205 return ~0; 4206 } 4207 4208 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4209 { 4210 uint64_t clock; 4211 4212 amdgpu_gfx_off_ctrl(adev, false); 4213 mutex_lock(&adev->gfx.gpu_clock_mutex); 4214 if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { 4215 clock = gfx_v9_0_kiq_read_clock(adev); 4216 } else { 4217 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4218 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4219 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4220 } 4221 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4222 amdgpu_gfx_off_ctrl(adev, true); 4223 return clock; 4224 } 4225 4226 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4227 uint32_t vmid, 4228 uint32_t gds_base, uint32_t gds_size, 4229 uint32_t gws_base, uint32_t gws_size, 4230 uint32_t oa_base, uint32_t oa_size) 4231 { 4232 struct amdgpu_device *adev = ring->adev; 4233 4234 /* GDS Base */ 4235 gfx_v9_0_write_data_to_reg(ring, 0, false, 4236 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4237 gds_base); 4238 4239 /* GDS Size */ 4240 gfx_v9_0_write_data_to_reg(ring, 0, false, 4241 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4242 gds_size); 4243 4244 /* GWS */ 4245 gfx_v9_0_write_data_to_reg(ring, 0, false, 4246 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4247 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4248 4249 /* OA */ 4250 gfx_v9_0_write_data_to_reg(ring, 0, false, 4251 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4252 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4253 } 4254 4255 static const u32 vgpr_init_compute_shader[] = 4256 { 4257 0xb07c0000, 0xbe8000ff, 4258 0x000000f8, 0xbf110800, 4259 0x7e000280, 0x7e020280, 4260 0x7e040280, 0x7e060280, 4261 0x7e080280, 0x7e0a0280, 4262 0x7e0c0280, 0x7e0e0280, 4263 0x80808800, 0xbe803200, 4264 0xbf84fff5, 0xbf9c0000, 4265 0xd28c0001, 0x0001007f, 4266 0xd28d0001, 0x0002027e, 4267 0x10020288, 0xb8810904, 4268 0xb7814000, 0xd1196a01, 4269 0x00000301, 0xbe800087, 4270 0xbefc00c1, 0xd89c4000, 4271 0x00020201, 0xd89cc080, 4272 0x00040401, 0x320202ff, 4273 0x00000800, 0x80808100, 4274 0xbf84fff8, 0x7e020280, 4275 0xbf810000, 0x00000000, 4276 }; 4277 4278 static const u32 sgpr_init_compute_shader[] = 4279 { 4280 0xb07c0000, 0xbe8000ff, 4281 0x0000005f, 0xbee50080, 4282 0xbe812c65, 0xbe822c65, 4283 0xbe832c65, 0xbe842c65, 4284 0xbe852c65, 0xb77c0005, 4285 0x80808500, 0xbf84fff8, 4286 0xbe800080, 0xbf810000, 4287 }; 4288 4289 static const u32 vgpr_init_compute_shader_arcturus[] = { 4290 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4291 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 
0x18000080, 4292 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4293 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4294 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4295 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4296 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4297 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4298 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4299 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4300 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4301 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4302 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4303 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4304 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4305 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4306 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4307 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4308 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4309 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4310 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4311 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4312 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4313 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4314 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4315 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4316 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4317 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4318 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4319 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4320 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4321 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4322 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4323 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4324 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4325 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4326 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4327 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 4328 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4329 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4330 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4331 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4332 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4333 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4334 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4335 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4336 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4337 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 
4338 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4339 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4340 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4341 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4342 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4343 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4344 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4345 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4346 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4347 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4348 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4349 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4350 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4351 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4352 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4353 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4354 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4355 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4356 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4357 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4358 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4359 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4360 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4361 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4362 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4363 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4364 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4365 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4366 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4367 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4368 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4369 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4370 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4371 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4372 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4373 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 4374 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4375 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4376 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4377 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4378 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4379 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4380 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4381 0xbf84fff8, 0xbf810000, 4382 }; 4383 4384 /* When below register arrays changed, please update gpr_reg_size, 4385 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4386 to cover all gfx9 ASICs */ 4387 static const 
struct soc15_reg_entry vgpr_init_regs[] = { 4388 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4389 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4396 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4397 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4398 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4399 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4402 }; 4403 4404 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4405 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4414 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4415 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4416 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4419 }; 4420 4421 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4431 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4432 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4433 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4434 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4436 }; 4437 4438 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4442 { 
SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4453 }; 4454 4455 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4456 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4457 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4458 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4459 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4460 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4461 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4462 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4463 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4464 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4465 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4466 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4467 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4468 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4469 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4470 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4471 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4472 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4473 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4474 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4475 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4476 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4477 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4478 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4479 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4480 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4481 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4482 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4483 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4484 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4485 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4486 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 4487 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4488 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4489 }; 4490 4491 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4492 { 4493 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4494 int i, r; 4495 4496 /* only support when RAS is enabled */ 4497 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4498 return 0; 4499 4500 r = amdgpu_ring_alloc(ring, 7); 4501 if (r) { 4502 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4503 ring->name, r); 4504 return r; 4505 } 4506 4507 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4508 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4509 4510 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4511 amdgpu_ring_write(ring, 
(PACKET3_DMA_DATA_CP_SYNC | 4512 PACKET3_DMA_DATA_DST_SEL(1) | 4513 PACKET3_DMA_DATA_SRC_SEL(2) | 4514 PACKET3_DMA_DATA_ENGINE(0))); 4515 amdgpu_ring_write(ring, 0); 4516 amdgpu_ring_write(ring, 0); 4517 amdgpu_ring_write(ring, 0); 4518 amdgpu_ring_write(ring, 0); 4519 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4520 adev->gds.gds_size); 4521 4522 amdgpu_ring_commit(ring); 4523 4524 for (i = 0; i < adev->usec_timeout; i++) { 4525 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4526 break; 4527 udelay(1); 4528 } 4529 4530 if (i >= adev->usec_timeout) 4531 r = -ETIMEDOUT; 4532 4533 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4534 4535 return r; 4536 } 4537 4538 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4539 { 4540 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4541 struct amdgpu_ib ib; 4542 struct dma_fence *f = NULL; 4543 int r, i; 4544 unsigned total_size, vgpr_offset, sgpr_offset; 4545 u64 gpu_addr; 4546 4547 int compute_dim_x = adev->gfx.config.max_shader_engines * 4548 adev->gfx.config.max_cu_per_sh * 4549 adev->gfx.config.max_sh_per_se; 4550 int sgpr_work_group_size = 5; 4551 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4552 int vgpr_init_shader_size; 4553 const u32 *vgpr_init_shader_ptr; 4554 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4555 4556 /* only support when RAS is enabled */ 4557 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4558 return 0; 4559 4560 /* bail if the compute ring is not ready */ 4561 if (!ring->sched.ready) 4562 return 0; 4563 4564 if (adev->asic_type == CHIP_ARCTURUS) { 4565 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4566 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4567 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4568 } else { 4569 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4570 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4571 vgpr_init_regs_ptr = vgpr_init_regs; 4572 } 4573 4574 total_size = 4575 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4576 total_size += 4577 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4578 total_size += 4579 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4580 total_size = ALIGN(total_size, 256); 4581 vgpr_offset = total_size; 4582 total_size += ALIGN(vgpr_init_shader_size, 256); 4583 sgpr_offset = total_size; 4584 total_size += sizeof(sgpr_init_compute_shader); 4585 4586 /* allocate an indirect buffer to put the commands in */ 4587 memset(&ib, 0, sizeof(ib)); 4588 r = amdgpu_ib_get(adev, NULL, total_size, 4589 AMDGPU_IB_POOL_DIRECT, &ib); 4590 if (r) { 4591 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4592 return r; 4593 } 4594 4595 /* load the compute shaders */ 4596 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4597 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4598 4599 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4600 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4601 4602 /* init the ib length to 0 */ 4603 ib.length_dw = 0; 4604 4605 /* VGPR */ 4606 /* write the register state for the compute dispatch */ 4607 for (i = 0; i < gpr_reg_size; i++) { 4608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4609 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4610 - PACKET3_SET_SH_REG_START; 4611 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4612 } 4613 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4614 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 
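/*
 * COMPUTE_PGM_LO/HI take the shader start address as a 256-byte aligned
 * value, hence the ">> 8" above.  For illustration only (these numbers are
 * hypothetical, not taken from the driver): with ib.gpu_addr = 0x100000000
 * and vgpr_offset = 0x300, the packets below would program
 *   gpu_addr         = 0x100000300 >> 8 = 0x1000003
 *   mmCOMPUTE_PGM_LO = lower_32_bits(gpu_addr) = 0x01000003
 *   mmCOMPUTE_PGM_HI = upper_32_bits(gpu_addr) = 0x0
 */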
4615 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4616 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4617 - PACKET3_SET_SH_REG_START;
4618 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4619 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4620
4621 /* write dispatch packet */
4622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4623 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4624 ib.ptr[ib.length_dw++] = 1; /* y */
4625 ib.ptr[ib.length_dw++] = 1; /* z */
4626 ib.ptr[ib.length_dw++] =
4627 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4628
4629 /* write CS partial flush packet */
4630 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4631 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4632
4633 /* SGPR1 */
4634 /* write the register state for the compute dispatch */
4635 for (i = 0; i < gpr_reg_size; i++) {
4636 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4637 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4638 - PACKET3_SET_SH_REG_START;
4639 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4640 }
4641 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4642 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4643 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4644 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4645 - PACKET3_SET_SH_REG_START;
4646 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4647 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4648
4649 /* write dispatch packet */
4650 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4651 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4652 ib.ptr[ib.length_dw++] = 1; /* y */
4653 ib.ptr[ib.length_dw++] = 1; /* z */
4654 ib.ptr[ib.length_dw++] =
4655 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4656
4657 /* write CS partial flush packet */
4658 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4659 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4660
4661 /* SGPR2 */
4662 /* write the register state for the compute dispatch */
4663 for (i = 0; i < gpr_reg_size; i++) {
4664 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4665 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4666 - PACKET3_SET_SH_REG_START;
4667 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4668 }
4669 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4670 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4671 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4672 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4673 - PACKET3_SET_SH_REG_START;
4674 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4675 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4676
4677 /* write dispatch packet */
4678 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4679 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4680 ib.ptr[ib.length_dw++] = 1; /* y */
4681 ib.ptr[ib.length_dw++] = 1; /* z */
4682 ib.ptr[ib.length_dw++] =
4683 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4684
4685 /* write CS partial flush packet */
4686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4687 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4688
4689 /* schedule the ib on the ring */
4690 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4691 if (r) {
4692 DRM_ERROR("amdgpu: ib
submit failed (%d).\n", r); 4693 goto fail; 4694 } 4695 4696 /* wait for the GPU to finish processing the IB */ 4697 r = dma_fence_wait(f, false); 4698 if (r) { 4699 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4700 goto fail; 4701 } 4702 4703 fail: 4704 amdgpu_ib_free(adev, &ib, NULL); 4705 dma_fence_put(f); 4706 4707 return r; 4708 } 4709 4710 static int gfx_v9_0_early_init(void *handle) 4711 { 4712 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4713 4714 if (adev->asic_type == CHIP_ARCTURUS || 4715 adev->asic_type == CHIP_ALDEBARAN) 4716 adev->gfx.num_gfx_rings = 0; 4717 else 4718 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4719 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4720 AMDGPU_MAX_COMPUTE_RINGS); 4721 gfx_v9_0_set_kiq_pm4_funcs(adev); 4722 gfx_v9_0_set_ring_funcs(adev); 4723 gfx_v9_0_set_irq_funcs(adev); 4724 gfx_v9_0_set_gds_init(adev); 4725 gfx_v9_0_set_rlc_funcs(adev); 4726 4727 return 0; 4728 } 4729 4730 static int gfx_v9_0_ecc_late_init(void *handle) 4731 { 4732 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4733 int r; 4734 4735 /* 4736 * Temp workaround to fix the issue that CP firmware fails to 4737 * update read pointer when CPDMA is writing clearing operation 4738 * to GDS in suspend/resume sequence on several cards. So just 4739 * limit this operation in cold boot sequence. 4740 */ 4741 if ((!adev->in_suspend) && 4742 (adev->gds.gds_size)) { 4743 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4744 if (r) 4745 return r; 4746 } 4747 4748 /* requires IBs so do in late init after IB pool is initialized */ 4749 if (adev->asic_type == CHIP_ALDEBARAN) 4750 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4751 else 4752 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4753 4754 if (r) 4755 return r; 4756 4757 if (adev->gfx.ras_funcs && 4758 adev->gfx.ras_funcs->ras_late_init) { 4759 r = adev->gfx.ras_funcs->ras_late_init(adev); 4760 if (r) 4761 return r; 4762 } 4763 4764 if (adev->gfx.ras_funcs && 4765 adev->gfx.ras_funcs->enable_watchdog_timer) 4766 adev->gfx.ras_funcs->enable_watchdog_timer(adev); 4767 4768 return 0; 4769 } 4770 4771 static int gfx_v9_0_late_init(void *handle) 4772 { 4773 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4774 int r; 4775 4776 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4777 if (r) 4778 return r; 4779 4780 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4781 if (r) 4782 return r; 4783 4784 r = gfx_v9_0_ecc_late_init(handle); 4785 if (r) 4786 return r; 4787 4788 return 0; 4789 } 4790 4791 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4792 { 4793 uint32_t rlc_setting; 4794 4795 /* if RLC is not enabled, do nothing */ 4796 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4797 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4798 return false; 4799 4800 return true; 4801 } 4802 4803 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 4804 { 4805 uint32_t data; 4806 unsigned i; 4807 4808 data = RLC_SAFE_MODE__CMD_MASK; 4809 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4810 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4811 4812 /* wait for RLC_SAFE_MODE */ 4813 for (i = 0; i < adev->usec_timeout; i++) { 4814 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4815 break; 4816 udelay(1); 4817 } 4818 } 4819 4820 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 4821 { 4822 uint32_t data; 4823 4824 data = RLC_SAFE_MODE__CMD_MASK; 4825 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4826 } 4827 4828 static void 
gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4829 bool enable) 4830 { 4831 amdgpu_gfx_rlc_enter_safe_mode(adev); 4832 4833 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4834 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4835 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4836 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4837 } else { 4838 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4839 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4840 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4841 } 4842 4843 amdgpu_gfx_rlc_exit_safe_mode(adev); 4844 } 4845 4846 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4847 bool enable) 4848 { 4849 /* TODO: double check if we need to perform under safe mode */ 4850 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4851 4852 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4853 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4854 else 4855 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4856 4857 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4858 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4859 else 4860 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4861 4862 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4863 } 4864 4865 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4866 bool enable) 4867 { 4868 uint32_t data, def; 4869 4870 amdgpu_gfx_rlc_enter_safe_mode(adev); 4871 4872 /* It is disabled by HW by default */ 4873 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4874 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4875 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4876 4877 if (adev->asic_type != CHIP_VEGA12) 4878 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4879 4880 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4881 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4882 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4883 4884 /* only for Vega10 & Raven1 */ 4885 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4886 4887 if (def != data) 4888 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4889 4890 /* MGLS is a global flag to control all MGLS in GFX */ 4891 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4892 /* 2 - RLC memory Light sleep */ 4893 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4894 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4895 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4896 if (def != data) 4897 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4898 } 4899 /* 3 - CP memory Light sleep */ 4900 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4901 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4902 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4903 if (def != data) 4904 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4905 } 4906 } 4907 } else { 4908 /* 1 - MGCG_OVERRIDE */ 4909 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4910 4911 if (adev->asic_type != CHIP_VEGA12) 4912 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4913 4914 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4915 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4916 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4917 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4918 4919 if (def != data) 4920 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4921 4922 /* 2 - disable MGLS in RLC */ 4923 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4924 if (data & 
RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4925 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4926 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4927 } 4928 4929 /* 3 - disable MGLS in CP */ 4930 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4931 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4932 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4933 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4934 } 4935 } 4936 4937 amdgpu_gfx_rlc_exit_safe_mode(adev); 4938 } 4939 4940 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4941 bool enable) 4942 { 4943 uint32_t data, def; 4944 4945 if (!adev->gfx.num_gfx_rings) 4946 return; 4947 4948 amdgpu_gfx_rlc_enter_safe_mode(adev); 4949 4950 /* Enable 3D CGCG/CGLS */ 4951 if (enable) { 4952 /* write cmd to clear cgcg/cgls ov */ 4953 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4954 /* unset CGCG override */ 4955 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4956 /* update CGCG and CGLS override bits */ 4957 if (def != data) 4958 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4959 4960 /* enable 3Dcgcg FSM(0x0000363f) */ 4961 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4962 4963 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 4964 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4965 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4966 else 4967 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 4968 4969 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4970 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4971 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4972 if (def != data) 4973 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4974 4975 /* set IDLE_POLL_COUNT(0x00900100) */ 4976 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4977 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4978 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4979 if (def != data) 4980 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4981 } else { 4982 /* Disable CGCG/CGLS */ 4983 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4984 /* disable cgcg, cgls should be disabled */ 4985 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4986 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4987 /* disable cgcg and cgls in FSM */ 4988 if (def != data) 4989 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4990 } 4991 4992 amdgpu_gfx_rlc_exit_safe_mode(adev); 4993 } 4994 4995 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4996 bool enable) 4997 { 4998 uint32_t def, data; 4999 5000 amdgpu_gfx_rlc_enter_safe_mode(adev); 5001 5002 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5003 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5004 /* unset CGCG override */ 5005 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5006 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5007 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5008 else 5009 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5010 /* update CGCG and CGLS override bits */ 5011 if (def != data) 5012 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5013 5014 /* enable cgcg FSM(0x0000363F) */ 5015 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5016 5017 if (adev->asic_type == CHIP_ARCTURUS) 5018 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5019 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5020 else 5021 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5022 
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5023 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5024 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5025 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5026 if (def != data) 5027 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5028 5029 /* set IDLE_POLL_COUNT(0x00900100) */ 5030 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5031 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5032 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5033 if (def != data) 5034 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5035 } else { 5036 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5037 /* reset CGCG/CGLS bits */ 5038 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5039 /* disable cgcg and cgls in FSM */ 5040 if (def != data) 5041 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5042 } 5043 5044 amdgpu_gfx_rlc_exit_safe_mode(adev); 5045 } 5046 5047 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5048 bool enable) 5049 { 5050 if (enable) { 5051 /* CGCG/CGLS should be enabled after MGCG/MGLS 5052 * === MGCG + MGLS === 5053 */ 5054 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5055 /* === CGCG /CGLS for GFX 3D Only === */ 5056 gfx_v9_0_update_3d_clock_gating(adev, enable); 5057 /* === CGCG + CGLS === */ 5058 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5059 } else { 5060 /* CGCG/CGLS should be disabled before MGCG/MGLS 5061 * === CGCG + CGLS === 5062 */ 5063 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5064 /* === CGCG /CGLS for GFX 3D Only === */ 5065 gfx_v9_0_update_3d_clock_gating(adev, enable); 5066 /* === MGCG + MGLS === */ 5067 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5068 } 5069 return 0; 5070 } 5071 5072 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) 5073 { 5074 u32 reg, data; 5075 5076 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5077 if (amdgpu_sriov_is_pp_one_vf(adev)) 5078 data = RREG32_NO_KIQ(reg); 5079 else 5080 data = RREG32(reg); 5081 5082 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5083 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5084 5085 if (amdgpu_sriov_is_pp_one_vf(adev)) 5086 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5087 else 5088 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5089 } 5090 5091 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5092 uint32_t offset, 5093 struct soc15_reg_rlcg *entries, int arr_size) 5094 { 5095 int i; 5096 uint32_t reg; 5097 5098 if (!entries) 5099 return false; 5100 5101 for (i = 0; i < arr_size; i++) { 5102 const struct soc15_reg_rlcg *entry; 5103 5104 entry = &entries[i]; 5105 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5106 if (offset == reg) 5107 return true; 5108 } 5109 5110 return false; 5111 } 5112 5113 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5114 { 5115 return gfx_v9_0_check_rlcg_range(adev, offset, 5116 (void *)rlcg_access_gc_9_0, 5117 ARRAY_SIZE(rlcg_access_gc_9_0)); 5118 } 5119 5120 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5121 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5122 .set_safe_mode = gfx_v9_0_set_safe_mode, 5123 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5124 .init = gfx_v9_0_rlc_init, 5125 .get_csb_size = gfx_v9_0_get_csb_size, 5126 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5127 .get_cp_table_num = 
gfx_v9_0_cp_jump_table_num, 5128 .resume = gfx_v9_0_rlc_resume, 5129 .stop = gfx_v9_0_rlc_stop, 5130 .reset = gfx_v9_0_rlc_reset, 5131 .start = gfx_v9_0_rlc_start, 5132 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5133 .rlcg_wreg = gfx_v9_0_rlcg_wreg, 5134 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5135 }; 5136 5137 static int gfx_v9_0_set_powergating_state(void *handle, 5138 enum amd_powergating_state state) 5139 { 5140 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5141 bool enable = (state == AMD_PG_STATE_GATE); 5142 5143 switch (adev->asic_type) { 5144 case CHIP_RAVEN: 5145 case CHIP_RENOIR: 5146 if (!enable) 5147 amdgpu_gfx_off_ctrl(adev, false); 5148 5149 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5150 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5151 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5152 } else { 5153 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5154 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5155 } 5156 5157 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5158 gfx_v9_0_enable_cp_power_gating(adev, true); 5159 else 5160 gfx_v9_0_enable_cp_power_gating(adev, false); 5161 5162 /* update gfx cgpg state */ 5163 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5164 5165 /* update mgcg state */ 5166 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5167 5168 if (enable) 5169 amdgpu_gfx_off_ctrl(adev, true); 5170 break; 5171 case CHIP_VEGA12: 5172 amdgpu_gfx_off_ctrl(adev, enable); 5173 break; 5174 default: 5175 break; 5176 } 5177 5178 return 0; 5179 } 5180 5181 static int gfx_v9_0_set_clockgating_state(void *handle, 5182 enum amd_clockgating_state state) 5183 { 5184 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5185 5186 if (amdgpu_sriov_vf(adev)) 5187 return 0; 5188 5189 switch (adev->asic_type) { 5190 case CHIP_VEGA10: 5191 case CHIP_VEGA12: 5192 case CHIP_VEGA20: 5193 case CHIP_RAVEN: 5194 case CHIP_ARCTURUS: 5195 case CHIP_RENOIR: 5196 case CHIP_ALDEBARAN: 5197 gfx_v9_0_update_gfx_clock_gating(adev, 5198 state == AMD_CG_STATE_GATE); 5199 break; 5200 default: 5201 break; 5202 } 5203 return 0; 5204 } 5205 5206 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 5207 { 5208 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5209 int data; 5210 5211 if (amdgpu_sriov_vf(adev)) 5212 *flags = 0; 5213 5214 /* AMD_CG_SUPPORT_GFX_MGCG */ 5215 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5216 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5217 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5218 5219 /* AMD_CG_SUPPORT_GFX_CGCG */ 5220 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5221 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5222 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5223 5224 /* AMD_CG_SUPPORT_GFX_CGLS */ 5225 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5226 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5227 5228 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5229 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5230 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5231 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5232 5233 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5234 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5235 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5236 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5237 5238 if (adev->asic_type != CHIP_ARCTURUS) { 5239 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5240 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5241 if (data 
& RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5242 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5243 5244 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5245 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5246 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5247 } 5248 } 5249 5250 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5251 { 5252 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 5253 } 5254 5255 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5256 { 5257 struct amdgpu_device *adev = ring->adev; 5258 u64 wptr; 5259 5260 /* XXX check if swapping is necessary on BE */ 5261 if (ring->use_doorbell) { 5262 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 5263 } else { 5264 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5265 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5266 } 5267 5268 return wptr; 5269 } 5270 5271 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5272 { 5273 struct amdgpu_device *adev = ring->adev; 5274 5275 if (ring->use_doorbell) { 5276 /* XXX check if swapping is necessary on BE */ 5277 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5278 WDOORBELL64(ring->doorbell_index, ring->wptr); 5279 } else { 5280 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5281 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5282 } 5283 } 5284 5285 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5286 { 5287 struct amdgpu_device *adev = ring->adev; 5288 u32 ref_and_mask, reg_mem_engine; 5289 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5290 5291 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5292 switch (ring->me) { 5293 case 1: 5294 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5295 break; 5296 case 2: 5297 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5298 break; 5299 default: 5300 return; 5301 } 5302 reg_mem_engine = 0; 5303 } else { 5304 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5305 reg_mem_engine = 1; /* pfp */ 5306 } 5307 5308 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5309 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5310 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5311 ref_and_mask, ref_and_mask, 0x20); 5312 } 5313 5314 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5315 struct amdgpu_job *job, 5316 struct amdgpu_ib *ib, 5317 uint32_t flags) 5318 { 5319 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5320 u32 header, control = 0; 5321 5322 if (ib->flags & AMDGPU_IB_FLAG_CE) 5323 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5324 else 5325 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5326 5327 control |= ib->length_dw | (vmid << 24); 5328 5329 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5330 control |= INDIRECT_BUFFER_PRE_ENB(1); 5331 5332 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5333 gfx_v9_0_ring_emit_de_meta(ring); 5334 } 5335 5336 amdgpu_ring_write(ring, header); 5337 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5338 amdgpu_ring_write(ring, 5339 #ifdef __BIG_ENDIAN 5340 (2 << 0) | 5341 #endif 5342 lower_32_bits(ib->gpu_addr)); 5343 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5344 amdgpu_ring_write(ring, control); 5345 } 5346 5347 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5348 struct amdgpu_job *job, 5349 struct amdgpu_ib *ib, 5350 uint32_t flags) 5351 { 5352 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5353 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5354 
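/*
 * The control word above packs the IB size in dwords into the low bits and
 * the VMID into the field at bit 24 (vmid << 24), with INDIRECT_BUFFER_VALID
 * marking the packet as carrying a valid IB.  For illustration only (values
 * chosen for this example, not from the driver): a 16-dword IB scheduled on
 * behalf of vmid 3 would yield
 *   control = INDIRECT_BUFFER_VALID | 16 | (3 << 24)
 */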
5355 /* Currently, there is a high possibility to get wave ID mismatch 5356 * between ME and GDS, leading to a hw deadlock, because ME generates 5357 * different wave IDs than the GDS expects. This situation happens 5358 * randomly when at least 5 compute pipes use GDS ordered append. 5359 * The wave IDs generated by ME are also wrong after suspend/resume. 5360 * Those are probably bugs somewhere else in the kernel driver. 5361 * 5362 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5363 * GDS to 0 for this ring (me/pipe). 5364 */ 5365 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5366 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5367 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5368 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5369 } 5370 5371 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5372 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5373 amdgpu_ring_write(ring, 5374 #ifdef __BIG_ENDIAN 5375 (2 << 0) | 5376 #endif 5377 lower_32_bits(ib->gpu_addr)); 5378 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5379 amdgpu_ring_write(ring, control); 5380 } 5381 5382 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5383 u64 seq, unsigned flags) 5384 { 5385 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5386 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5387 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5388 5389 /* RELEASE_MEM - flush caches, send int */ 5390 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5391 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 5392 EOP_TC_NC_ACTION_EN) : 5393 (EOP_TCL1_ACTION_EN | 5394 EOP_TC_ACTION_EN | 5395 EOP_TC_WB_ACTION_EN | 5396 EOP_TC_MD_ACTION_EN)) | 5397 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5398 EVENT_INDEX(5))); 5399 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5400 5401 /* 5402 * the address should be Qword aligned if 64bit write, Dword 5403 * aligned if only send 32bit data low (discard data high) 5404 */ 5405 if (write64bit) 5406 BUG_ON(addr & 0x7); 5407 else 5408 BUG_ON(addr & 0x3); 5409 amdgpu_ring_write(ring, lower_32_bits(addr)); 5410 amdgpu_ring_write(ring, upper_32_bits(addr)); 5411 amdgpu_ring_write(ring, lower_32_bits(seq)); 5412 amdgpu_ring_write(ring, upper_32_bits(seq)); 5413 amdgpu_ring_write(ring, 0); 5414 } 5415 5416 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5417 { 5418 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5419 uint32_t seq = ring->fence_drv.sync_seq; 5420 uint64_t addr = ring->fence_drv.gpu_addr; 5421 5422 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5423 lower_32_bits(addr), upper_32_bits(addr), 5424 seq, 0xffffffff, 4); 5425 } 5426 5427 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5428 unsigned vmid, uint64_t pd_addr) 5429 { 5430 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5431 5432 /* compute doesn't have PFP */ 5433 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5434 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5435 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5436 amdgpu_ring_write(ring, 0x0); 5437 } 5438 } 5439 5440 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5441 { 5442 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 5443 } 5444 5445 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5446 { 5447 u64 wptr; 5448 5449 /* XXX check if swapping is necessary on BE */ 5450 if (ring->use_doorbell) 5451 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 5452 else 5453 BUG(); 5454 return wptr; 5455 } 5456 5457 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5458 { 5459 struct amdgpu_device *adev = ring->adev; 5460 5461 /* XXX check if swapping is necessary on BE */ 5462 if (ring->use_doorbell) { 5463 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); 5464 WDOORBELL64(ring->doorbell_index, ring->wptr); 5465 } else{ 5466 BUG(); /* only DOORBELL method supported on gfx9 now */ 5467 } 5468 } 5469 5470 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5471 u64 seq, unsigned int flags) 5472 { 5473 struct amdgpu_device *adev = ring->adev; 5474 5475 /* we only allocate 32bit for each seq wb address */ 5476 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5477 5478 /* write fence seq to the "addr" */ 5479 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5480 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5481 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5482 amdgpu_ring_write(ring, lower_32_bits(addr)); 5483 amdgpu_ring_write(ring, upper_32_bits(addr)); 5484 amdgpu_ring_write(ring, lower_32_bits(seq)); 5485 5486 if (flags & AMDGPU_FENCE_FLAG_INT) { 5487 /* set register to trigger INT */ 5488 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5489 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5490 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5491 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5492 amdgpu_ring_write(ring, 0); 5493 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5494 } 5495 } 5496 5497 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5498 { 5499 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5500 amdgpu_ring_write(ring, 0); 5501 } 5502 5503 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring 
*ring) 5504 { 5505 struct v9_ce_ib_state ce_payload = {0}; 5506 uint64_t csa_addr; 5507 int cnt; 5508 5509 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5510 csa_addr = amdgpu_csa_vaddr(ring->adev); 5511 5512 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5513 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5514 WRITE_DATA_DST_SEL(8) | 5515 WR_CONFIRM) | 5516 WRITE_DATA_CACHE_POLICY(0)); 5517 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5518 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 5519 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 5520 } 5521 5522 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 5523 { 5524 struct v9_de_ib_state de_payload = {0}; 5525 uint64_t csa_addr, gds_addr; 5526 int cnt; 5527 5528 csa_addr = amdgpu_csa_vaddr(ring->adev); 5529 gds_addr = csa_addr + 4096; 5530 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5531 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5532 5533 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5534 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5535 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5536 WRITE_DATA_DST_SEL(8) | 5537 WR_CONFIRM) | 5538 WRITE_DATA_CACHE_POLICY(0)); 5539 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5540 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); 5541 amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); 5542 } 5543 5544 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5545 bool secure) 5546 { 5547 uint32_t v = secure ? FRAME_TMZ : 0; 5548 5549 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5550 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5551 } 5552 5553 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5554 { 5555 uint32_t dw2 = 0; 5556 5557 if (amdgpu_sriov_vf(ring->adev)) 5558 gfx_v9_0_ring_emit_ce_meta(ring); 5559 5560 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5561 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5562 /* set load_global_config & load_global_uconfig */ 5563 dw2 |= 0x8001; 5564 /* set load_cs_sh_regs */ 5565 dw2 |= 0x01000000; 5566 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5567 dw2 |= 0x10002; 5568 5569 /* set load_ce_ram if preamble presented */ 5570 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5571 dw2 |= 0x10000000; 5572 } else { 5573 /* still load_ce_ram if this is the first time preamble presented 5574 * although there is no context switch happens. 
5575 */ 5576 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5577 dw2 |= 0x10000000; 5578 } 5579 5580 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5581 amdgpu_ring_write(ring, dw2); 5582 amdgpu_ring_write(ring, 0); 5583 } 5584 5585 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 5586 { 5587 unsigned ret; 5588 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5589 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 5590 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 5591 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 5592 ret = ring->wptr & ring->buf_mask; 5593 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 5594 return ret; 5595 } 5596 5597 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 5598 { 5599 unsigned cur; 5600 BUG_ON(offset > ring->buf_mask); 5601 BUG_ON(ring->ring[offset] != 0x55aa55aa); 5602 5603 cur = (ring->wptr & ring->buf_mask) - 1; 5604 if (likely(cur > offset)) 5605 ring->ring[offset] = cur - offset; 5606 else 5607 ring->ring[offset] = (ring->ring_size>>2) - offset + cur; 5608 } 5609 5610 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5611 uint32_t reg_val_offs) 5612 { 5613 struct amdgpu_device *adev = ring->adev; 5614 5615 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5616 amdgpu_ring_write(ring, 0 | /* src: register*/ 5617 (5 << 8) | /* dst: memory */ 5618 (1 << 20)); /* write confirm */ 5619 amdgpu_ring_write(ring, reg); 5620 amdgpu_ring_write(ring, 0); 5621 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5622 reg_val_offs * 4)); 5623 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5624 reg_val_offs * 4)); 5625 } 5626 5627 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5628 uint32_t val) 5629 { 5630 uint32_t cmd = 0; 5631 5632 switch (ring->funcs->type) { 5633 case AMDGPU_RING_TYPE_GFX: 5634 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5635 break; 5636 case AMDGPU_RING_TYPE_KIQ: 5637 cmd = (1 << 16); /* no inc addr */ 5638 break; 5639 default: 5640 cmd = WR_CONFIRM; 5641 break; 5642 } 5643 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5644 amdgpu_ring_write(ring, cmd); 5645 amdgpu_ring_write(ring, reg); 5646 amdgpu_ring_write(ring, 0); 5647 amdgpu_ring_write(ring, val); 5648 } 5649 5650 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5651 uint32_t val, uint32_t mask) 5652 { 5653 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5654 } 5655 5656 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5657 uint32_t reg0, uint32_t reg1, 5658 uint32_t ref, uint32_t mask) 5659 { 5660 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5661 struct amdgpu_device *adev = ring->adev; 5662 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5663 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5664 5665 if (fw_version_ok) 5666 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5667 ref, mask, 0x20); 5668 else 5669 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5670 ref, mask); 5671 } 5672 5673 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5674 { 5675 struct amdgpu_device *adev = ring->adev; 5676 uint32_t value = 0; 5677 5678 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5679 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5680 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5681 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5682 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5683 } 5684 5685 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5686 enum amdgpu_interrupt_state state) 5687 { 5688 switch (state) { 5689 case AMDGPU_IRQ_STATE_DISABLE: 5690 case AMDGPU_IRQ_STATE_ENABLE: 5691 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5692 TIME_STAMP_INT_ENABLE, 5693 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5694 break; 5695 default: 5696 break; 5697 } 5698 } 5699 5700 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5701 int me, int pipe, 5702 enum amdgpu_interrupt_state state) 5703 { 5704 u32 mec_int_cntl, mec_int_cntl_reg; 5705 5706 /* 5707 * amdgpu controls only the first MEC. That's why this function only 5708 * handles the setting of interrupts for this specific MEC. All other 5709 * pipes' interrupts are set by amdkfd. 5710 */ 5711 5712 if (me == 1) { 5713 switch (pipe) { 5714 case 0: 5715 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5716 break; 5717 case 1: 5718 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5719 break; 5720 case 2: 5721 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5722 break; 5723 case 3: 5724 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5725 break; 5726 default: 5727 DRM_DEBUG("invalid pipe %d\n", pipe); 5728 return; 5729 } 5730 } else { 5731 DRM_DEBUG("invalid me %d\n", me); 5732 return; 5733 } 5734 5735 switch (state) { 5736 case AMDGPU_IRQ_STATE_DISABLE: 5737 mec_int_cntl = RREG32(mec_int_cntl_reg); 5738 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5739 TIME_STAMP_INT_ENABLE, 0); 5740 WREG32(mec_int_cntl_reg, mec_int_cntl); 5741 break; 5742 case AMDGPU_IRQ_STATE_ENABLE: 5743 mec_int_cntl = RREG32(mec_int_cntl_reg); 5744 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5745 TIME_STAMP_INT_ENABLE, 1); 5746 WREG32(mec_int_cntl_reg, mec_int_cntl); 5747 break; 5748 default: 5749 break; 5750 } 5751 } 5752 5753 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5754 struct amdgpu_irq_src *source, 5755 unsigned type, 5756 enum amdgpu_interrupt_state state) 5757 { 5758 switch (state) { 5759 case AMDGPU_IRQ_STATE_DISABLE: 5760 case AMDGPU_IRQ_STATE_ENABLE: 5761 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5762 PRIV_REG_INT_ENABLE, 5763 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5764 break; 5765 default: 5766 break; 5767 } 5768 5769 return 0; 5770 } 5771 5772 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5773 struct amdgpu_irq_src *source, 5774 unsigned type, 5775 enum amdgpu_interrupt_state state) 5776 { 5777 switch (state) { 5778 case AMDGPU_IRQ_STATE_DISABLE: 5779 case AMDGPU_IRQ_STATE_ENABLE: 5780 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5781 PRIV_INSTR_INT_ENABLE, 5782 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5783 break; 5784 default: 5785 break; 5786 } 5787 5788 return 0; 5789 } 5790 5791 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5792 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5793 CP_ECC_ERROR_INT_ENABLE, 1) 5794 5795 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5796 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5797 CP_ECC_ERROR_INT_ENABLE, 0) 5798 5799 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5800 struct amdgpu_irq_src *source, 5801 unsigned type, 5802 enum amdgpu_interrupt_state state) 5803 { 5804 switch (state) { 5805 case AMDGPU_IRQ_STATE_DISABLE: 5806 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5807 CP_ECC_ERROR_INT_ENABLE, 0); 5808 DISABLE_ECC_ON_ME_PIPE(1, 0); 5809 DISABLE_ECC_ON_ME_PIPE(1, 1); 5810 DISABLE_ECC_ON_ME_PIPE(1, 2); 5811 DISABLE_ECC_ON_ME_PIPE(1, 3); 5812 break; 5813 5814 case AMDGPU_IRQ_STATE_ENABLE: 5815 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5816 CP_ECC_ERROR_INT_ENABLE, 1); 5817 ENABLE_ECC_ON_ME_PIPE(1, 0); 5818 ENABLE_ECC_ON_ME_PIPE(1, 1); 5819 ENABLE_ECC_ON_ME_PIPE(1, 2); 5820 ENABLE_ECC_ON_ME_PIPE(1, 3); 5821 break; 5822 default: 5823 break; 5824 } 5825 5826 return 0; 5827 } 5828 5829 5830 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 5831 struct amdgpu_irq_src *src, 5832 unsigned type, 5833 enum amdgpu_interrupt_state state) 5834 { 5835 switch (type) { 5836 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 5837 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 5838 break; 5839 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 5840 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 5841 break; 5842 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 5843 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5844 break; 5845 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5846 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5847 break; 5848 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5849 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5850 break; 5851 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5852 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5853 break; 5854 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5855 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5856 break; 5857 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5858 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5859 break; 5860 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5861 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5862 break; 5863 default: 5864 break; 5865 } 5866 return 0; 5867 } 5868 5869 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5870 struct amdgpu_irq_src *source, 5871 struct amdgpu_iv_entry *entry) 5872 { 5873 int i; 5874 u8 me_id, pipe_id, queue_id; 5875 struct amdgpu_ring *ring; 5876 5877 DRM_DEBUG("IH: CP EOP\n"); 5878 me_id = (entry->ring_id & 0x0c) >> 2; 5879 pipe_id = (entry->ring_id & 0x03) >> 0; 5880 queue_id = (entry->ring_id & 0x70) >> 4; 5881 5882 switch (me_id) { 5883 case 0: 5884 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5885 break; 5886 case 1: 5887 case 2: 5888 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5889 ring = &adev->gfx.compute_ring[i]; 5890 /* Per-queue interrupt is supported for MEC starting from VI. 5891 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
5892 */ 5893 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 5894 amdgpu_fence_process(ring); 5895 } 5896 break; 5897 } 5898 return 0; 5899 } 5900 5901 static void gfx_v9_0_fault(struct amdgpu_device *adev, 5902 struct amdgpu_iv_entry *entry) 5903 { 5904 u8 me_id, pipe_id, queue_id; 5905 struct amdgpu_ring *ring; 5906 int i; 5907 5908 me_id = (entry->ring_id & 0x0c) >> 2; 5909 pipe_id = (entry->ring_id & 0x03) >> 0; 5910 queue_id = (entry->ring_id & 0x70) >> 4; 5911 5912 switch (me_id) { 5913 case 0: 5914 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 5915 break; 5916 case 1: 5917 case 2: 5918 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5919 ring = &adev->gfx.compute_ring[i]; 5920 if (ring->me == me_id && ring->pipe == pipe_id && 5921 ring->queue == queue_id) 5922 drm_sched_fault(&ring->sched); 5923 } 5924 break; 5925 } 5926 } 5927 5928 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 5929 struct amdgpu_irq_src *source, 5930 struct amdgpu_iv_entry *entry) 5931 { 5932 DRM_ERROR("Illegal register access in command stream\n"); 5933 gfx_v9_0_fault(adev, entry); 5934 return 0; 5935 } 5936 5937 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 5938 struct amdgpu_irq_src *source, 5939 struct amdgpu_iv_entry *entry) 5940 { 5941 DRM_ERROR("Illegal instruction in command stream\n"); 5942 gfx_v9_0_fault(adev, entry); 5943 return 0; 5944 } 5945 5946 5947 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 5948 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 5949 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 5950 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 5951 }, 5952 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 5953 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 5954 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 5955 }, 5956 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5957 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 5958 0, 0 5959 }, 5960 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 5961 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 5962 0, 0 5963 }, 5964 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 5965 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 5966 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 5967 }, 5968 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5969 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 5970 0, 0 5971 }, 5972 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 5973 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 5974 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 5975 }, 5976 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 5977 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 5978 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 5979 }, 5980 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 5981 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 5982 0, 0 5983 }, 5984 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 5985 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 5986 0, 0 5987 }, 5988 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 5989 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 5990 0, 0 5991 }, 5992 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5993 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 5994 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 5995 }, 5996 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 5997 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 5998 0, 0 5999 }, 6000 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, 
mmGDS_EDC_OA_PHY_CNT), 6001 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6002 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6003 }, 6004 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6005 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6006 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6007 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6008 }, 6009 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6010 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6011 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6012 0, 0 6013 }, 6014 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6015 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6016 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6017 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6018 }, 6019 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6020 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6021 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6022 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6023 }, 6024 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6025 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6026 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6027 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6028 }, 6029 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6030 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6031 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6032 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6033 }, 6034 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6035 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6036 0, 0 6037 }, 6038 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6039 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6040 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6041 }, 6042 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6043 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6044 0, 0 6045 }, 6046 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6047 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6048 0, 0 6049 }, 6050 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6051 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6052 0, 0 6053 }, 6054 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6055 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6056 0, 0 6057 }, 6058 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6059 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6060 0, 0 6061 }, 6062 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6063 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6064 0, 0 6065 }, 6066 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6067 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6068 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6069 }, 6070 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6071 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6072 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6073 }, 6074 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6075 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6076 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6077 }, 6078 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6079 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6080 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6081 }, 6082 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6083 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6084 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6085 }, 6086 
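/*
 * Each entry in this table pairs a SEC (correctable) field with a DED
 * (uncorrectable) field for one sub-block.  The TCC sub-blocks immediately
 * below only report a single-error-detect (SED) count, so their DED
 * mask/shift are left as 0, 0.
 */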
{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6087 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6088 0, 0 6089 }, 6090 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6091 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6092 0, 0 6093 }, 6094 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6095 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6096 0, 0 6097 }, 6098 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6099 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6100 0, 0 6101 }, 6102 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6103 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6104 0, 0 6105 }, 6106 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6107 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6108 0, 0 6109 }, 6110 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6111 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6112 0, 0 6113 }, 6114 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6115 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6116 0, 0 6117 }, 6118 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6119 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6120 0, 0 6121 }, 6122 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6123 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6124 0, 0 6125 }, 6126 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6127 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6128 0, 0 6129 }, 6130 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6131 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6132 0, 0 6133 }, 6134 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6135 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6136 0, 0 6137 }, 6138 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6139 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6140 0, 0 6141 }, 6142 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6143 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6144 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6145 }, 6146 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6147 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6148 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6149 }, 6150 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6151 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6152 0, 0 6153 }, 6154 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6155 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6156 0, 0 6157 }, 6158 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6159 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6160 0, 0 6161 }, 6162 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6163 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6164 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6165 }, 6166 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6167 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6168 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6169 }, 6170 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6171 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6172 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6173 }, 6174 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, 
mmTD_EDC_CNT), 6175 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6176 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6177 }, 6178 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6179 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6180 0, 0 6181 }, 6182 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6183 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6184 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6185 }, 6186 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6187 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6188 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6189 }, 6190 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6191 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6192 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6193 }, 6194 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6195 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6196 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6197 }, 6198 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6199 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6200 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6201 }, 6202 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6203 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6204 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6205 }, 6206 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6207 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6208 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6209 }, 6210 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6211 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6212 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6213 }, 6214 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6215 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6216 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6217 }, 6218 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6219 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6220 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6221 }, 6222 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6223 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6224 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6225 }, 6226 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6227 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6228 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6229 }, 6230 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6231 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6232 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6233 }, 6234 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6235 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6236 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6237 }, 6238 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6239 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6240 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6241 }, 6242 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6243 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6244 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6245 }, 6246 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6247 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6248 SOC15_REG_FIELD(SQC_EDC_CNT2, 
DATA_BANKA_BANK_RAM_DED_COUNT) 6249 }, 6250 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6251 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6252 0, 0 6253 }, 6254 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6255 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6256 0, 0 6257 }, 6258 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6259 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6260 0, 0 6261 }, 6262 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6263 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6264 0, 0 6265 }, 6266 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6267 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6268 0, 0 6269 }, 6270 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6271 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6272 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6273 }, 6274 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6275 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6276 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6277 }, 6278 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6279 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6280 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6281 }, 6282 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6283 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6284 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6285 }, 6286 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6287 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6288 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6289 }, 6290 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6291 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6292 0, 0 6293 }, 6294 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6295 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6296 0, 0 6297 }, 6298 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6299 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6300 0, 0 6301 }, 6302 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6303 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6304 0, 0 6305 }, 6306 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6307 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6308 0, 0 6309 }, 6310 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6311 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6312 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6313 }, 6314 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6315 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6316 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6317 }, 6318 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6319 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6320 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6321 }, 6322 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6323 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6324 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6325 }, 6326 { "EA_WRET_TAGMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6327 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6328 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6329 }, 6330 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6331 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6332 0, 0 6333 }, 6334 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6335 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6336 0, 0 6337 }, 6338 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6339 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6340 0, 0 6341 }, 6342 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6343 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6344 0, 0 6345 }, 6346 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6347 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6348 0, 0 6349 }, 6350 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6351 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6352 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6353 }, 6354 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6355 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6356 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6357 }, 6358 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6359 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6360 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6361 }, 6362 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6363 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6364 0, 0 6365 }, 6366 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6367 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6368 0, 0 6369 }, 6370 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6371 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6372 0, 0 6373 }, 6374 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6375 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6376 0, 0 6377 }, 6378 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6379 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6380 0, 0 6381 }, 6382 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6383 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6384 0, 0 6385 } 6386 }; 6387 6388 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6389 void *inject_if) 6390 { 6391 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6392 int ret; 6393 struct ta_ras_trigger_error_input block_info = { 0 }; 6394 6395 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6396 return -EINVAL; 6397 6398 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6399 return -EINVAL; 6400 6401 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6402 return -EPERM; 6403 6404 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6405 info->head.type)) { 6406 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n", 6407 ras_gfx_subblocks[info->head.sub_block_index].name, 6408 info->head.type); 6409 return -EPERM; 6410 } 6411 6412 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6413 info->head.type)) { 6414 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n", 6415 ras_gfx_subblocks[info->head.sub_block_index].name, 6416 info->head.type); 6417 return -EPERM; 6418 } 6419 6420 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6421 block_info.sub_block_index = 6422
ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6423 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6424 block_info.address = info->address; 6425 block_info.value = info->value; 6426 6427 mutex_lock(&adev->grbm_idx_mutex); 6428 ret = psp_ras_trigger_error(&adev->psp, &block_info); 6429 mutex_unlock(&adev->grbm_idx_mutex); 6430 6431 return ret; 6432 } 6433 6434 static const char *vml2_mems[] = { 6435 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6436 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6437 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6438 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6439 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6440 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6441 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6442 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6443 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6444 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6445 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6446 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6447 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6448 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6449 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6450 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6451 }; 6452 6453 static const char *vml2_walker_mems[] = { 6454 "UTC_VML2_CACHE_PDE0_MEM0", 6455 "UTC_VML2_CACHE_PDE0_MEM1", 6456 "UTC_VML2_CACHE_PDE1_MEM0", 6457 "UTC_VML2_CACHE_PDE1_MEM1", 6458 "UTC_VML2_CACHE_PDE2_MEM0", 6459 "UTC_VML2_CACHE_PDE2_MEM1", 6460 "UTC_VML2_RDIF_LOG_FIFO", 6461 }; 6462 6463 static const char *atc_l2_cache_2m_mems[] = { 6464 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6465 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6466 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6467 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6468 }; 6469 6470 static const char *atc_l2_cache_4k_mems[] = { 6471 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6472 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6473 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6474 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6475 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6476 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6477 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6478 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6479 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6480 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6481 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6482 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6483 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6484 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6485 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6486 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6487 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6488 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6489 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6490 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6491 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6492 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6493 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6494 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6495 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6496 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6497 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6498 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6499 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6500 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6501 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6502 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6503 }; 6504 6505 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6506 struct ras_err_data *err_data) 6507 { 6508 uint32_t i, data; 6509 uint32_t sec_count, ded_count; 6510 6511 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6512 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6513 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6514 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6515 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6516 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 
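	/* Editor's note (added comment): the register writes above and below follow one pattern: first point the *_INDEX register at value 255 (used here, by all appearances, as an out-of-range "park"/reset selector) and then write 0 to the matching counter register, so the per-instance reads in the loops that follow start from cleared counters. The function finishes by writing 255 to the index registers again. */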
6517 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6518 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6519 6520 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6521 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6522 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6523 6524 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6525 if (sec_count) { 6526 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6527 "SEC %d\n", i, vml2_mems[i], sec_count); 6528 err_data->ce_count += sec_count; 6529 } 6530 6531 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6532 if (ded_count) { 6533 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6534 "DED %d\n", i, vml2_mems[i], ded_count); 6535 err_data->ue_count += ded_count; 6536 } 6537 } 6538 6539 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6540 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6541 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6542 6543 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6544 SEC_COUNT); 6545 if (sec_count) { 6546 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6547 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6548 err_data->ce_count += sec_count; 6549 } 6550 6551 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6552 DED_COUNT); 6553 if (ded_count) { 6554 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6555 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6556 err_data->ue_count += ded_count; 6557 } 6558 } 6559 6560 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6561 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6562 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6563 6564 sec_count = (data & 0x00006000L) >> 0xd; 6565 if (sec_count) { 6566 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6567 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6568 sec_count); 6569 err_data->ce_count += sec_count; 6570 } 6571 } 6572 6573 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6574 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6575 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6576 6577 sec_count = (data & 0x00006000L) >> 0xd; 6578 if (sec_count) { 6579 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6580 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6581 sec_count); 6582 err_data->ce_count += sec_count; 6583 } 6584 6585 ded_count = (data & 0x00018000L) >> 0xf; 6586 if (ded_count) { 6587 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6588 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6589 ded_count); 6590 err_data->ue_count += ded_count; 6591 } 6592 } 6593 6594 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6595 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6596 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6597 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6598 6599 return 0; 6600 } 6601 6602 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6603 const struct soc15_reg_entry *reg, 6604 uint32_t se_id, uint32_t inst_id, uint32_t value, 6605 uint32_t *sec_count, uint32_t *ded_count) 6606 { 6607 uint32_t i; 6608 uint32_t sec_cnt, ded_cnt; 6609 6610 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6611 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6612 gfx_v9_0_ras_fields[i].seg != reg->seg || 6613 gfx_v9_0_ras_fields[i].inst != reg->inst) 6614 continue; 6615 6616 sec_cnt = (value & 6617 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6618 gfx_v9_0_ras_fields[i].sec_count_shift; 6619 if (sec_cnt) { 6620 dev_info(adev->dev, "GFX SubBlock %s, " 6621 "Instance[%d][%d], 
SEC %d\n", 6622 gfx_v9_0_ras_fields[i].name, 6623 se_id, inst_id, 6624 sec_cnt); 6625 *sec_count += sec_cnt; 6626 } 6627 6628 ded_cnt = (value & 6629 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6630 gfx_v9_0_ras_fields[i].ded_count_shift; 6631 if (ded_cnt) { 6632 dev_info(adev->dev, "GFX SubBlock %s, " 6633 "Instance[%d][%d], DED %d\n", 6634 gfx_v9_0_ras_fields[i].name, 6635 se_id, inst_id, 6636 ded_cnt); 6637 *ded_count += ded_cnt; 6638 } 6639 } 6640 6641 return 0; 6642 } 6643 6644 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6645 { 6646 int i, j, k; 6647 6648 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6649 return; 6650 6651 /* read back registers to clear the counters */ 6652 mutex_lock(&adev->grbm_idx_mutex); 6653 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6654 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6655 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6656 gfx_v9_0_select_se_sh(adev, j, 0x0, k); 6657 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6658 } 6659 } 6660 } 6661 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6662 mutex_unlock(&adev->grbm_idx_mutex); 6663 6664 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6665 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6666 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6667 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6668 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6669 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6670 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6671 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6672 6673 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6674 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6675 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6676 } 6677 6678 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6679 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6680 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6681 } 6682 6683 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6684 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6685 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6686 } 6687 6688 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6689 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6690 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6691 } 6692 6693 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6694 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6695 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6696 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6697 } 6698 6699 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6700 void *ras_error_status) 6701 { 6702 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6703 uint32_t sec_count = 0, ded_count = 0; 6704 uint32_t i, j, k; 6705 uint32_t reg_value; 6706 6707 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6708 return -EINVAL; 6709 6710 err_data->ue_count = 0; 6711 err_data->ce_count = 0; 6712 6713 mutex_lock(&adev->grbm_idx_mutex); 6714 6715 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6716 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6717 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6718 gfx_v9_0_select_se_sh(adev, j, 0, k); 6719 reg_value = 6720 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6721 if (reg_value) 6722 gfx_v9_0_ras_error_count(adev, 6723 &gfx_v9_0_edc_counter_regs[i], 6724 j, k, reg_value, 6725 
&sec_count, &ded_count); 6726 } 6727 } 6728 } 6729 6730 err_data->ce_count += sec_count; 6731 err_data->ue_count += ded_count; 6732 6733 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6734 mutex_unlock(&adev->grbm_idx_mutex); 6735 6736 gfx_v9_0_query_utc_edc_status(adev, err_data); 6737 6738 return 0; 6739 } 6740 6741 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 6742 { 6743 const unsigned int cp_coher_cntl = 6744 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 6745 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 6746 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 6747 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 6748 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 6749 6750 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6751 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 6752 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 6753 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6754 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6755 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6756 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6757 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6758 } 6759 6760 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 6761 uint32_t pipe, bool enable) 6762 { 6763 struct amdgpu_device *adev = ring->adev; 6764 uint32_t val; 6765 uint32_t wcl_cs_reg; 6766 6767 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */ 6768 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 6769 6770 switch (pipe) { 6771 case 0: 6772 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 6773 break; 6774 case 1: 6775 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 6776 break; 6777 case 2: 6778 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 6779 break; 6780 case 3: 6781 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 6782 break; 6783 default: 6784 DRM_DEBUG("invalid pipe %d\n", pipe); 6785 return; 6786 } 6787 6788 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 6789 6790 } 6791 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 6792 { 6793 struct amdgpu_device *adev = ring->adev; 6794 uint32_t val; 6795 int i; 6796 6797 6798 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit 6799 * the number of gfx waves. Setting 5 bits will make sure gfx only gets 6800 * around 25% of gpu resources. 6801 */ 6802 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 6803 amdgpu_ring_emit_wreg(ring, 6804 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 6805 val); 6806 6807 /* Restrict waves for normal/low priority compute queues as well 6808 * to get the best QoS for high priority compute jobs. 6809 * 6810 * amdgpu controls only the 1st ME (CS pipes 0-3).
6811 */ 6812 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6813 if (i != ring->pipe) 6814 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 6815 6816 } 6817 } 6818 6819 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 6820 .name = "gfx_v9_0", 6821 .early_init = gfx_v9_0_early_init, 6822 .late_init = gfx_v9_0_late_init, 6823 .sw_init = gfx_v9_0_sw_init, 6824 .sw_fini = gfx_v9_0_sw_fini, 6825 .hw_init = gfx_v9_0_hw_init, 6826 .hw_fini = gfx_v9_0_hw_fini, 6827 .suspend = gfx_v9_0_suspend, 6828 .resume = gfx_v9_0_resume, 6829 .is_idle = gfx_v9_0_is_idle, 6830 .wait_for_idle = gfx_v9_0_wait_for_idle, 6831 .soft_reset = gfx_v9_0_soft_reset, 6832 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 6833 .set_powergating_state = gfx_v9_0_set_powergating_state, 6834 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 6835 }; 6836 6837 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 6838 .type = AMDGPU_RING_TYPE_GFX, 6839 .align_mask = 0xff, 6840 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6841 .support_64bit_ptrs = true, 6842 .vmhub = AMDGPU_GFXHUB_0, 6843 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 6844 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 6845 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 6846 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 6847 5 + /* COND_EXEC */ 6848 7 + /* PIPELINE_SYNC */ 6849 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6850 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6851 2 + /* VM_FLUSH */ 6852 8 + /* FENCE for VM_FLUSH */ 6853 20 + /* GDS switch */ 6854 4 + /* double SWITCH_BUFFER, 6855 the first COND_EXEC jump to the place just 6856 prior to this double SWITCH_BUFFER */ 6857 5 + /* COND_EXEC */ 6858 7 + /* HDP_flush */ 6859 4 + /* VGT_flush */ 6860 14 + /* CE_META */ 6861 31 + /* DE_META */ 6862 3 + /* CNTX_CTRL */ 6863 5 + /* HDP_INVL */ 6864 8 + 8 + /* FENCE x2 */ 6865 2 + /* SWITCH_BUFFER */ 6866 7, /* gfx_v9_0_emit_mem_sync */ 6867 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 6868 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 6869 .emit_fence = gfx_v9_0_ring_emit_fence, 6870 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6871 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6872 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6873 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6874 .test_ring = gfx_v9_0_ring_test_ring, 6875 .test_ib = gfx_v9_0_ring_test_ib, 6876 .insert_nop = amdgpu_ring_insert_nop, 6877 .pad_ib = amdgpu_ring_generic_pad_ib, 6878 .emit_switch_buffer = gfx_v9_ring_emit_sb, 6879 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 6880 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 6881 .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, 6882 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 6883 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6884 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6885 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6886 .soft_recovery = gfx_v9_0_ring_soft_recovery, 6887 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6888 }; 6889 6890 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 6891 .type = AMDGPU_RING_TYPE_COMPUTE, 6892 .align_mask = 0xff, 6893 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6894 .support_64bit_ptrs = true, 6895 .vmhub = AMDGPU_GFXHUB_0, 6896 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6897 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6898 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6899 .emit_frame_size = 6900 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6901 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6902 5 + /* hdp invalidate */ 6903 7 + /* 
gfx_v9_0_ring_emit_pipeline_sync */ 6904 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6905 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6906 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6907 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 6908 7 + /* gfx_v9_0_emit_mem_sync */ 6909 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 6910 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 6911 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6912 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 6913 .emit_fence = gfx_v9_0_ring_emit_fence, 6914 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 6915 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 6916 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 6917 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 6918 .test_ring = gfx_v9_0_ring_test_ring, 6919 .test_ib = gfx_v9_0_ring_test_ib, 6920 .insert_nop = amdgpu_ring_insert_nop, 6921 .pad_ib = amdgpu_ring_generic_pad_ib, 6922 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6923 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6924 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6925 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 6926 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 6927 }; 6928 6929 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 6930 .type = AMDGPU_RING_TYPE_KIQ, 6931 .align_mask = 0xff, 6932 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6933 .support_64bit_ptrs = true, 6934 .vmhub = AMDGPU_GFXHUB_0, 6935 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 6936 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 6937 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 6938 .emit_frame_size = 6939 20 + /* gfx_v9_0_ring_emit_gds_switch */ 6940 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 6941 5 + /* hdp invalidate */ 6942 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 6943 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 6944 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 6945 2 + /* gfx_v9_0_ring_emit_vm_flush */ 6946 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6947 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 6948 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 6949 .test_ring = gfx_v9_0_ring_test_ring, 6950 .insert_nop = amdgpu_ring_insert_nop, 6951 .pad_ib = amdgpu_ring_generic_pad_ib, 6952 .emit_rreg = gfx_v9_0_ring_emit_rreg, 6953 .emit_wreg = gfx_v9_0_ring_emit_wreg, 6954 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 6955 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 6956 }; 6957 6958 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 6959 { 6960 int i; 6961 6962 adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; 6963 6964 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6965 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 6966 6967 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6968 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 6969 } 6970 6971 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 6972 .set = gfx_v9_0_set_eop_interrupt_state, 6973 .process = gfx_v9_0_eop_irq, 6974 }; 6975 6976 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 6977 .set = gfx_v9_0_set_priv_reg_fault_state, 6978 .process = gfx_v9_0_priv_reg_irq, 6979 }; 6980 6981 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 6982 .set = gfx_v9_0_set_priv_inst_fault_state, 6983 .process = gfx_v9_0_priv_inst_irq, 6984 }; 6985 6986 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 6987 .set = gfx_v9_0_set_cp_ecc_error_state, 6988 
.process = amdgpu_gfx_cp_ecc_error_irq, 6989 }; 6990 6991 6992 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 6993 { 6994 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6995 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 6996 6997 adev->gfx.priv_reg_irq.num_types = 1; 6998 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 6999 7000 adev->gfx.priv_inst_irq.num_types = 1; 7001 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7002 7003 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */ 7004 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7005 } 7006 7007 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7008 { 7009 switch (adev->asic_type) { 7010 case CHIP_VEGA10: 7011 case CHIP_VEGA12: 7012 case CHIP_VEGA20: 7013 case CHIP_RAVEN: 7014 case CHIP_ARCTURUS: 7015 case CHIP_RENOIR: 7016 case CHIP_ALDEBARAN: 7017 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7018 break; 7019 default: 7020 break; 7021 } 7022 } 7023 7024 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7025 { 7026 /* init asic gds info */ 7027 switch (adev->asic_type) { 7028 case CHIP_VEGA10: 7029 case CHIP_VEGA12: 7030 case CHIP_VEGA20: 7031 adev->gds.gds_size = 0x10000; 7032 break; 7033 case CHIP_RAVEN: 7034 case CHIP_ARCTURUS: 7035 adev->gds.gds_size = 0x1000; 7036 break; 7037 case CHIP_ALDEBARAN: 7038 /* aldebaran removed all the GDS internal memory, 7039 * only GWS opcodes are supported in the kernel, like barrier, 7040 * semaphore, etc. */ 7041 adev->gds.gds_size = 0; 7042 break; 7043 default: 7044 adev->gds.gds_size = 0x10000; 7045 break; 7046 } 7047 7048 switch (adev->asic_type) { 7049 case CHIP_VEGA10: 7050 case CHIP_VEGA20: 7051 adev->gds.gds_compute_max_wave_id = 0x7ff; 7052 break; 7053 case CHIP_VEGA12: 7054 adev->gds.gds_compute_max_wave_id = 0x27f; 7055 break; 7056 case CHIP_RAVEN: 7057 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 7058 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7059 else 7060 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7061 break; 7062 case CHIP_ARCTURUS: 7063 adev->gds.gds_compute_max_wave_id = 0xfff; 7064 break; 7065 case CHIP_ALDEBARAN: 7066 /* deprecated for Aldebaran, no usage at all */ 7067 adev->gds.gds_compute_max_wave_id = 0; 7068 break; 7069 default: 7070 /* this really depends on the chip */ 7071 adev->gds.gds_compute_max_wave_id = 0x7ff; 7072 break; 7073 } 7074 7075 adev->gds.gws_size = 64; 7076 adev->gds.oa_size = 16; 7077 } 7078 7079 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7080 u32 bitmap) 7081 { 7082 u32 data; 7083 7084 if (!bitmap) 7085 return; 7086 7087 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7088 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7089 7090 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7091 } 7092 7093 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7094 { 7095 u32 data, mask; 7096 7097 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7098 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7099 7100 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7101 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7102 7103 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7104 7105 return (~data) & mask; 7106 } 7107 7108 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7109 struct amdgpu_cu_info *cu_info) 7110 { 7111 int i, j, k, counter, active_cu_number = 0; 7112 u32 mask, bitmap, ao_bitmap, ao_cu_mask
= 0; 7113 unsigned disable_masks[4 * 4]; 7114 7115 if (!adev || !cu_info) 7116 return -EINVAL; 7117 7118 /* 7119 * 16 comes from the bitmap array size 4*4, which can cover all gfx9 ASICs 7120 */ 7121 if (adev->gfx.config.max_shader_engines * 7122 adev->gfx.config.max_sh_per_se > 16) 7123 return -EINVAL; 7124 7125 amdgpu_gfx_parse_disable_cu(disable_masks, 7126 adev->gfx.config.max_shader_engines, 7127 adev->gfx.config.max_sh_per_se); 7128 7129 mutex_lock(&adev->grbm_idx_mutex); 7130 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7131 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7132 mask = 1; 7133 ao_bitmap = 0; 7134 counter = 0; 7135 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 7136 gfx_v9_0_set_user_cu_inactive_bitmap( 7137 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7138 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7139 7140 /* 7141 * The bitmap (and ao_cu_bitmap) in the cu_info structure is 7142 * a 4x4 array, which is suitable for Vega 7143 * ASICs that have a 4*2 SE/SH layout. 7144 * But for Arcturus, the SE/SH layout is changed to 8*1. 7145 * To minimize the impact, we make it compatible 7146 * with the current bitmap array as below: 7147 * SE4,SH0 --> bitmap[0][1] 7148 * SE5,SH0 --> bitmap[1][1] 7149 * SE6,SH0 --> bitmap[2][1] 7150 * SE7,SH0 --> bitmap[3][1] 7151 */ 7152 cu_info->bitmap[i % 4][j + i / 4] = bitmap; 7153 7154 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7155 if (bitmap & mask) { 7156 if (counter < adev->gfx.config.max_cu_per_sh) 7157 ao_bitmap |= mask; 7158 counter++; 7159 } 7160 mask <<= 1; 7161 } 7162 active_cu_number += counter; 7163 if (i < 2 && j < 2) 7164 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7165 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7166 } 7167 } 7168 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7169 mutex_unlock(&adev->grbm_idx_mutex); 7170 7171 cu_info->number = active_cu_number; 7172 cu_info->ao_cu_mask = ao_cu_mask; 7173 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7174 7175 return 0; 7176 } 7177 7178 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7179 { 7180 .type = AMD_IP_BLOCK_TYPE_GFX, 7181 .major = 9, 7182 .minor = 0, 7183 .rev = 0, 7184 .funcs = &gfx_v9_0_ip_funcs, 7185 }; 7186
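/* Editor's note (usage sketch, an editorial addition rather than driver code): the
 * gfx_v9_0_ip_block exported above is not referenced elsewhere in this file; the
 * SoC-level setup code (soc15.c for these ASICs) registers it during IP-block
 * discovery, roughly:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the amd_ip_funcs and amdgpu_ring_funcs tables defined in this file
 * are driven by the generic amdgpu IP/ring framework.
 */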