/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS 1
#define GFX9_NUM_SW_GFX_RINGS 2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1

static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
	/* compute queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
};

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);

static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0 queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}


static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
					uint32_t xcc_id, uint32_t vmid)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	unsigned i;

	/* enter safe mode */
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
		/* wait till dequeue takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout)
			dev_err(adev->dev, "failed to wait on hqd deactivate\n");
	} else {
		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
	}

	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* exit safe mode */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
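		/*
		 * As in the Vega 10 case above, the common GC 9.2.1 settings are
		 * programmed first and the Vega 12 specific overrides below are
		 * layered on top of them.
		 */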
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need the common golden settings */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
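	/*
	 * me_fw_write_wait/mec_fw_write_wait are presumed to indicate that the
	 * CP ME/PFP and MEC firmware are recent enough to handle combined
	 * register write-then-wait packets; they default to false and are only
	 * set once the per-ASIC firmware/feature version minimums below are met.
	 */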
adev->gfx.me_fw_write_wait = false; 1269 adev->gfx.mec_fw_write_wait = false; 1270 1271 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1272 ((adev->gfx.mec_fw_version < 0x000001a5) || 1273 (adev->gfx.mec_feature_version < 46) || 1274 (adev->gfx.pfp_fw_version < 0x000000b7) || 1275 (adev->gfx.pfp_feature_version < 46))) 1276 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1277 1278 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1279 case IP_VERSION(9, 0, 1): 1280 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1281 (adev->gfx.me_feature_version >= 42) && 1282 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1283 (adev->gfx.pfp_feature_version >= 42)) 1284 adev->gfx.me_fw_write_wait = true; 1285 1286 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1287 (adev->gfx.mec_feature_version >= 42)) 1288 adev->gfx.mec_fw_write_wait = true; 1289 break; 1290 case IP_VERSION(9, 2, 1): 1291 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1292 (adev->gfx.me_feature_version >= 44) && 1293 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1294 (adev->gfx.pfp_feature_version >= 44)) 1295 adev->gfx.me_fw_write_wait = true; 1296 1297 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1298 (adev->gfx.mec_feature_version >= 44)) 1299 adev->gfx.mec_fw_write_wait = true; 1300 break; 1301 case IP_VERSION(9, 4, 0): 1302 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1303 (adev->gfx.me_feature_version >= 44) && 1304 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1305 (adev->gfx.pfp_feature_version >= 44)) 1306 adev->gfx.me_fw_write_wait = true; 1307 1308 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1309 (adev->gfx.mec_feature_version >= 44)) 1310 adev->gfx.mec_fw_write_wait = true; 1311 break; 1312 case IP_VERSION(9, 1, 0): 1313 case IP_VERSION(9, 2, 2): 1314 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1315 (adev->gfx.me_feature_version >= 42) && 1316 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1317 (adev->gfx.pfp_feature_version >= 42)) 1318 adev->gfx.me_fw_write_wait = true; 1319 1320 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1321 (adev->gfx.mec_feature_version >= 42)) 1322 adev->gfx.mec_fw_write_wait = true; 1323 break; 1324 default: 1325 adev->gfx.me_fw_write_wait = true; 1326 adev->gfx.mec_fw_write_wait = true; 1327 break; 1328 } 1329 } 1330 1331 struct amdgpu_gfxoff_quirk { 1332 u16 chip_vendor; 1333 u16 chip_device; 1334 u16 subsys_vendor; 1335 u16 subsys_device; 1336 u8 revision; 1337 }; 1338 1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1340 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1341 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1342 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1343 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1344 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1345 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1346 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1347 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1348 /* https://bbs.openkylin.top/t/topic/171497 */ 1349 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 }, 1350 /* HP 705G4 DM with R5 2400G */ 1351 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 }, 1352 { 0, 0, 0, 0, 0 }, 1353 }; 1354 1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1356 { 1357 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1358 1359 while (p && p->chip_device != 0) { 1360 if (pdev->vendor == p->chip_vendor && 1361 pdev->device == p->chip_device && 1362 pdev->subsystem_vendor == p->subsys_vendor && 1363 pdev->subsystem_device == 
p->subsys_device && 1364 pdev->revision == p->revision) { 1365 return true; 1366 } 1367 ++p; 1368 } 1369 return false; 1370 } 1371 1372 static bool is_raven_kicker(struct amdgpu_device *adev) 1373 { 1374 if (adev->pm.fw_version >= 0x41e2b) 1375 return true; 1376 else 1377 return false; 1378 } 1379 1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1381 { 1382 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1383 (adev->gfx.me_fw_version >= 0x000000a5) && 1384 (adev->gfx.me_feature_version >= 52)) 1385 return true; 1386 else 1387 return false; 1388 } 1389 1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1391 { 1392 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1393 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1394 1395 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1396 case IP_VERSION(9, 0, 1): 1397 case IP_VERSION(9, 2, 1): 1398 case IP_VERSION(9, 4, 0): 1399 break; 1400 case IP_VERSION(9, 2, 2): 1401 case IP_VERSION(9, 1, 0): 1402 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1403 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1404 ((!is_raven_kicker(adev) && 1405 adev->gfx.rlc_fw_version < 531) || 1406 (adev->gfx.rlc_feature_version < 1) || 1407 !adev->gfx.rlc.is_rlc_v2_1)) 1408 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1409 1410 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1411 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1412 AMD_PG_SUPPORT_CP | 1413 AMD_PG_SUPPORT_RLC_SMU_HS; 1414 break; 1415 case IP_VERSION(9, 3, 0): 1416 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1417 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1418 AMD_PG_SUPPORT_CP | 1419 AMD_PG_SUPPORT_RLC_SMU_HS; 1420 break; 1421 default: 1422 break; 1423 } 1424 } 1425 1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1427 char *chip_name) 1428 { 1429 int err; 1430 1431 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 1432 AMDGPU_UCODE_REQUIRED, 1433 "amdgpu/%s_pfp.bin", chip_name); 1434 if (err) 1435 goto out; 1436 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1437 1438 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 1439 AMDGPU_UCODE_REQUIRED, 1440 "amdgpu/%s_me.bin", chip_name); 1441 if (err) 1442 goto out; 1443 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1444 1445 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 1446 AMDGPU_UCODE_REQUIRED, 1447 "amdgpu/%s_ce.bin", chip_name); 1448 if (err) 1449 goto out; 1450 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1451 1452 out: 1453 if (err) { 1454 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1455 amdgpu_ucode_release(&adev->gfx.me_fw); 1456 amdgpu_ucode_release(&adev->gfx.ce_fw); 1457 } 1458 return err; 1459 } 1460 1461 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1462 char *chip_name) 1463 { 1464 int err; 1465 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1466 uint16_t version_major; 1467 uint16_t version_minor; 1468 uint32_t smu_version; 1469 1470 /* 1471 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1472 * instead of picasso_rlc.bin. 
1473 * Judgment method: 1474 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1475 * or revision >= 0xD8 && revision <= 0xDF 1476 * otherwise is PCO FP5 1477 */ 1478 if (!strcmp(chip_name, "picasso") && 1479 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1480 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1481 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1482 AMDGPU_UCODE_REQUIRED, 1483 "amdgpu/%s_rlc_am4.bin", chip_name); 1484 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1485 (smu_version >= 0x41e2b)) 1486 /** 1487 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1488 */ 1489 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1490 AMDGPU_UCODE_REQUIRED, 1491 "amdgpu/%s_kicker_rlc.bin", chip_name); 1492 else 1493 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1494 AMDGPU_UCODE_REQUIRED, 1495 "amdgpu/%s_rlc.bin", chip_name); 1496 if (err) 1497 goto out; 1498 1499 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1500 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1501 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1502 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1503 out: 1504 if (err) 1505 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1506 1507 return err; 1508 } 1509 1510 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1511 { 1512 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1513 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1514 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1515 return false; 1516 1517 return true; 1518 } 1519 1520 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1521 char *chip_name) 1522 { 1523 int err; 1524 1525 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1526 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1527 AMDGPU_UCODE_REQUIRED, 1528 "amdgpu/%s_sjt_mec.bin", chip_name); 1529 else 1530 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1531 AMDGPU_UCODE_REQUIRED, 1532 "amdgpu/%s_mec.bin", chip_name); 1533 if (err) 1534 goto out; 1535 1536 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1537 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1538 1539 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1540 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1541 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1542 AMDGPU_UCODE_REQUIRED, 1543 "amdgpu/%s_sjt_mec2.bin", chip_name); 1544 else 1545 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1546 AMDGPU_UCODE_REQUIRED, 1547 "amdgpu/%s_mec2.bin", chip_name); 1548 if (!err) { 1549 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1550 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1551 } else { 1552 err = 0; 1553 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1554 } 1555 } else { 1556 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1557 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1558 } 1559 1560 gfx_v9_0_check_if_need_gfxoff(adev); 1561 gfx_v9_0_check_fw_write_wait(adev); 1562 1563 out: 1564 if (err) 1565 amdgpu_ucode_release(&adev->gfx.mec_fw); 1566 return err; 1567 } 1568 1569 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1570 { 1571 char ucode_prefix[30]; 1572 int r; 1573 1574 DRM_DEBUG("\n"); 1575 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, 
ucode_prefix, sizeof(ucode_prefix)); 1576 1577 /* No CPG in Arcturus */ 1578 if (adev->gfx.num_gfx_rings) { 1579 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1580 if (r) 1581 return r; 1582 } 1583 1584 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1585 if (r) 1586 return r; 1587 1588 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1589 if (r) 1590 return r; 1591 1592 return r; 1593 } 1594 1595 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1596 { 1597 u32 count = 0; 1598 const struct cs_section_def *sect = NULL; 1599 const struct cs_extent_def *ext = NULL; 1600 1601 /* begin clear state */ 1602 count += 2; 1603 /* context control state */ 1604 count += 3; 1605 1606 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1607 for (ext = sect->section; ext->extent != NULL; ++ext) { 1608 if (sect->id == SECT_CONTEXT) 1609 count += 2 + ext->reg_count; 1610 else 1611 return 0; 1612 } 1613 } 1614 1615 /* end clear state */ 1616 count += 2; 1617 /* clear state */ 1618 count += 2; 1619 1620 return count; 1621 } 1622 1623 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1624 volatile u32 *buffer) 1625 { 1626 u32 count = 0, i; 1627 const struct cs_section_def *sect = NULL; 1628 const struct cs_extent_def *ext = NULL; 1629 1630 if (adev->gfx.rlc.cs_data == NULL) 1631 return; 1632 if (buffer == NULL) 1633 return; 1634 1635 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1636 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1637 1638 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1639 buffer[count++] = cpu_to_le32(0x80000000); 1640 buffer[count++] = cpu_to_le32(0x80000000); 1641 1642 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1643 for (ext = sect->section; ext->extent != NULL; ++ext) { 1644 if (sect->id == SECT_CONTEXT) { 1645 buffer[count++] = 1646 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1647 buffer[count++] = cpu_to_le32(ext->reg_index - 1648 PACKET3_SET_CONTEXT_REG_START); 1649 for (i = 0; i < ext->reg_count; i++) 1650 buffer[count++] = cpu_to_le32(ext->extent[i]); 1651 } else { 1652 return; 1653 } 1654 } 1655 } 1656 1657 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1658 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1659 1660 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1661 buffer[count++] = cpu_to_le32(0); 1662 } 1663 1664 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1665 { 1666 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1667 uint32_t pg_always_on_cu_num = 2; 1668 uint32_t always_on_cu_num; 1669 uint32_t i, j, k; 1670 uint32_t mask, cu_bitmap, counter; 1671 1672 if (adev->flags & AMD_IS_APU) 1673 always_on_cu_num = 4; 1674 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1675 always_on_cu_num = 8; 1676 else 1677 always_on_cu_num = 12; 1678 1679 mutex_lock(&adev->grbm_idx_mutex); 1680 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1681 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1682 mask = 1; 1683 cu_bitmap = 0; 1684 counter = 0; 1685 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1686 1687 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1688 if (cu_info->bitmap[0][i][j] & mask) { 1689 if (counter == pg_always_on_cu_num) 1690 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1691 if (counter < always_on_cu_num) 1692 cu_bitmap |= mask; 1693 else 1694 break; 1695 counter++; 
1696 } 1697 mask <<= 1; 1698 } 1699 1700 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1701 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1702 } 1703 } 1704 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1705 mutex_unlock(&adev->grbm_idx_mutex); 1706 } 1707 1708 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1709 { 1710 uint32_t data; 1711 1712 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1713 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1717 1718 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1719 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1720 1721 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1722 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1723 1724 mutex_lock(&adev->grbm_idx_mutex); 1725 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1726 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1727 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1728 1729 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1730 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1731 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1732 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1733 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1734 1735 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1736 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1737 data &= 0x0000FFFF; 1738 data |= 0x00C00000; 1739 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1740 1741 /* 1742 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1743 * programmed in gfx_v9_0_init_always_on_cu_mask() 1744 */ 1745 1746 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1747 * but used for RLC_LB_CNTL configuration */ 1748 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1749 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1750 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1751 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1752 mutex_unlock(&adev->grbm_idx_mutex); 1753 1754 gfx_v9_0_init_always_on_cu_mask(adev); 1755 } 1756 1757 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1758 { 1759 uint32_t data; 1760 1761 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1762 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1763 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1764 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1765 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1766 1767 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1768 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1769 1770 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1771 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1772 1773 mutex_lock(&adev->grbm_idx_mutex); 1774 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1775 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1776 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1777 1778 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1779 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1780 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1781 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1782 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1783 1784 /* set 
mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1785 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1786 data &= 0x0000FFFF; 1787 data |= 0x00C00000; 1788 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1789 1790 /* 1791 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1792 * programmed in gfx_v9_0_init_always_on_cu_mask() 1793 */ 1794 1795 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1796 * but used for RLC_LB_CNTL configuration */ 1797 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1798 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1799 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1800 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1801 mutex_unlock(&adev->grbm_idx_mutex); 1802 1803 gfx_v9_0_init_always_on_cu_mask(adev); 1804 } 1805 1806 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1807 { 1808 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1809 } 1810 1811 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1812 { 1813 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1814 return 5; 1815 else 1816 return 4; 1817 } 1818 1819 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 1820 { 1821 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 1822 1823 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 1824 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1825 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); 1826 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); 1827 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); 1828 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); 1829 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); 1830 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); 1831 adev->gfx.rlc.rlcg_reg_access_supported = true; 1832 } 1833 1834 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1835 { 1836 const struct cs_section_def *cs_data; 1837 int r; 1838 1839 adev->gfx.rlc.cs_data = gfx9_cs_data; 1840 1841 cs_data = adev->gfx.rlc.cs_data; 1842 1843 if (cs_data) { 1844 /* init clear state block */ 1845 r = amdgpu_gfx_rlc_init_csb(adev); 1846 if (r) 1847 return r; 1848 } 1849 1850 if (adev->flags & AMD_IS_APU) { 1851 /* TODO: double check the cp_table_size for RV */ 1852 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1853 r = amdgpu_gfx_rlc_init_cpt(adev); 1854 if (r) 1855 return r; 1856 } 1857 1858 return 0; 1859 } 1860 1861 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1862 { 1863 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1864 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1865 } 1866 1867 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1868 { 1869 int r; 1870 u32 *hpd; 1871 const __le32 *fw_data; 1872 unsigned fw_size; 1873 u32 *fw; 1874 size_t mec_hpd_size; 1875 1876 const struct gfx_firmware_header_v1_0 *mec_hdr; 1877 1878 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1879 1880 /* take ownership of the relevant compute queues */ 1881 amdgpu_gfx_compute_queue_acquire(adev); 1882 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1883 if (mec_hpd_size) { 1884 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1885 AMDGPU_GEM_DOMAIN_VRAM | 1886 AMDGPU_GEM_DOMAIN_GTT, 1887 &adev->gfx.mec.hpd_eop_obj, 1888 &adev->gfx.mec.hpd_eop_gpu_addr, 1889 (void 
**)&hpd); 1890 if (r) { 1891 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1892 gfx_v9_0_mec_fini(adev); 1893 return r; 1894 } 1895 1896 memset(hpd, 0, mec_hpd_size); 1897 1898 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1899 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1900 } 1901 1902 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1903 1904 fw_data = (const __le32 *) 1905 (adev->gfx.mec_fw->data + 1906 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1907 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 1908 1909 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1910 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1911 &adev->gfx.mec.mec_fw_obj, 1912 &adev->gfx.mec.mec_fw_gpu_addr, 1913 (void **)&fw); 1914 if (r) { 1915 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1916 gfx_v9_0_mec_fini(adev); 1917 return r; 1918 } 1919 1920 memcpy(fw, fw_data, fw_size); 1921 1922 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1923 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1924 1925 return 0; 1926 } 1927 1928 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1929 { 1930 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1931 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1932 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1933 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1934 (SQ_IND_INDEX__FORCE_READ_MASK)); 1935 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1936 } 1937 1938 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1939 uint32_t wave, uint32_t thread, 1940 uint32_t regno, uint32_t num, uint32_t *out) 1941 { 1942 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1943 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1944 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1945 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1946 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1947 (SQ_IND_INDEX__FORCE_READ_MASK) | 1948 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1949 while (num--) 1950 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1951 } 1952 1953 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1954 { 1955 /* type 1 wave data */ 1956 dst[(*no_fields)++] = 1; 1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1965 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1966 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1967 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1968 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1969 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1970 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1971 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1972 } 1973 1974 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 
1975 uint32_t wave, uint32_t start, 1976 uint32_t size, uint32_t *dst) 1977 { 1978 wave_read_regs( 1979 adev, simd, wave, 0, 1980 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1981 } 1982 1983 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1984 uint32_t wave, uint32_t thread, 1985 uint32_t start, uint32_t size, 1986 uint32_t *dst) 1987 { 1988 wave_read_regs( 1989 adev, simd, wave, thread, 1990 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1991 } 1992 1993 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1994 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1995 { 1996 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1997 } 1998 1999 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2000 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2001 .select_se_sh = &gfx_v9_0_select_se_sh, 2002 .read_wave_data = &gfx_v9_0_read_wave_data, 2003 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2004 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2005 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2006 }; 2007 2008 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 2009 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2010 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2011 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2012 }; 2013 2014 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 2015 .ras_block = { 2016 .hw_ops = &gfx_v9_0_ras_ops, 2017 }, 2018 }; 2019 2020 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2021 { 2022 u32 gb_addr_config; 2023 int err; 2024 2025 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2026 case IP_VERSION(9, 0, 1): 2027 adev->gfx.config.max_hw_contexts = 8; 2028 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2029 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2030 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2031 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2032 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2033 break; 2034 case IP_VERSION(9, 2, 1): 2035 adev->gfx.config.max_hw_contexts = 8; 2036 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2037 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2038 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2039 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2040 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2041 DRM_INFO("fix gfx.config for vega12\n"); 2042 break; 2043 case IP_VERSION(9, 4, 0): 2044 adev->gfx.ras = &gfx_v9_0_ras; 2045 adev->gfx.config.max_hw_contexts = 8; 2046 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2047 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2048 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2049 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2050 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2051 gb_addr_config &= ~0xf3e777ff; 2052 gb_addr_config |= 0x22014042; 2053 /* check vbios table if gpu info is not available */ 2054 err = amdgpu_atomfirmware_get_gfx_info(adev); 2055 if (err) 2056 return err; 2057 break; 2058 case IP_VERSION(9, 2, 2): 2059 case IP_VERSION(9, 1, 0): 2060 adev->gfx.config.max_hw_contexts = 8; 2061 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2062 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2063 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2064 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2065 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2066 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2067 else 2068 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2069 break; 2070 case IP_VERSION(9, 4, 1): 2071 
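		/*
		 * Arcturus: wire up the gfx v9.4 RAS block and derive
		 * GB_ADDR_CONFIG from the hardware register, applying the
		 * same field overrides used for Vega20 above.
		 */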
adev->gfx.ras = &gfx_v9_4_ras; 2072 adev->gfx.config.max_hw_contexts = 8; 2073 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2074 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2075 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2076 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2077 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2078 gb_addr_config &= ~0xf3e777ff; 2079 gb_addr_config |= 0x22014042; 2080 break; 2081 case IP_VERSION(9, 3, 0): 2082 adev->gfx.config.max_hw_contexts = 8; 2083 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2084 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2085 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2086 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2087 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2088 gb_addr_config &= ~0xf3e777ff; 2089 gb_addr_config |= 0x22010042; 2090 break; 2091 case IP_VERSION(9, 4, 2): 2092 adev->gfx.ras = &gfx_v9_4_2_ras; 2093 adev->gfx.config.max_hw_contexts = 8; 2094 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2095 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2096 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2097 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2098 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2099 gb_addr_config &= ~0xf3e777ff; 2100 gb_addr_config |= 0x22014042; 2101 /* check vbios table if gpu info is not available */ 2102 err = amdgpu_atomfirmware_get_gfx_info(adev); 2103 if (err) 2104 return err; 2105 break; 2106 default: 2107 BUG(); 2108 break; 2109 } 2110 2111 adev->gfx.config.gb_addr_config = gb_addr_config; 2112 2113 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2114 REG_GET_FIELD( 2115 adev->gfx.config.gb_addr_config, 2116 GB_ADDR_CONFIG, 2117 NUM_PIPES); 2118 2119 adev->gfx.config.max_tile_pipes = 2120 adev->gfx.config.gb_addr_config_fields.num_pipes; 2121 2122 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2123 REG_GET_FIELD( 2124 adev->gfx.config.gb_addr_config, 2125 GB_ADDR_CONFIG, 2126 NUM_BANKS); 2127 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2128 REG_GET_FIELD( 2129 adev->gfx.config.gb_addr_config, 2130 GB_ADDR_CONFIG, 2131 MAX_COMPRESSED_FRAGS); 2132 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2133 REG_GET_FIELD( 2134 adev->gfx.config.gb_addr_config, 2135 GB_ADDR_CONFIG, 2136 NUM_RB_PER_SE); 2137 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2138 REG_GET_FIELD( 2139 adev->gfx.config.gb_addr_config, 2140 GB_ADDR_CONFIG, 2141 NUM_SHADER_ENGINES); 2142 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2143 REG_GET_FIELD( 2144 adev->gfx.config.gb_addr_config, 2145 GB_ADDR_CONFIG, 2146 PIPE_INTERLEAVE_SIZE)); 2147 2148 return 0; 2149 } 2150 2151 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2152 int mec, int pipe, int queue) 2153 { 2154 unsigned irq_type; 2155 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2156 unsigned int hw_prio; 2157 2158 ring = &adev->gfx.compute_ring[ring_id]; 2159 2160 /* mec0 is me1 */ 2161 ring->me = mec + 1; 2162 ring->pipe = pipe; 2163 ring->queue = queue; 2164 2165 ring->ring_obj = NULL; 2166 ring->use_doorbell = true; 2167 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2168 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2169 + (ring_id * GFX9_MEC_HPD_SIZE); 2170 ring->vm_hub = AMDGPU_GFXHUB(0); 2171 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2172 2173 irq_type = 
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2174 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2175 + ring->pipe; 2176 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 2177 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2178 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2179 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2180 hw_prio, NULL); 2181 } 2182 2183 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2184 { 2185 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2186 uint32_t *ptr; 2187 uint32_t inst; 2188 2189 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2190 if (!ptr) { 2191 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2192 adev->gfx.ip_dump_core = NULL; 2193 } else { 2194 adev->gfx.ip_dump_core = ptr; 2195 } 2196 2197 /* Allocate memory for compute queue registers for all the instances */ 2198 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2199 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2200 adev->gfx.mec.num_queue_per_pipe; 2201 2202 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2203 if (!ptr) { 2204 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2205 adev->gfx.ip_dump_compute_queues = NULL; 2206 } else { 2207 adev->gfx.ip_dump_compute_queues = ptr; 2208 } 2209 } 2210 2211 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) 2212 { 2213 int i, j, k, r, ring_id; 2214 int xcc_id = 0; 2215 struct amdgpu_ring *ring; 2216 struct amdgpu_device *adev = ip_block->adev; 2217 unsigned int hw_prio; 2218 2219 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2220 case IP_VERSION(9, 0, 1): 2221 case IP_VERSION(9, 2, 1): 2222 case IP_VERSION(9, 4, 0): 2223 case IP_VERSION(9, 2, 2): 2224 case IP_VERSION(9, 1, 0): 2225 case IP_VERSION(9, 4, 1): 2226 case IP_VERSION(9, 3, 0): 2227 case IP_VERSION(9, 4, 2): 2228 adev->gfx.mec.num_mec = 2; 2229 break; 2230 default: 2231 adev->gfx.mec.num_mec = 1; 2232 break; 2233 } 2234 2235 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2236 case IP_VERSION(9, 4, 2): 2237 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; 2238 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); 2239 if (adev->gfx.mec_fw_version >= 88) { 2240 adev->gfx.enable_cleaner_shader = true; 2241 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 2242 if (r) { 2243 adev->gfx.enable_cleaner_shader = false; 2244 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 2245 } 2246 } 2247 break; 2248 default: 2249 adev->gfx.enable_cleaner_shader = false; 2250 break; 2251 } 2252 2253 adev->gfx.mec.num_pipe_per_mec = 4; 2254 adev->gfx.mec.num_queue_per_pipe = 8; 2255 2256 /* EOP Event */ 2257 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2258 if (r) 2259 return r; 2260 2261 /* Bad opcode Event */ 2262 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 2263 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, 2264 &adev->gfx.bad_op_irq); 2265 if (r) 2266 return r; 2267 2268 /* Privileged reg */ 2269 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2270 &adev->gfx.priv_reg_irq); 2271 if (r) 2272 return r; 2273 2274 /* Privileged inst */ 2275 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2276 &adev->gfx.priv_inst_irq); 2277 if (r) 2278 return r; 2279 2280 /* ECC error */ 2281 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 
GFX_9_0__SRCID__CP_ECC_ERROR, 2282 &adev->gfx.cp_ecc_error_irq); 2283 if (r) 2284 return r; 2285 2286 /* FUE error */ 2287 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2288 &adev->gfx.cp_ecc_error_irq); 2289 if (r) 2290 return r; 2291 2292 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2293 2294 if (adev->gfx.rlc.funcs) { 2295 if (adev->gfx.rlc.funcs->init) { 2296 r = adev->gfx.rlc.funcs->init(adev); 2297 if (r) { 2298 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2299 return r; 2300 } 2301 } 2302 } 2303 2304 r = gfx_v9_0_mec_init(adev); 2305 if (r) { 2306 DRM_ERROR("Failed to init MEC BOs!\n"); 2307 return r; 2308 } 2309 2310 /* set up the gfx ring */ 2311 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2312 ring = &adev->gfx.gfx_ring[i]; 2313 ring->ring_obj = NULL; 2314 if (!i) 2315 sprintf(ring->name, "gfx"); 2316 else 2317 sprintf(ring->name, "gfx_%d", i); 2318 ring->use_doorbell = true; 2319 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2320 2321 /* disable scheduler on the real ring */ 2322 ring->no_scheduler = adev->gfx.mcbp; 2323 ring->vm_hub = AMDGPU_GFXHUB(0); 2324 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2325 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2326 AMDGPU_RING_PRIO_DEFAULT, NULL); 2327 if (r) 2328 return r; 2329 } 2330 2331 /* set up the software rings */ 2332 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2333 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2334 ring = &adev->gfx.sw_gfx_ring[i]; 2335 ring->ring_obj = NULL; 2336 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2337 ring->use_doorbell = true; 2338 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2339 ring->is_sw_ring = true; 2340 hw_prio = amdgpu_sw_ring_priority(i); 2341 ring->vm_hub = AMDGPU_GFXHUB(0); 2342 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2343 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2344 NULL); 2345 if (r) 2346 return r; 2347 ring->wptr = 0; 2348 } 2349 2350 /* init the muxer and add software rings */ 2351 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2352 GFX9_NUM_SW_GFX_RINGS); 2353 if (r) { 2354 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2355 return r; 2356 } 2357 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2358 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2359 &adev->gfx.sw_gfx_ring[i]); 2360 if (r) { 2361 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2362 return r; 2363 } 2364 } 2365 } 2366 2367 /* set up the compute queues - allocate horizontally across pipes */ 2368 ring_id = 0; 2369 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2370 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2371 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2372 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2373 k, j)) 2374 continue; 2375 2376 r = gfx_v9_0_compute_ring_init(adev, 2377 ring_id, 2378 i, k, j); 2379 if (r) 2380 return r; 2381 2382 ring_id++; 2383 } 2384 } 2385 } 2386 2387 /* TODO: Add queue reset mask when FW fully supports it */ 2388 adev->gfx.gfx_supported_reset = 2389 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 2390 adev->gfx.compute_supported_reset = 2391 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 2392 2393 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2394 if (r) { 2395 DRM_ERROR("Failed to init KIQ BOs!\n"); 2396 return r; 2397 } 2398 2399 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2400 if (r) 2401 return r; 2402 2403 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2404 
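	/*
	 * sizeof(struct v9_mqd_allocation) is used rather than the bare MQD
	 * so that the allocation presumably also covers the dynamic CU/RB
	 * mask words kept alongside the descriptor (see v9_structs.h).
	 */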
r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2405 if (r) 2406 return r; 2407 2408 adev->gfx.ce_ram_size = 0x8000; 2409 2410 r = gfx_v9_0_gpu_early_init(adev); 2411 if (r) 2412 return r; 2413 2414 if (amdgpu_gfx_ras_sw_init(adev)) { 2415 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2416 return -EINVAL; 2417 } 2418 2419 gfx_v9_0_alloc_ip_dump(adev); 2420 2421 r = amdgpu_gfx_sysfs_init(adev); 2422 if (r) 2423 return r; 2424 2425 return 0; 2426 } 2427 2428 2429 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) 2430 { 2431 int i; 2432 struct amdgpu_device *adev = ip_block->adev; 2433 2434 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2435 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2436 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2437 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2438 } 2439 2440 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2441 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2442 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2443 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2444 2445 amdgpu_gfx_mqd_sw_fini(adev, 0); 2446 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2447 amdgpu_gfx_kiq_fini(adev, 0); 2448 2449 amdgpu_gfx_cleaner_shader_sw_fini(adev); 2450 2451 gfx_v9_0_mec_fini(adev); 2452 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2453 &adev->gfx.rlc.clear_state_gpu_addr, 2454 (void **)&adev->gfx.rlc.cs_ptr); 2455 if (adev->flags & AMD_IS_APU) { 2456 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2457 &adev->gfx.rlc.cp_table_gpu_addr, 2458 (void **)&adev->gfx.rlc.cp_table_ptr); 2459 } 2460 gfx_v9_0_free_microcode(adev); 2461 2462 amdgpu_gfx_sysfs_fini(adev); 2463 2464 kfree(adev->gfx.ip_dump_core); 2465 kfree(adev->gfx.ip_dump_compute_queues); 2466 2467 return 0; 2468 } 2469 2470 2471 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2472 { 2473 /* TODO */ 2474 } 2475 2476 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2477 u32 instance, int xcc_id) 2478 { 2479 u32 data; 2480 2481 if (instance == 0xffffffff) 2482 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2483 else 2484 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2485 2486 if (se_num == 0xffffffff) 2487 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2488 else 2489 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2490 2491 if (sh_num == 0xffffffff) 2492 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2493 else 2494 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2495 2496 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2497 } 2498 2499 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2500 { 2501 u32 data, mask; 2502 2503 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2504 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2505 2506 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2507 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2508 2509 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2510 adev->gfx.config.max_sh_per_se); 2511 2512 return (~data) & mask; 2513 } 2514 2515 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2516 { 2517 int i, j; 2518 u32 data; 2519 u32 active_rbs = 0; 2520 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2521 adev->gfx.config.max_sh_per_se; 2522 2523 mutex_lock(&adev->grbm_idx_mutex); 2524 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) 
{ 2525 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2526 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2527 data = gfx_v9_0_get_rb_active_bitmap(adev); 2528 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2529 rb_bitmap_width_per_sh); 2530 } 2531 } 2532 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2533 mutex_unlock(&adev->grbm_idx_mutex); 2534 2535 adev->gfx.config.backend_enable_mask = active_rbs; 2536 adev->gfx.config.num_rbs = hweight32(active_rbs); 2537 } 2538 2539 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2540 uint32_t first_vmid, 2541 uint32_t last_vmid) 2542 { 2543 uint32_t data; 2544 uint32_t trap_config_vmid_mask = 0; 2545 int i; 2546 2547 /* Calculate trap config vmid mask */ 2548 for (i = first_vmid; i < last_vmid; i++) 2549 trap_config_vmid_mask |= (1 << i); 2550 2551 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2552 VMID_SEL, trap_config_vmid_mask); 2553 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 2554 TRAP_EN, 1); 2555 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2556 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2557 2558 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2559 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2560 } 2561 2562 #define DEFAULT_SH_MEM_BASES (0x6000) 2563 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2564 { 2565 int i; 2566 uint32_t sh_mem_config; 2567 uint32_t sh_mem_bases; 2568 2569 /* 2570 * Configure apertures: 2571 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2572 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2573 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2574 */ 2575 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2576 2577 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2578 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2579 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2580 2581 mutex_lock(&adev->srbm_mutex); 2582 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2583 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2584 /* CP and shaders */ 2585 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2586 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2587 } 2588 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2589 mutex_unlock(&adev->srbm_mutex); 2590 2591 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2592 access. These should be enabled by FW for target VMIDs. */ 2593 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2594 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2598 } 2599 } 2600 2601 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2602 { 2603 int vmid; 2604 2605 /* 2606 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2607 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2608 * the driver can enable them for graphics. VMID0 should maintain 2609 * access so that HWS firmware can save/restore entries. 
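	 * The loop below therefore starts at VMID 1, leaving VMID 0 untouched,
	 * and clears the GDS base/size and the GWS/OA masks for every other VMID.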
2610 */ 2611 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2612 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2613 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2614 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2615 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2616 } 2617 } 2618 2619 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2620 { 2621 uint32_t tmp; 2622 2623 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2624 case IP_VERSION(9, 4, 1): 2625 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2626 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2627 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2628 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2629 break; 2630 default: 2631 break; 2632 } 2633 } 2634 2635 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2636 { 2637 u32 tmp; 2638 int i; 2639 2640 if (!amdgpu_sriov_vf(adev) || 2641 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) { 2642 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2643 } 2644 2645 gfx_v9_0_tiling_mode_table_init(adev); 2646 2647 if (adev->gfx.num_gfx_rings) 2648 gfx_v9_0_setup_rb(adev); 2649 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2650 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2651 2652 /* XXX SH_MEM regs */ 2653 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2654 mutex_lock(&adev->srbm_mutex); 2655 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2656 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2657 /* CP and shaders */ 2658 if (i == 0) { 2659 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2660 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2661 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2662 !!adev->gmc.noretry); 2663 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2664 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2665 } else { 2666 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2667 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2668 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2669 !!adev->gmc.noretry); 2670 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2671 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2672 (adev->gmc.private_aperture_start >> 48)); 2673 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2674 (adev->gmc.shared_aperture_start >> 48)); 2675 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2676 } 2677 } 2678 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2679 2680 mutex_unlock(&adev->srbm_mutex); 2681 2682 gfx_v9_0_init_compute_vmid(adev); 2683 gfx_v9_0_init_gds_vmid(adev); 2684 gfx_v9_0_init_sq_config(adev); 2685 } 2686 2687 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2688 { 2689 u32 i, j, k; 2690 u32 mask; 2691 2692 mutex_lock(&adev->grbm_idx_mutex); 2693 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2694 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2695 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2696 for (k = 0; k < adev->usec_timeout; k++) { 2697 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2698 break; 2699 udelay(1); 2700 } 2701 if (k == adev->usec_timeout) { 2702 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2703 0xffffffff, 0xffffffff, 0); 2704 mutex_unlock(&adev->grbm_idx_mutex); 2705 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2706 i, j); 2707 return; 2708 } 2709 } 2710 } 2711 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2712 mutex_unlock(&adev->grbm_idx_mutex); 2713 2714 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2715 
RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2716 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2717 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2718 for (k = 0; k < adev->usec_timeout; k++) { 2719 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2720 break; 2721 udelay(1); 2722 } 2723 } 2724 2725 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2726 bool enable) 2727 { 2728 u32 tmp; 2729 2730 /* These interrupts should be enabled to drive DS clock */ 2731 2732 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2733 2734 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2735 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2736 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2737 if (adev->gfx.num_gfx_rings) 2738 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2739 2740 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2741 } 2742 2743 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2744 { 2745 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2746 /* csib */ 2747 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2748 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2749 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2750 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2751 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2752 adev->gfx.rlc.clear_state_size); 2753 } 2754 2755 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2756 int indirect_offset, 2757 int list_size, 2758 int *unique_indirect_regs, 2759 int unique_indirect_reg_count, 2760 int *indirect_start_offsets, 2761 int *indirect_start_offsets_count, 2762 int max_start_offsets_count) 2763 { 2764 int idx; 2765 2766 for (; indirect_offset < list_size; indirect_offset++) { 2767 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2768 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2769 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2770 2771 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2772 indirect_offset += 2; 2773 2774 /* look for the matching indice */ 2775 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2776 if (unique_indirect_regs[idx] == 2777 register_list_format[indirect_offset] || 2778 !unique_indirect_regs[idx]) 2779 break; 2780 } 2781 2782 BUG_ON(idx >= unique_indirect_reg_count); 2783 2784 if (!unique_indirect_regs[idx]) 2785 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2786 2787 indirect_offset++; 2788 } 2789 } 2790 } 2791 2792 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2793 { 2794 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2795 int unique_indirect_reg_count = 0; 2796 2797 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2798 int indirect_start_offsets_count = 0; 2799 2800 int list_size = 0; 2801 int i = 0, j = 0; 2802 u32 tmp = 0; 2803 2804 u32 *register_list_format = 2805 kmemdup(adev->gfx.rlc.register_list_format, 2806 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2807 if (!register_list_format) 2808 return -ENOMEM; 2809 2810 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2811 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2812 gfx_v9_1_parse_ind_reg_list(register_list_format, 2813 
adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2814 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2815 unique_indirect_regs, 2816 unique_indirect_reg_count, 2817 indirect_start_offsets, 2818 &indirect_start_offsets_count, 2819 ARRAY_SIZE(indirect_start_offsets)); 2820 2821 /* enable auto inc in case it is disabled */ 2822 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2823 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2824 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2825 2826 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2827 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2828 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2829 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2830 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2831 adev->gfx.rlc.register_restore[i]); 2832 2833 /* load indirect register */ 2834 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2835 adev->gfx.rlc.reg_list_format_start); 2836 2837 /* direct register portion */ 2838 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2839 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2840 register_list_format[i]); 2841 2842 /* indirect register portion */ 2843 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2844 if (register_list_format[i] == 0xFFFFFFFF) { 2845 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2846 continue; 2847 } 2848 2849 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2850 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2851 2852 for (j = 0; j < unique_indirect_reg_count; j++) { 2853 if (register_list_format[i] == unique_indirect_regs[j]) { 2854 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2855 break; 2856 } 2857 } 2858 2859 BUG_ON(j >= unique_indirect_reg_count); 2860 2861 i++; 2862 } 2863 2864 /* set save/restore list size */ 2865 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2866 list_size = list_size >> 1; 2867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2868 adev->gfx.rlc.reg_restore_list_size); 2869 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2870 2871 /* write the starting offsets to RLC scratch ram */ 2872 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2873 adev->gfx.rlc.starting_offsets_start); 2874 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2875 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2876 indirect_start_offsets[i]); 2877 2878 /* load unique indirect regs*/ 2879 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2880 if (unique_indirect_regs[i] != 0) { 2881 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2882 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2883 unique_indirect_regs[i] & 0x3FFFF); 2884 2885 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2886 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2887 unique_indirect_regs[i] >> 20); 2888 } 2889 } 2890 2891 kfree(register_list_format); 2892 return 0; 2893 } 2894 2895 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2896 { 2897 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2898 } 2899 2900 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2901 bool enable) 2902 { 2903 uint32_t data = 0; 2904 uint32_t default_data = 0; 2905 2906 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2907 if (enable) { 2908 /* enable GFXIP control over CGPG */ 2909 data |= 
PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2910 if(default_data != data) 2911 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2912 2913 /* update status */ 2914 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2915 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2916 if(default_data != data) 2917 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2918 } else { 2919 /* restore GFXIP control over GCPG */ 2920 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2921 if(default_data != data) 2922 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2923 } 2924 } 2925 2926 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2927 { 2928 uint32_t data = 0; 2929 2930 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2931 AMD_PG_SUPPORT_GFX_SMG | 2932 AMD_PG_SUPPORT_GFX_DMG)) { 2933 /* init IDLE_POLL_COUNT = 60 */ 2934 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2935 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2936 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2937 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2938 2939 /* init RLC PG Delay */ 2940 data = 0; 2941 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2942 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2943 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2944 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2945 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2946 2947 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2948 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2949 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2950 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2951 2952 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2953 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2954 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2955 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2956 2957 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2958 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2959 2960 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2961 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2962 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2963 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) 2964 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2965 } 2966 } 2967 2968 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2969 bool enable) 2970 { 2971 uint32_t data = 0; 2972 uint32_t default_data = 0; 2973 2974 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2975 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2976 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2977 enable ? 1 : 0); 2978 if (default_data != data) 2979 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2980 } 2981 2982 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2983 bool enable) 2984 { 2985 uint32_t data = 0; 2986 uint32_t default_data = 0; 2987 2988 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2989 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2990 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2991 enable ? 
1 : 0); 2992 if(default_data != data) 2993 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2994 } 2995 2996 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2997 bool enable) 2998 { 2999 uint32_t data = 0; 3000 uint32_t default_data = 0; 3001 3002 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3003 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3004 CP_PG_DISABLE, 3005 enable ? 0 : 1); 3006 if(default_data != data) 3007 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3008 } 3009 3010 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 3011 bool enable) 3012 { 3013 uint32_t data, default_data; 3014 3015 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3016 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3017 GFX_POWER_GATING_ENABLE, 3018 enable ? 1 : 0); 3019 if(default_data != data) 3020 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3021 } 3022 3023 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3024 bool enable) 3025 { 3026 uint32_t data, default_data; 3027 3028 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3029 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3030 GFX_PIPELINE_PG_ENABLE, 3031 enable ? 1 : 0); 3032 if(default_data != data) 3033 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3034 3035 if (!enable) 3036 /* read any GFX register to wake up GFX */ 3037 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3038 } 3039 3040 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3041 bool enable) 3042 { 3043 uint32_t data, default_data; 3044 3045 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3046 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3047 STATIC_PER_CU_PG_ENABLE, 3048 enable ? 1 : 0); 3049 if(default_data != data) 3050 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3051 } 3052 3053 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3054 bool enable) 3055 { 3056 uint32_t data, default_data; 3057 3058 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3059 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3060 DYN_PER_CU_PG_ENABLE, 3061 enable ? 1 : 0); 3062 if(default_data != data) 3063 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3064 } 3065 3066 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3067 { 3068 gfx_v9_0_init_csb(adev); 3069 3070 /* 3071 * Rlc save restore list is workable since v2_1. 3072 * And it's needed by gfxoff feature. 
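	 * On chips that need it, gfx_v9_1_init_rlc_save_restore_list() below
	 * writes the list into the RLC scratch RAM; gfx_v9_0_enable_save_restore_machine()
	 * then turns on the save/restore machine (SRM) so GFX register state
	 * can be preserved across power-gating cycles.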
3073 */ 3074 if (adev->gfx.rlc.is_rlc_v2_1) { 3075 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3076 IP_VERSION(9, 2, 1) || 3077 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3078 gfx_v9_1_init_rlc_save_restore_list(adev); 3079 gfx_v9_0_enable_save_restore_machine(adev); 3080 } 3081 3082 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3083 AMD_PG_SUPPORT_GFX_SMG | 3084 AMD_PG_SUPPORT_GFX_DMG | 3085 AMD_PG_SUPPORT_CP | 3086 AMD_PG_SUPPORT_GDS | 3087 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3088 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3089 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3090 gfx_v9_0_init_gfx_power_gating(adev); 3091 } 3092 } 3093 3094 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3095 { 3096 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3097 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3098 gfx_v9_0_wait_for_rlc_serdes(adev); 3099 } 3100 3101 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3102 { 3103 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3104 udelay(50); 3105 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3106 udelay(50); 3107 } 3108 3109 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3110 { 3111 #ifdef AMDGPU_RLC_DEBUG_RETRY 3112 u32 rlc_ucode_ver; 3113 #endif 3114 3115 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3116 udelay(50); 3117 3118 /* carrizo do enable cp interrupt after cp inited */ 3119 if (!(adev->flags & AMD_IS_APU)) { 3120 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3121 udelay(50); 3122 } 3123 3124 #ifdef AMDGPU_RLC_DEBUG_RETRY 3125 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3126 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3127 if(rlc_ucode_ver == 0x108) { 3128 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3129 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3130 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3131 * default is 0x9C4 to create a 100us interval */ 3132 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3133 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3134 * to disable the page fault retry interrupts, default is 3135 * 0x100 (256) */ 3136 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3137 } 3138 #endif 3139 } 3140 3141 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3142 { 3143 const struct rlc_firmware_header_v2_0 *hdr; 3144 const __le32 *fw_data; 3145 unsigned i, fw_size; 3146 3147 if (!adev->gfx.rlc_fw) 3148 return -EINVAL; 3149 3150 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3151 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3152 3153 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3154 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3155 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3156 3157 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3158 RLCG_UCODE_LOADING_START_ADDRESS); 3159 for (i = 0; i < fw_size; i++) 3160 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3161 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3162 3163 return 0; 3164 } 3165 3166 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3167 { 3168 int r; 3169 3170 if (amdgpu_sriov_vf(adev)) { 3171 gfx_v9_0_init_csb(adev); 3172 return 0; 3173 } 3174 3175 adev->gfx.rlc.funcs->stop(adev); 3176 3177 /* disable CG */ 3178 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3179 3180 gfx_v9_0_init_pg(adev); 3181 3182 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3183 /* legacy rlc firmware loading */ 3184 r = 
gfx_v9_0_rlc_load_microcode(adev); 3185 if (r) 3186 return r; 3187 } 3188 3189 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3190 case IP_VERSION(9, 2, 2): 3191 case IP_VERSION(9, 1, 0): 3192 gfx_v9_0_init_lbpw(adev); 3193 if (amdgpu_lbpw == 0) 3194 gfx_v9_0_enable_lbpw(adev, false); 3195 else 3196 gfx_v9_0_enable_lbpw(adev, true); 3197 break; 3198 case IP_VERSION(9, 4, 0): 3199 gfx_v9_4_init_lbpw(adev); 3200 if (amdgpu_lbpw > 0) 3201 gfx_v9_0_enable_lbpw(adev, true); 3202 else 3203 gfx_v9_0_enable_lbpw(adev, false); 3204 break; 3205 default: 3206 break; 3207 } 3208 3209 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3210 3211 adev->gfx.rlc.funcs->start(adev); 3212 3213 return 0; 3214 } 3215 3216 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3217 { 3218 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3219 3220 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1); 3221 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1); 3222 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1); 3223 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1); 3224 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1); 3225 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1); 3226 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1); 3227 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1); 3228 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1); 3229 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3230 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3231 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3232 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3233 udelay(50); 3234 } 3235 3236 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3237 { 3238 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3239 const struct gfx_firmware_header_v1_0 *ce_hdr; 3240 const struct gfx_firmware_header_v1_0 *me_hdr; 3241 const __le32 *fw_data; 3242 unsigned i, fw_size; 3243 3244 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3245 return -EINVAL; 3246 3247 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3248 adev->gfx.pfp_fw->data; 3249 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3250 adev->gfx.ce_fw->data; 3251 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3252 adev->gfx.me_fw->data; 3253 3254 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3255 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3256 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3257 3258 gfx_v9_0_cp_gfx_enable(adev, false); 3259 3260 /* PFP */ 3261 fw_data = (const __le32 *) 3262 (adev->gfx.pfp_fw->data + 3263 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3264 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3265 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3266 for (i = 0; i < fw_size; i++) 3267 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3268 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3269 3270 /* CE */ 3271 fw_data = (const __le32 *) 3272 (adev->gfx.ce_fw->data + 3273 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3274 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3275 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3276 for (i = 0; i < fw_size; i++) 3277 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3278 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3279 3280 
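	/* The PFP and CE uploads above, and the ME upload below, all follow
	 * the same legacy pattern: reset the ucode address register to 0,
	 * stream the firmware words through the matching data register (the
	 * hardware advances the address automatically, so the loop never
	 * rewrites it), then write the firmware version back to the address
	 * register as the final step.
	 */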
/* ME */ 3281 fw_data = (const __le32 *) 3282 (adev->gfx.me_fw->data + 3283 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3284 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3285 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3286 for (i = 0; i < fw_size; i++) 3287 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3288 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3289 3290 return 0; 3291 } 3292 3293 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3294 { 3295 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3296 const struct cs_section_def *sect = NULL; 3297 const struct cs_extent_def *ext = NULL; 3298 int r, i, tmp; 3299 3300 /* init the CP */ 3301 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3302 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3303 3304 gfx_v9_0_cp_gfx_enable(adev, true); 3305 3306 /* Now only limit the quirk on the APU gfx9 series and already 3307 * confirmed that the APU gfx10/gfx11 needn't such update. 3308 */ 3309 if (adev->flags & AMD_IS_APU && 3310 adev->in_s3 && !pm_resume_via_firmware()) { 3311 DRM_INFO("Will skip the CSB packet resubmit\n"); 3312 return 0; 3313 } 3314 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3315 if (r) { 3316 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3317 return r; 3318 } 3319 3320 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3321 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3322 3323 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3324 amdgpu_ring_write(ring, 0x80000000); 3325 amdgpu_ring_write(ring, 0x80000000); 3326 3327 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3328 for (ext = sect->section; ext->extent != NULL; ++ext) { 3329 if (sect->id == SECT_CONTEXT) { 3330 amdgpu_ring_write(ring, 3331 PACKET3(PACKET3_SET_CONTEXT_REG, 3332 ext->reg_count)); 3333 amdgpu_ring_write(ring, 3334 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3335 for (i = 0; i < ext->reg_count; i++) 3336 amdgpu_ring_write(ring, ext->extent[i]); 3337 } 3338 } 3339 } 3340 3341 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3342 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3343 3344 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3345 amdgpu_ring_write(ring, 0); 3346 3347 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3348 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3349 amdgpu_ring_write(ring, 0x8000); 3350 amdgpu_ring_write(ring, 0x8000); 3351 3352 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3353 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3354 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3355 amdgpu_ring_write(ring, tmp); 3356 amdgpu_ring_write(ring, 0); 3357 3358 amdgpu_ring_commit(ring); 3359 3360 return 0; 3361 } 3362 3363 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3364 { 3365 struct amdgpu_ring *ring; 3366 u32 tmp; 3367 u32 rb_bufsz; 3368 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3369 3370 /* Set the write pointer delay */ 3371 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3372 3373 /* set the RB to use vmid 0 */ 3374 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3375 3376 /* Set ring buffer size */ 3377 ring = &adev->gfx.gfx_ring[0]; 3378 rb_bufsz = order_base_2(ring->ring_size / 8); 3379 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3380 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3381 #ifdef __BIG_ENDIAN 3382 tmp = 
REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3383 #endif 3384 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3385 3386 /* Initialize the ring buffer's write pointers */ 3387 ring->wptr = 0; 3388 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3389 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3390 3391 /* set the wb address whether it's enabled or not */ 3392 rptr_addr = ring->rptr_gpu_addr; 3393 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3394 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3395 3396 wptr_gpu_addr = ring->wptr_gpu_addr; 3397 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3398 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3399 3400 mdelay(1); 3401 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3402 3403 rb_addr = ring->gpu_addr >> 8; 3404 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3405 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3406 3407 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3408 if (ring->use_doorbell) { 3409 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3410 DOORBELL_OFFSET, ring->doorbell_index); 3411 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3412 DOORBELL_EN, 1); 3413 } else { 3414 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3415 } 3416 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3417 3418 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3419 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3420 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3421 3422 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3423 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3424 3425 3426 /* start the ring */ 3427 gfx_v9_0_cp_gfx_start(adev); 3428 3429 return 0; 3430 } 3431 3432 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3433 { 3434 if (enable) { 3435 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3436 } else { 3437 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3438 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | 3439 CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | 3440 CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | 3441 CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | 3442 CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | 3443 CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | 3444 CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | 3445 CP_MEC_CNTL__MEC_ME1_HALT_MASK | 3446 CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3447 adev->gfx.kiq[0].ring.sched.ready = false; 3448 } 3449 udelay(50); 3450 } 3451 3452 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3453 { 3454 const struct gfx_firmware_header_v1_0 *mec_hdr; 3455 const __le32 *fw_data; 3456 unsigned i; 3457 u32 tmp; 3458 3459 if (!adev->gfx.mec_fw) 3460 return -EINVAL; 3461 3462 gfx_v9_0_cp_compute_enable(adev, false); 3463 3464 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3465 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3466 3467 fw_data = (const __le32 *) 3468 (adev->gfx.mec_fw->data + 3469 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3470 tmp = 0; 3471 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3472 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3473 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3474 3475 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3476 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3477 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3478 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3479 3480 /* MEC1 */ 3481 
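	/* Only the jump table is streamed through the UCODE_ADDR/DATA pair
	 * below; the main MEC microcode body is fetched by the CPC from the
	 * instruction-cache base programmed above (CP_CPC_IC_BASE_LO/HI,
	 * pointing at mec_fw_gpu_addr).
	 */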
WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3482 mec_hdr->jt_offset); 3483 for (i = 0; i < mec_hdr->jt_size; i++) 3484 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3485 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3486 3487 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3488 adev->gfx.mec_fw_version); 3489 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3490 3491 return 0; 3492 } 3493 3494 /* KIQ functions */ 3495 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3496 { 3497 uint32_t tmp; 3498 struct amdgpu_device *adev = ring->adev; 3499 3500 /* tell RLC which is KIQ queue */ 3501 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3502 tmp &= 0xffffff00; 3503 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3504 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); 3505 } 3506 3507 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3508 { 3509 struct amdgpu_device *adev = ring->adev; 3510 3511 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3512 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3513 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3514 mqd->cp_hqd_queue_priority = 3515 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3516 } 3517 } 3518 } 3519 3520 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3521 { 3522 struct amdgpu_device *adev = ring->adev; 3523 struct v9_mqd *mqd = ring->mqd_ptr; 3524 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3525 uint32_t tmp; 3526 3527 mqd->header = 0xC0310800; 3528 mqd->compute_pipelinestat_enable = 0x00000001; 3529 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3530 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3531 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3532 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3533 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3534 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3535 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3536 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3537 mqd->compute_misc_reserved = 0x00000003; 3538 3539 mqd->dynamic_cu_mask_addr_lo = 3540 lower_32_bits(ring->mqd_gpu_addr 3541 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3542 mqd->dynamic_cu_mask_addr_hi = 3543 upper_32_bits(ring->mqd_gpu_addr 3544 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3545 3546 eop_base_addr = ring->eop_gpu_addr >> 8; 3547 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3548 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3549 3550 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3551 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3552 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3553 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3554 3555 mqd->cp_hqd_eop_control = tmp; 3556 3557 /* enable doorbell? 
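	 * The value computed here is only staged in the MQD; for the KIQ it
	 * is committed to the CP_HQD_PQ_DOORBELL_CONTROL register later by
	 * gfx_v9_0_kiq_init_register().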
*/ 3558 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3559 3560 if (ring->use_doorbell) { 3561 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3562 DOORBELL_OFFSET, ring->doorbell_index); 3563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3564 DOORBELL_EN, 1); 3565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3566 DOORBELL_SOURCE, 0); 3567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3568 DOORBELL_HIT, 0); 3569 } else { 3570 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3571 DOORBELL_EN, 0); 3572 } 3573 3574 mqd->cp_hqd_pq_doorbell_control = tmp; 3575 3576 /* disable the queue if it's active */ 3577 ring->wptr = 0; 3578 mqd->cp_hqd_dequeue_request = 0; 3579 mqd->cp_hqd_pq_rptr = 0; 3580 mqd->cp_hqd_pq_wptr_lo = 0; 3581 mqd->cp_hqd_pq_wptr_hi = 0; 3582 3583 /* set the pointer to the MQD */ 3584 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3585 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3586 3587 /* set MQD vmid to 0 */ 3588 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3589 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3590 mqd->cp_mqd_control = tmp; 3591 3592 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3593 hqd_gpu_addr = ring->gpu_addr >> 8; 3594 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3595 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3596 3597 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3598 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3599 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3600 (order_base_2(ring->ring_size / 4) - 1)); 3601 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3602 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3603 #ifdef __BIG_ENDIAN 3604 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3605 #endif 3606 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3607 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3608 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3609 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3610 mqd->cp_hqd_pq_control = tmp; 3611 3612 /* set the wb address whether it's enabled or not */ 3613 wb_gpu_addr = ring->rptr_gpu_addr; 3614 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3615 mqd->cp_hqd_pq_rptr_report_addr_hi = 3616 upper_32_bits(wb_gpu_addr) & 0xffff; 3617 3618 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3619 wb_gpu_addr = ring->wptr_gpu_addr; 3620 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3621 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3622 3623 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3624 ring->wptr = 0; 3625 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3626 3627 /* set the vmid for the queue */ 3628 mqd->cp_hqd_vmid = 0; 3629 3630 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3631 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3632 mqd->cp_hqd_persistent_state = tmp; 3633 3634 /* set MIN_IB_AVAIL_SIZE */ 3635 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3636 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3637 mqd->cp_hqd_ib_control = tmp; 3638 3639 /* set static priority for a queue/ring */ 3640 gfx_v9_0_mqd_set_priority(ring, mqd); 3641 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3642 3643 /* map_queues packet doesn't need activate the queue, 3644 * so only kiq need set this field. 
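	 * The KIQ is brought up by direct register writes in
	 * gfx_v9_0_kiq_init_register(), so its MQD must mark the queue
	 * active up front; the regular compute queues are activated later
	 * through MAP_QUEUES packets submitted via the KIQ
	 * (amdgpu_gfx_enable_kcq()).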
3645 */ 3646 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3647 mqd->cp_hqd_active = 1; 3648 3649 return 0; 3650 } 3651 3652 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3653 { 3654 struct amdgpu_device *adev = ring->adev; 3655 struct v9_mqd *mqd = ring->mqd_ptr; 3656 int j; 3657 3658 /* disable wptr polling */ 3659 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3660 3661 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3662 mqd->cp_hqd_eop_base_addr_lo); 3663 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3664 mqd->cp_hqd_eop_base_addr_hi); 3665 3666 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3667 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3668 mqd->cp_hqd_eop_control); 3669 3670 /* enable doorbell? */ 3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3672 mqd->cp_hqd_pq_doorbell_control); 3673 3674 /* disable the queue if it's active */ 3675 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3676 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3677 for (j = 0; j < adev->usec_timeout; j++) { 3678 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3679 break; 3680 udelay(1); 3681 } 3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3683 mqd->cp_hqd_dequeue_request); 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3685 mqd->cp_hqd_pq_rptr); 3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3687 mqd->cp_hqd_pq_wptr_lo); 3688 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3689 mqd->cp_hqd_pq_wptr_hi); 3690 } 3691 3692 /* set the pointer to the MQD */ 3693 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3694 mqd->cp_mqd_base_addr_lo); 3695 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3696 mqd->cp_mqd_base_addr_hi); 3697 3698 /* set MQD vmid to 0 */ 3699 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3700 mqd->cp_mqd_control); 3701 3702 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3703 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3704 mqd->cp_hqd_pq_base_lo); 3705 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3706 mqd->cp_hqd_pq_base_hi); 3707 3708 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3709 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3710 mqd->cp_hqd_pq_control); 3711 3712 /* set the wb address whether it's enabled or not */ 3713 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3714 mqd->cp_hqd_pq_rptr_report_addr_lo); 3715 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3716 mqd->cp_hqd_pq_rptr_report_addr_hi); 3717 3718 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3719 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3720 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3721 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3722 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3723 3724 /* enable the doorbell if requested */ 3725 if (ring->use_doorbell) { 3726 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3727 (adev->doorbell_index.kiq * 2) << 2); 3728 /* If GC has entered CGPG, ringing doorbell > first page 3729 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3730 * workaround this issue. And this change has to align with firmware 3731 * update. 
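	 * When check_if_enlarge_doorbell_range() approves the workaround,
	 * the upper bound is extended to cover the whole doorbell BAR
	 * (adev->doorbell.size - 4) instead of stopping at the last
	 * user-queue doorbell.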
3732 */ 3733 if (check_if_enlarge_doorbell_range(adev)) 3734 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3735 (adev->doorbell.size - 4)); 3736 else 3737 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3738 (adev->doorbell_index.userqueue_end * 2) << 2); 3739 } 3740 3741 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3742 mqd->cp_hqd_pq_doorbell_control); 3743 3744 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3745 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3746 mqd->cp_hqd_pq_wptr_lo); 3747 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3748 mqd->cp_hqd_pq_wptr_hi); 3749 3750 /* set the vmid for the queue */ 3751 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3752 3753 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3754 mqd->cp_hqd_persistent_state); 3755 3756 /* activate the queue */ 3757 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3758 mqd->cp_hqd_active); 3759 3760 if (ring->use_doorbell) 3761 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3762 3763 return 0; 3764 } 3765 3766 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3767 { 3768 struct amdgpu_device *adev = ring->adev; 3769 int j; 3770 3771 /* disable the queue if it's active */ 3772 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3773 3774 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3775 3776 for (j = 0; j < adev->usec_timeout; j++) { 3777 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3778 break; 3779 udelay(1); 3780 } 3781 3782 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3783 DRM_DEBUG("KIQ dequeue request failed.\n"); 3784 3785 /* Manual disable if dequeue request times out */ 3786 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3787 } 3788 3789 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3790 0); 3791 } 3792 3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3794 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3795 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3796 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3797 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3798 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3799 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3800 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3801 3802 return 0; 3803 } 3804 3805 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3806 { 3807 struct amdgpu_device *adev = ring->adev; 3808 struct v9_mqd *mqd = ring->mqd_ptr; 3809 struct v9_mqd *tmp_mqd; 3810 3811 gfx_v9_0_kiq_setting(ring); 3812 3813 /* GPU could be in bad state during probe, driver trigger the reset 3814 * after load the SMU, in this case , the mqd is not be initialized. 3815 * driver need to re-init the mqd. 
3816 * check mqd->cp_hqd_pq_control since this value should not be 0 3817 */ 3818 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 3819 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3820 /* for GPU_RESET case , reset MQD to a clean status */ 3821 if (adev->gfx.kiq[0].mqd_backup) 3822 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 3823 3824 /* reset ring buffer */ 3825 ring->wptr = 0; 3826 amdgpu_ring_clear_ring(ring); 3827 3828 mutex_lock(&adev->srbm_mutex); 3829 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3830 gfx_v9_0_kiq_init_register(ring); 3831 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3832 mutex_unlock(&adev->srbm_mutex); 3833 } else { 3834 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3835 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3836 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3837 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 3838 amdgpu_ring_clear_ring(ring); 3839 mutex_lock(&adev->srbm_mutex); 3840 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3841 gfx_v9_0_mqd_init(ring); 3842 gfx_v9_0_kiq_init_register(ring); 3843 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3844 mutex_unlock(&adev->srbm_mutex); 3845 3846 if (adev->gfx.kiq[0].mqd_backup) 3847 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 3848 } 3849 3850 return 0; 3851 } 3852 3853 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) 3854 { 3855 struct amdgpu_device *adev = ring->adev; 3856 struct v9_mqd *mqd = ring->mqd_ptr; 3857 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3858 struct v9_mqd *tmp_mqd; 3859 3860 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3861 * is not be initialized before 3862 */ 3863 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3864 3865 if (!restore && (!tmp_mqd->cp_hqd_pq_control || 3866 (!amdgpu_in_reset(adev) && !adev->in_suspend))) { 3867 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3868 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3869 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3870 mutex_lock(&adev->srbm_mutex); 3871 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3872 gfx_v9_0_mqd_init(ring); 3873 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3874 mutex_unlock(&adev->srbm_mutex); 3875 3876 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3877 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3878 } else { 3879 /* restore MQD to a clean status */ 3880 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3881 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3882 /* reset ring buffer */ 3883 ring->wptr = 0; 3884 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 3885 amdgpu_ring_clear_ring(ring); 3886 } 3887 3888 return 0; 3889 } 3890 3891 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3892 { 3893 struct amdgpu_ring *ring; 3894 int r; 3895 3896 ring = &adev->gfx.kiq[0].ring; 3897 3898 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3899 if (unlikely(r != 0)) 3900 return r; 3901 3902 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3903 if (unlikely(r != 0)) { 3904 amdgpu_bo_unreserve(ring->mqd_obj); 3905 return r; 3906 } 3907 3908 gfx_v9_0_kiq_init_queue(ring); 3909 amdgpu_bo_kunmap(ring->mqd_obj); 3910 ring->mqd_ptr = NULL; 3911 amdgpu_bo_unreserve(ring->mqd_obj); 3912 return 0; 3913 } 3914 3915 static int 
gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3916 { 3917 struct amdgpu_ring *ring = NULL; 3918 int r = 0, i; 3919 3920 gfx_v9_0_cp_compute_enable(adev, true); 3921 3922 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3923 ring = &adev->gfx.compute_ring[i]; 3924 3925 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3926 if (unlikely(r != 0)) 3927 goto done; 3928 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3929 if (!r) { 3930 r = gfx_v9_0_kcq_init_queue(ring, false); 3931 amdgpu_bo_kunmap(ring->mqd_obj); 3932 ring->mqd_ptr = NULL; 3933 } 3934 amdgpu_bo_unreserve(ring->mqd_obj); 3935 if (r) 3936 goto done; 3937 } 3938 3939 r = amdgpu_gfx_enable_kcq(adev, 0); 3940 done: 3941 return r; 3942 } 3943 3944 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3945 { 3946 int r, i; 3947 struct amdgpu_ring *ring; 3948 3949 if (!(adev->flags & AMD_IS_APU)) 3950 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3951 3952 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3953 if (adev->gfx.num_gfx_rings) { 3954 /* legacy firmware loading */ 3955 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3956 if (r) 3957 return r; 3958 } 3959 3960 r = gfx_v9_0_cp_compute_load_microcode(adev); 3961 if (r) 3962 return r; 3963 } 3964 3965 if (adev->gfx.num_gfx_rings) 3966 gfx_v9_0_cp_gfx_enable(adev, false); 3967 gfx_v9_0_cp_compute_enable(adev, false); 3968 3969 r = gfx_v9_0_kiq_resume(adev); 3970 if (r) 3971 return r; 3972 3973 if (adev->gfx.num_gfx_rings) { 3974 r = gfx_v9_0_cp_gfx_resume(adev); 3975 if (r) 3976 return r; 3977 } 3978 3979 r = gfx_v9_0_kcq_resume(adev); 3980 if (r) 3981 return r; 3982 3983 if (adev->gfx.num_gfx_rings) { 3984 ring = &adev->gfx.gfx_ring[0]; 3985 r = amdgpu_ring_test_helper(ring); 3986 if (r) 3987 return r; 3988 } 3989 3990 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3991 ring = &adev->gfx.compute_ring[i]; 3992 amdgpu_ring_test_helper(ring); 3993 } 3994 3995 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3996 3997 return 0; 3998 } 3999 4000 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 4001 { 4002 u32 tmp; 4003 4004 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 4005 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 4006 return; 4007 4008 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 4009 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 4010 adev->df.hash_status.hash_64k); 4011 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 4012 adev->df.hash_status.hash_2m); 4013 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 4014 adev->df.hash_status.hash_1g); 4015 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 4016 } 4017 4018 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 4019 { 4020 if (adev->gfx.num_gfx_rings) 4021 gfx_v9_0_cp_gfx_enable(adev, enable); 4022 gfx_v9_0_cp_compute_enable(adev, enable); 4023 } 4024 4025 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block) 4026 { 4027 int r; 4028 struct amdgpu_device *adev = ip_block->adev; 4029 4030 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4031 adev->gfx.cleaner_shader_ptr); 4032 4033 if (!amdgpu_sriov_vf(adev)) 4034 gfx_v9_0_init_golden_registers(adev); 4035 4036 gfx_v9_0_constants_init(adev); 4037 4038 gfx_v9_0_init_tcp_config(adev); 4039 4040 r = adev->gfx.rlc.funcs->resume(adev); 4041 if (r) 4042 return r; 4043 4044 r = gfx_v9_0_cp_resume(adev); 4045 if (r) 4046 return r; 4047 4048 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) && 4049 !amdgpu_sriov_vf(adev)) 4050 
gfx_v9_4_2_set_power_brake_sequence(adev); 4051 4052 return r; 4053 } 4054 4055 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) 4056 { 4057 struct amdgpu_device *adev = ip_block->adev; 4058 4059 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4060 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4061 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4062 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4063 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4064 4065 /* DF freeze and kcq disable will fail */ 4066 if (!amdgpu_ras_intr_triggered()) 4067 /* disable KCQ to avoid CPC touch memory not valid anymore */ 4068 amdgpu_gfx_disable_kcq(adev, 0); 4069 4070 if (amdgpu_sriov_vf(adev)) { 4071 gfx_v9_0_cp_gfx_enable(adev, false); 4072 /* must disable polling for SRIOV when hw finished, otherwise 4073 * CPC engine may still keep fetching WB address which is already 4074 * invalid after sw finished and trigger DMAR reading error in 4075 * hypervisor side. 4076 */ 4077 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4078 return 0; 4079 } 4080 4081 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4082 * otherwise KIQ is hanging when binding back 4083 */ 4084 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4085 mutex_lock(&adev->srbm_mutex); 4086 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 4087 adev->gfx.kiq[0].ring.pipe, 4088 adev->gfx.kiq[0].ring.queue, 0, 0); 4089 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 4090 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 4091 mutex_unlock(&adev->srbm_mutex); 4092 } 4093 4094 gfx_v9_0_cp_enable(adev, false); 4095 4096 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 4097 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 4098 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 4099 dev_dbg(adev->dev, "Skipping RLC halt\n"); 4100 return 0; 4101 } 4102 4103 adev->gfx.rlc.funcs->stop(adev); 4104 return 0; 4105 } 4106 4107 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block) 4108 { 4109 return gfx_v9_0_hw_fini(ip_block); 4110 } 4111 4112 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block) 4113 { 4114 return gfx_v9_0_hw_init(ip_block); 4115 } 4116 4117 static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block) 4118 { 4119 struct amdgpu_device *adev = ip_block->adev; 4120 4121 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4122 GRBM_STATUS, GUI_ACTIVE)) 4123 return false; 4124 else 4125 return true; 4126 } 4127 4128 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4129 { 4130 unsigned i; 4131 struct amdgpu_device *adev = ip_block->adev; 4132 4133 for (i = 0; i < adev->usec_timeout; i++) { 4134 if (gfx_v9_0_is_idle(ip_block)) 4135 return 0; 4136 udelay(1); 4137 } 4138 return -ETIMEDOUT; 4139 } 4140 4141 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) 4142 { 4143 u32 grbm_soft_reset = 0; 4144 u32 tmp; 4145 struct amdgpu_device *adev = ip_block->adev; 4146 4147 /* GRBM_STATUS */ 4148 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4149 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4150 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4151 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4152 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4153 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4154 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4155 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4156 GRBM_SOFT_RESET, 
SOFT_RESET_CP, 1); 4157 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4158 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4159 } 4160 4161 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4162 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4163 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4164 } 4165 4166 /* GRBM_STATUS2 */ 4167 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4168 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4169 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4170 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4171 4172 4173 if (grbm_soft_reset) { 4174 /* stop the rlc */ 4175 adev->gfx.rlc.funcs->stop(adev); 4176 4177 if (adev->gfx.num_gfx_rings) 4178 /* Disable GFX parsing/prefetching */ 4179 gfx_v9_0_cp_gfx_enable(adev, false); 4180 4181 /* Disable MEC parsing/prefetching */ 4182 gfx_v9_0_cp_compute_enable(adev, false); 4183 4184 if (grbm_soft_reset) { 4185 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4186 tmp |= grbm_soft_reset; 4187 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4188 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4189 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4190 4191 udelay(50); 4192 4193 tmp &= ~grbm_soft_reset; 4194 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4195 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4196 } 4197 4198 /* Wait a little for things to settle down */ 4199 udelay(50); 4200 } 4201 return 0; 4202 } 4203 4204 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4205 { 4206 signed long r, cnt = 0; 4207 unsigned long flags; 4208 uint32_t seq, reg_val_offs = 0; 4209 uint64_t value = 0; 4210 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 4211 struct amdgpu_ring *ring = &kiq->ring; 4212 4213 BUG_ON(!ring->funcs->emit_rreg); 4214 4215 spin_lock_irqsave(&kiq->ring_lock, flags); 4216 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4217 pr_err("critical bug! too many kiq readers\n"); 4218 goto failed_unlock; 4219 } 4220 amdgpu_ring_alloc(ring, 32); 4221 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4222 amdgpu_ring_write(ring, 9 | /* src: register*/ 4223 (5 << 8) | /* dst: memory */ 4224 (1 << 16) | /* count sel */ 4225 (1 << 20)); /* write confirm */ 4226 amdgpu_ring_write(ring, 0); 4227 amdgpu_ring_write(ring, 0); 4228 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4229 reg_val_offs * 4)); 4230 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4231 reg_val_offs * 4)); 4232 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4233 if (r) 4234 goto failed_undo; 4235 4236 amdgpu_ring_commit(ring); 4237 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4238 4239 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4240 4241 /* don't wait anymore for gpu reset case because this way may 4242 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4243 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4244 * never return if we keep waiting in virt_kiq_rreg, which cause 4245 * gpu_recover() hang there. 
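	 * In the reset case the code below therefore bails out to
	 * failed_kiq_read right away instead of entering the msleep() retry
	 * loop.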
4246 * 4247 * also don't wait anymore for IRQ context 4248 * */ 4249 if (r < 1 && (amdgpu_in_reset(adev))) 4250 goto failed_kiq_read; 4251 4252 might_sleep(); 4253 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4254 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4255 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4256 } 4257 4258 if (cnt > MAX_KIQ_REG_TRY) 4259 goto failed_kiq_read; 4260 4261 mb(); 4262 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4263 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4264 amdgpu_device_wb_free(adev, reg_val_offs); 4265 return value; 4266 4267 failed_undo: 4268 amdgpu_ring_undo(ring); 4269 failed_unlock: 4270 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4271 failed_kiq_read: 4272 if (reg_val_offs) 4273 amdgpu_device_wb_free(adev, reg_val_offs); 4274 pr_err("failed to read gpu clock\n"); 4275 return ~0; 4276 } 4277 4278 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4279 { 4280 uint64_t clock, clock_lo, clock_hi, hi_check; 4281 4282 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4283 case IP_VERSION(9, 3, 0): 4284 preempt_disable(); 4285 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4286 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4287 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4288 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4289 * roughly every 42 seconds. 4290 */ 4291 if (hi_check != clock_hi) { 4292 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4293 clock_hi = hi_check; 4294 } 4295 preempt_enable(); 4296 clock = clock_lo | (clock_hi << 32ULL); 4297 break; 4298 default: 4299 amdgpu_gfx_off_ctrl(adev, false); 4300 mutex_lock(&adev->gfx.gpu_clock_mutex); 4301 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4302 IP_VERSION(9, 0, 1) && 4303 amdgpu_sriov_runtime(adev)) { 4304 clock = gfx_v9_0_kiq_read_clock(adev); 4305 } else { 4306 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4307 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4308 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4309 } 4310 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4311 amdgpu_gfx_off_ctrl(adev, true); 4312 break; 4313 } 4314 return clock; 4315 } 4316 4317 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4318 uint32_t vmid, 4319 uint32_t gds_base, uint32_t gds_size, 4320 uint32_t gws_base, uint32_t gws_size, 4321 uint32_t oa_base, uint32_t oa_size) 4322 { 4323 struct amdgpu_device *adev = ring->adev; 4324 4325 /* GDS Base */ 4326 gfx_v9_0_write_data_to_reg(ring, 0, false, 4327 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4328 gds_base); 4329 4330 /* GDS Size */ 4331 gfx_v9_0_write_data_to_reg(ring, 0, false, 4332 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4333 gds_size); 4334 4335 /* GWS */ 4336 gfx_v9_0_write_data_to_reg(ring, 0, false, 4337 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4338 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4339 4340 /* OA */ 4341 gfx_v9_0_write_data_to_reg(ring, 0, false, 4342 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4343 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4344 } 4345 4346 static const u32 vgpr_init_compute_shader[] = 4347 { 4348 0xb07c0000, 0xbe8000ff, 4349 0x000000f8, 0xbf110800, 4350 0x7e000280, 0x7e020280, 4351 0x7e040280, 0x7e060280, 4352 0x7e080280, 0x7e0a0280, 4353 0x7e0c0280, 0x7e0e0280, 4354 0x80808800, 0xbe803200, 4355 0xbf84fff5, 0xbf9c0000, 4356 0xd28c0001, 
0x0001007f, 4357 0xd28d0001, 0x0002027e, 4358 0x10020288, 0xb8810904, 4359 0xb7814000, 0xd1196a01, 4360 0x00000301, 0xbe800087, 4361 0xbefc00c1, 0xd89c4000, 4362 0x00020201, 0xd89cc080, 4363 0x00040401, 0x320202ff, 4364 0x00000800, 0x80808100, 4365 0xbf84fff8, 0x7e020280, 4366 0xbf810000, 0x00000000, 4367 }; 4368 4369 static const u32 sgpr_init_compute_shader[] = 4370 { 4371 0xb07c0000, 0xbe8000ff, 4372 0x0000005f, 0xbee50080, 4373 0xbe812c65, 0xbe822c65, 4374 0xbe832c65, 0xbe842c65, 4375 0xbe852c65, 0xb77c0005, 4376 0x80808500, 0xbf84fff8, 4377 0xbe800080, 0xbf810000, 4378 }; 4379 4380 static const u32 vgpr_init_compute_shader_arcturus[] = { 4381 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4382 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4383 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4384 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4385 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4386 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4387 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4388 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4389 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4390 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4391 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4392 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4393 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4394 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4395 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4396 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4397 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4398 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4399 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4400 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4401 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4402 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4403 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4404 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4405 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4406 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4407 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4408 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4409 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4410 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4411 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4412 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4413 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4414 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4415 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4416 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4417 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4418 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4419 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4420 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4421 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4422 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4423 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4424 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4425 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4426 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4427 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4428 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4429 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4430 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4431 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4432 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4433 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4434 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4435 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4436 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4437 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4438 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4439 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4440 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4441 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4442 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4443 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4444 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4445 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4446 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4447 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4448 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4449 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4450 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4451 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4452 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4453 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4454 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4455 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4456 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4457 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4458 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4459 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4460 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4461 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4462 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4463 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4464 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4465 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4466 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4467 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4468 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4469 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4470 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4471 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4472 0xbf84fff8, 0xbf810000, 4473 }; 4474 4475 /* When below register arrays changed, please update gpr_reg_size, 4476 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4477 to cover all gfx9 ASICs */ 4478 static const struct soc15_reg_entry vgpr_init_regs[] = { 4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4490 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4491 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4492 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4493 }; 4494 4495 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4509 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4510 }; 4511 4512 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4513 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4514 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4515 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4516 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4517 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4518 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4519 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4520 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4521 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4522 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4523 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4524 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4525 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4526 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4527 }; 4528 4529 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4530 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4531 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4532 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4533 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4534 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4535 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4536 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4537 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4538 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4539 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4540 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4541 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4542 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4543 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4544 }; 4545 4546 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4547 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4548 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4549 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4550 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4551 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4552 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4553 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4554 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4555 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4556 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4557 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4558 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4559 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4560 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4561 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4562 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4563 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4564 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4565 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4566 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4567 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4568 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4569 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4570 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4571 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4572 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4573 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4574 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4575 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4576 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4577 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4578 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4579 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4580 }; 4581 4582 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4583 { 4584 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4585 int i, r; 4586 4587 /* only support when RAS is enabled */ 4588 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4589 return 0; 4590 4591 r = amdgpu_ring_alloc(ring, 7); 4592 if (r) { 4593 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4594 ring->name, r); 4595 return r; 4596 } 4597 4598 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4599 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4600 4601 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4602 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4603 PACKET3_DMA_DATA_DST_SEL(1) | 4604 PACKET3_DMA_DATA_SRC_SEL(2) | 4605 PACKET3_DMA_DATA_ENGINE(0))); 4606 amdgpu_ring_write(ring, 0); 4607 amdgpu_ring_write(ring, 0); 4608 amdgpu_ring_write(ring, 0); 4609 amdgpu_ring_write(ring, 0); 4610 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4611 adev->gds.gds_size); 4612 4613 amdgpu_ring_commit(ring); 4614 4615 for (i = 0; i < adev->usec_timeout; i++) { 4616 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4617 break; 4618 udelay(1); 4619 } 4620 4621 if (i >= adev->usec_timeout) 4622 r = -ETIMEDOUT; 4623 4624 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4625 4626 return r; 4627 } 4628 4629 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4630 { 4631 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4632 struct amdgpu_ib ib; 4633 struct dma_fence *f = NULL; 4634 int r, i; 4635 unsigned total_size, vgpr_offset, sgpr_offset; 4636 u64 gpu_addr; 4637 4638 int compute_dim_x = adev->gfx.config.max_shader_engines * 4639 adev->gfx.config.max_cu_per_sh * 4640 adev->gfx.config.max_sh_per_se; 4641 int sgpr_work_group_size = 5; 4642 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4643 int vgpr_init_shader_size; 4644 const u32 *vgpr_init_shader_ptr; 4645 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4646 4647 /* only support when RAS is enabled */ 4648 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4649 return 0; 4650 4651 /* bail if the compute ring is not ready */ 4652 if (!ring->sched.ready) 4653 return 0; 4654 4655 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4656 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4657 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4658 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4659 } else { 4660 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4661 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4662 vgpr_init_regs_ptr = vgpr_init_regs; 4663 } 4664 4665 total_size = 4666 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4667 total_size += 4668 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4669 total_size += 4670 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4671 total_size = ALIGN(total_size, 256); 4672 vgpr_offset = total_size; 4673 total_size += ALIGN(vgpr_init_shader_size, 256); 4674 sgpr_offset = total_size; 4675 total_size += sizeof(sgpr_init_compute_shader); 4676 4677 /* allocate an indirect buffer to put the commands in */ 4678 memset(&ib, 0, sizeof(ib)); 4679 r = amdgpu_ib_get(adev, NULL, total_size, 4680 AMDGPU_IB_POOL_DIRECT, &ib); 4681 if (r) { 4682 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4683 return r; 4684 } 4685 4686 /* load the compute 
shaders */ 4687 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4688 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4689 4690 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4691 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4692 4693 /* init the ib length to 0 */ 4694 ib.length_dw = 0; 4695 4696 /* VGPR */ 4697 /* write the register state for the compute dispatch */ 4698 for (i = 0; i < gpr_reg_size; i++) { 4699 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4700 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4701 - PACKET3_SET_SH_REG_START; 4702 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4703 } 4704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4705 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4707 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4708 - PACKET3_SET_SH_REG_START; 4709 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4710 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4711 4712 /* write dispatch packet */ 4713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4714 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4715 ib.ptr[ib.length_dw++] = 1; /* y */ 4716 ib.ptr[ib.length_dw++] = 1; /* z */ 4717 ib.ptr[ib.length_dw++] = 4718 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4719 4720 /* write CS partial flush packet */ 4721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4722 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4723 4724 /* SGPR1 */ 4725 /* write the register state for the compute dispatch */ 4726 for (i = 0; i < gpr_reg_size; i++) { 4727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4728 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4729 - PACKET3_SET_SH_REG_START; 4730 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4731 } 4732 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4733 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4734 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4735 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4736 - PACKET3_SET_SH_REG_START; 4737 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4738 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4739 4740 /* write dispatch packet */ 4741 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4742 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4743 ib.ptr[ib.length_dw++] = 1; /* y */ 4744 ib.ptr[ib.length_dw++] = 1; /* z */ 4745 ib.ptr[ib.length_dw++] = 4746 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4747 4748 /* write CS partial flush packet */ 4749 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4750 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4751 4752 /* SGPR2 */ 4753 /* write the register state for the compute dispatch */ 4754 for (i = 0; i < gpr_reg_size; i++) { 4755 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4756 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4757 - PACKET3_SET_SH_REG_START; 4758 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4759 } 4760 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4761 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4762 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4763 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 4764 - PACKET3_SET_SH_REG_START; 4765 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4766 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4767 4768 /* write dispatch packet */ 4769 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4770 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4771 ib.ptr[ib.length_dw++] = 1; /* y */ 4772 ib.ptr[ib.length_dw++] = 1; /* z */ 4773 ib.ptr[ib.length_dw++] = 4774 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4775 4776 /* write CS partial flush packet */ 4777 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4778 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4779 4780 /* schedule the ib on the ring */ 4781 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4782 if (r) { 4783 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4784 goto fail; 4785 } 4786 4787 /* wait for the GPU to finish processing the IB */ 4788 r = dma_fence_wait(f, false); 4789 if (r) { 4790 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4791 goto fail; 4792 } 4793 4794 fail: 4795 amdgpu_ib_free(&ib, NULL); 4796 dma_fence_put(f); 4797 4798 return r; 4799 } 4800 4801 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block) 4802 { 4803 struct amdgpu_device *adev = ip_block->adev; 4804 4805 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 4806 4807 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 4808 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4809 adev->gfx.num_gfx_rings = 0; 4810 else 4811 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4812 adev->gfx.xcc_mask = 1; 4813 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4814 AMDGPU_MAX_COMPUTE_RINGS); 4815 gfx_v9_0_set_kiq_pm4_funcs(adev); 4816 gfx_v9_0_set_ring_funcs(adev); 4817 gfx_v9_0_set_irq_funcs(adev); 4818 gfx_v9_0_set_gds_init(adev); 4819 gfx_v9_0_set_rlc_funcs(adev); 4820 4821 /* init rlcg reg access ctrl */ 4822 gfx_v9_0_init_rlcg_reg_access_ctrl(adev); 4823 4824 return gfx_v9_0_init_microcode(adev); 4825 } 4826 4827 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block) 4828 { 4829 struct amdgpu_device *adev = ip_block->adev; 4830 int r; 4831 4832 /* 4833 * Temp workaround to fix the issue that CP firmware fails to 4834 * update the read pointer when CPDMA writes a clearing operation 4835 * to GDS in the suspend/resume sequence on several cards. So just 4836 * limit this operation to the cold boot sequence.
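 * (The GDS clear in question is issued by gfx_v9_0_do_edc_gds_workarounds(),
 * which is only called below when !adev->in_suspend.)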
4837 */ 4838 if ((!adev->in_suspend) && 4839 (adev->gds.gds_size)) { 4840 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4841 if (r) 4842 return r; 4843 } 4844 4845 /* requires IBs so do in late init after IB pool is initialized */ 4846 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4847 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4848 else 4849 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4850 4851 if (r) 4852 return r; 4853 4854 if (adev->gfx.ras && 4855 adev->gfx.ras->enable_watchdog_timer) 4856 adev->gfx.ras->enable_watchdog_timer(adev); 4857 4858 return 0; 4859 } 4860 4861 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block) 4862 { 4863 struct amdgpu_device *adev = ip_block->adev; 4864 int r; 4865 4866 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4867 if (r) 4868 return r; 4869 4870 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4871 if (r) 4872 return r; 4873 4874 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 4875 if (r) 4876 return r; 4877 4878 r = gfx_v9_0_ecc_late_init(ip_block); 4879 if (r) 4880 return r; 4881 4882 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4883 gfx_v9_4_2_debug_trap_config_init(adev, 4884 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4885 else 4886 gfx_v9_0_debug_trap_config_init(adev, 4887 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4888 4889 return 0; 4890 } 4891 4892 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4893 { 4894 uint32_t rlc_setting; 4895 4896 /* if RLC is not enabled, do nothing */ 4897 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4898 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4899 return false; 4900 4901 return true; 4902 } 4903 4904 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4905 { 4906 uint32_t data; 4907 unsigned i; 4908 4909 data = RLC_SAFE_MODE__CMD_MASK; 4910 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4911 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4912 4913 /* wait for RLC_SAFE_MODE */ 4914 for (i = 0; i < adev->usec_timeout; i++) { 4915 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4916 break; 4917 udelay(1); 4918 } 4919 } 4920 4921 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4922 { 4923 uint32_t data; 4924 4925 data = RLC_SAFE_MODE__CMD_MASK; 4926 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4927 } 4928 4929 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4930 bool enable) 4931 { 4932 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4933 4934 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4935 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4936 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4937 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4938 } else { 4939 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4940 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4941 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4942 } 4943 4944 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4945 } 4946 4947 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4948 bool enable) 4949 { 4950 /* TODO: double check if we need to perform under safe mode */ 4951 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4952 4953 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4954 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4955 else 4956 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4957 4958 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4959 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4960 else 4961 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4962 4963 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4964 } 4965 4966 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4967 bool enable) 4968 { 4969 uint32_t data, def; 4970 4971 /* It is disabled by HW by default */ 4972 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4973 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4974 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4975 4976 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4977 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4978 4979 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4980 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4981 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4982 4983 /* only for Vega10 & Raven1 */ 4984 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4985 4986 if (def != data) 4987 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4988 4989 /* MGLS is a global flag to control all MGLS in GFX */ 4990 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4991 /* 2 - RLC memory Light sleep */ 4992 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4993 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4994 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4995 if (def != data) 4996 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4997 } 4998 /* 3 - CP memory Light sleep */ 4999 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5000 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 5001 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5002 if (def != data) 5003 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5004 } 5005 } 5006 } else { 5007 /* 1 - MGCG_OVERRIDE */ 5008 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5009 5010 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 5011 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 5012 5013 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5014 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5015 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 5016 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 5017 5018 if (def != data) 5019 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5020 5021 /* 2 - disable MGLS in RLC */ 5022 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 5023 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5024 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5025 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 5026 } 5027 5028 /* 3 - disable MGLS in CP */ 5029 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 5030 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5031 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5032 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5033 } 5034 } 5035 } 5036 5037 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 5038 bool enable) 5039 { 5040 uint32_t data, def; 5041 5042 if (!adev->gfx.num_gfx_rings) 5043 return; 5044 5045 /* Enable 3D CGCG/CGLS */ 5046 if (enable) { 5047 /* write cmd to clear cgcg/cgls ov */ 5048 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5049 /* unset CGCG override */ 5050 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5051 /* update CGCG and CGLS override bits */ 5052 if (def != data) 5053 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5054 5055 /* enable 3Dcgcg FSM(0x0000363f) */ 5056 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5057 5058 if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5059 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5060 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5061 else 5062 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5063 5064 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5065 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5066 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5067 if (def != data) 5068 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5069 5070 /* set IDLE_POLL_COUNT(0x00900100) */ 5071 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5072 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5073 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5074 if (def != data) 5075 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5076 } else { 5077 /* Disable CGCG/CGLS */ 5078 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5079 /* disable cgcg, cgls should be disabled */ 5080 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5081 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5082 /* disable cgcg and cgls in FSM */ 5083 if (def != data) 5084 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5085 } 5086 } 5087 5088 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5089 bool enable) 5090 { 5091 uint32_t def, data; 5092 5093 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5094 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5095 /* unset CGCG override */ 5096 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5097 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5098 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5099 else 5100 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5101 /* update CGCG and CGLS override bits */ 5102 if (def != data) 5103 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5104 5105 /* enable cgcg FSM(0x0000363F) */ 5106 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5107 5108 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 5109 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5110 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5111 else 5112 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5113 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5114 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5115 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5116 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5117 if (def != data) 5118 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5119 5120 /* set IDLE_POLL_COUNT(0x00900100) */ 5121 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5122 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5123 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5124 if (def != data) 5125 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5126 } else { 5127 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5128 /* reset CGCG/CGLS bits */ 5129 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5130 /* disable cgcg and cgls in FSM */ 5131 if (def != data) 5132 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5133 } 5134 } 5135 5136 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5137 bool enable) 5138 { 5139 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5140 if (enable) { 5141 /* CGCG/CGLS should be enabled after MGCG/MGLS 5142 * === MGCG + MGLS === 5143 */ 5144 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5145 /* === CGCG /CGLS for GFX 3D Only 
=== */ 5146 gfx_v9_0_update_3d_clock_gating(adev, enable); 5147 /* === CGCG + CGLS === */ 5148 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5149 } else { 5150 /* CGCG/CGLS should be disabled before MGCG/MGLS 5151 * === CGCG + CGLS === 5152 */ 5153 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5154 /* === CGCG /CGLS for GFX 3D Only === */ 5155 gfx_v9_0_update_3d_clock_gating(adev, enable); 5156 /* === MGCG + MGLS === */ 5157 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5158 } 5159 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5160 return 0; 5161 } 5162 5163 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5164 unsigned int vmid) 5165 { 5166 u32 reg, data; 5167 5168 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5169 if (amdgpu_sriov_is_pp_one_vf(adev)) 5170 data = RREG32_NO_KIQ(reg); 5171 else 5172 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5173 5174 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5175 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5176 5177 if (amdgpu_sriov_is_pp_one_vf(adev)) 5178 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5179 else 5180 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5181 } 5182 5183 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5184 { 5185 amdgpu_gfx_off_ctrl(adev, false); 5186 5187 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5188 5189 amdgpu_gfx_off_ctrl(adev, true); 5190 } 5191 5192 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5193 uint32_t offset, 5194 struct soc15_reg_rlcg *entries, int arr_size) 5195 { 5196 int i; 5197 uint32_t reg; 5198 5199 if (!entries) 5200 return false; 5201 5202 for (i = 0; i < arr_size; i++) { 5203 const struct soc15_reg_rlcg *entry; 5204 5205 entry = &entries[i]; 5206 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5207 if (offset == reg) 5208 return true; 5209 } 5210 5211 return false; 5212 } 5213 5214 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5215 { 5216 return gfx_v9_0_check_rlcg_range(adev, offset, 5217 (void *)rlcg_access_gc_9_0, 5218 ARRAY_SIZE(rlcg_access_gc_9_0)); 5219 } 5220 5221 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5222 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5223 .set_safe_mode = gfx_v9_0_set_safe_mode, 5224 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5225 .init = gfx_v9_0_rlc_init, 5226 .get_csb_size = gfx_v9_0_get_csb_size, 5227 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5228 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5229 .resume = gfx_v9_0_rlc_resume, 5230 .stop = gfx_v9_0_rlc_stop, 5231 .reset = gfx_v9_0_rlc_reset, 5232 .start = gfx_v9_0_rlc_start, 5233 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5234 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5235 }; 5236 5237 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5238 enum amd_powergating_state state) 5239 { 5240 struct amdgpu_device *adev = ip_block->adev; 5241 bool enable = (state == AMD_PG_STATE_GATE); 5242 5243 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5244 case IP_VERSION(9, 2, 2): 5245 case IP_VERSION(9, 1, 0): 5246 case IP_VERSION(9, 3, 0): 5247 if (!enable) 5248 amdgpu_gfx_off_ctrl_immediate(adev, false); 5249 5250 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5251 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5252 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5253 } else { 5254 
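/* AMD_PG_SUPPORT_RLC_SMU_HS not set: keep SCLK slow-down disabled on power up/down */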
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5255 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5256 } 5257 5258 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5259 gfx_v9_0_enable_cp_power_gating(adev, true); 5260 else 5261 gfx_v9_0_enable_cp_power_gating(adev, false); 5262 5263 /* update gfx cgpg state */ 5264 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5265 5266 /* update mgcg state */ 5267 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5268 5269 if (enable) 5270 amdgpu_gfx_off_ctrl_immediate(adev, true); 5271 break; 5272 case IP_VERSION(9, 2, 1): 5273 amdgpu_gfx_off_ctrl_immediate(adev, enable); 5274 break; 5275 default: 5276 break; 5277 } 5278 5279 return 0; 5280 } 5281 5282 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5283 enum amd_clockgating_state state) 5284 { 5285 struct amdgpu_device *adev = ip_block->adev; 5286 5287 if (amdgpu_sriov_vf(adev)) 5288 return 0; 5289 5290 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5291 case IP_VERSION(9, 0, 1): 5292 case IP_VERSION(9, 2, 1): 5293 case IP_VERSION(9, 4, 0): 5294 case IP_VERSION(9, 2, 2): 5295 case IP_VERSION(9, 1, 0): 5296 case IP_VERSION(9, 4, 1): 5297 case IP_VERSION(9, 3, 0): 5298 case IP_VERSION(9, 4, 2): 5299 gfx_v9_0_update_gfx_clock_gating(adev, 5300 state == AMD_CG_STATE_GATE); 5301 break; 5302 default: 5303 break; 5304 } 5305 return 0; 5306 } 5307 5308 static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5309 { 5310 struct amdgpu_device *adev = ip_block->adev; 5311 int data; 5312 5313 if (amdgpu_sriov_vf(adev)) 5314 *flags = 0; 5315 5316 /* AMD_CG_SUPPORT_GFX_MGCG */ 5317 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5318 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5319 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5320 5321 /* AMD_CG_SUPPORT_GFX_CGCG */ 5322 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5323 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5324 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5325 5326 /* AMD_CG_SUPPORT_GFX_CGLS */ 5327 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5328 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5329 5330 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5331 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5332 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5333 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5334 5335 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5336 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5337 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5338 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5339 5340 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5341 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5342 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5343 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5344 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5345 5346 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5347 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5348 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5349 } 5350 } 5351 5352 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5353 { 5354 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5355 } 5356 5357 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5358 { 5359 struct amdgpu_device *adev = ring->adev; 5360 u64 wptr; 5361 5362 /* XXX check if swapping is necessary on BE */ 5363 if (ring->use_doorbell) { 5364 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5365 } else { 5366 wptr = RREG32_SOC15(GC, 0, 
mmCP_RB0_WPTR); 5367 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5368 } 5369 5370 return wptr; 5371 } 5372 5373 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5374 { 5375 struct amdgpu_device *adev = ring->adev; 5376 5377 if (ring->use_doorbell) { 5378 /* XXX check if swapping is necessary on BE */ 5379 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5380 WDOORBELL64(ring->doorbell_index, ring->wptr); 5381 } else { 5382 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5383 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5384 } 5385 } 5386 5387 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5388 { 5389 struct amdgpu_device *adev = ring->adev; 5390 u32 ref_and_mask, reg_mem_engine; 5391 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5392 5393 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5394 switch (ring->me) { 5395 case 1: 5396 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5397 break; 5398 case 2: 5399 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5400 break; 5401 default: 5402 return; 5403 } 5404 reg_mem_engine = 0; 5405 } else { 5406 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5407 reg_mem_engine = 1; /* pfp */ 5408 } 5409 5410 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5411 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5412 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5413 ref_and_mask, ref_and_mask, 0x20); 5414 } 5415 5416 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5417 struct amdgpu_job *job, 5418 struct amdgpu_ib *ib, 5419 uint32_t flags) 5420 { 5421 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5422 u32 header, control = 0; 5423 5424 if (ib->flags & AMDGPU_IB_FLAG_CE) 5425 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5426 else 5427 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5428 5429 control |= ib->length_dw | (vmid << 24); 5430 5431 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5432 control |= INDIRECT_BUFFER_PRE_ENB(1); 5433 5434 if (flags & AMDGPU_IB_PREEMPTED) 5435 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5436 5437 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5438 gfx_v9_0_ring_emit_de_meta(ring, 5439 (!amdgpu_sriov_vf(ring->adev) && 5440 flags & AMDGPU_IB_PREEMPTED) ? 
5441 true : false, 5442 job->gds_size > 0 && job->gds_base != 0); 5443 } 5444 5445 amdgpu_ring_write(ring, header); 5446 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5447 amdgpu_ring_write(ring, 5448 #ifdef __BIG_ENDIAN 5449 (2 << 0) | 5450 #endif 5451 lower_32_bits(ib->gpu_addr)); 5452 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5453 amdgpu_ring_ib_on_emit_cntl(ring); 5454 amdgpu_ring_write(ring, control); 5455 } 5456 5457 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5458 unsigned offset) 5459 { 5460 u32 control = ring->ring[offset]; 5461 5462 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5463 ring->ring[offset] = control; 5464 } 5465 5466 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5467 unsigned offset) 5468 { 5469 struct amdgpu_device *adev = ring->adev; 5470 void *ce_payload_cpu_addr; 5471 uint64_t payload_offset, payload_size; 5472 5473 payload_size = sizeof(struct v9_ce_ib_state); 5474 5475 if (ring->is_mes_queue) { 5476 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5477 gfx[0].gfx_meta_data) + 5478 offsetof(struct v9_gfx_meta_data, ce_payload); 5479 ce_payload_cpu_addr = 5480 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5481 } else { 5482 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5483 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5484 } 5485 5486 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5487 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5488 } else { 5489 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5490 (ring->buf_mask + 1 - offset) << 2); 5491 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5492 memcpy((void *)&ring->ring[0], 5493 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5494 payload_size); 5495 } 5496 } 5497 5498 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5499 unsigned offset) 5500 { 5501 struct amdgpu_device *adev = ring->adev; 5502 void *de_payload_cpu_addr; 5503 uint64_t payload_offset, payload_size; 5504 5505 payload_size = sizeof(struct v9_de_ib_state); 5506 5507 if (ring->is_mes_queue) { 5508 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5509 gfx[0].gfx_meta_data) + 5510 offsetof(struct v9_gfx_meta_data, de_payload); 5511 de_payload_cpu_addr = 5512 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5513 } else { 5514 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5515 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5516 } 5517 5518 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5519 IB_COMPLETION_STATUS_PREEMPTED; 5520 5521 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5522 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5523 } else { 5524 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5525 (ring->buf_mask + 1 - offset) << 2); 5526 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5527 memcpy((void *)&ring->ring[0], 5528 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5529 payload_size); 5530 } 5531 } 5532 5533 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5534 struct amdgpu_job *job, 5535 struct amdgpu_ib *ib, 5536 uint32_t flags) 5537 { 5538 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5539 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5540 5541 /* Currently, there is a high possibility to get wave ID mismatch 5542 * between ME and GDS, leading to a hw deadlock, because ME generates 5543 * 
different wave IDs than the GDS expects. This situation happens 5544 * randomly when at least 5 compute pipes use GDS ordered append. 5545 * The wave IDs generated by ME are also wrong after suspend/resume. 5546 * Those are probably bugs somewhere else in the kernel driver. 5547 * 5548 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5549 * GDS to 0 for this ring (me/pipe). 5550 */ 5551 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5552 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5553 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5554 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5555 } 5556 5557 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5558 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5559 amdgpu_ring_write(ring, 5560 #ifdef __BIG_ENDIAN 5561 (2 << 0) | 5562 #endif 5563 lower_32_bits(ib->gpu_addr)); 5564 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5565 amdgpu_ring_write(ring, control); 5566 } 5567 5568 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5569 u64 seq, unsigned flags) 5570 { 5571 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5572 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5573 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5574 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5575 uint32_t dw2 = 0; 5576 5577 /* RELEASE_MEM - flush caches, send int */ 5578 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5579 5580 if (writeback) { 5581 dw2 = EOP_TC_NC_ACTION_EN; 5582 } else { 5583 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5584 EOP_TC_MD_ACTION_EN; 5585 } 5586 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5587 EVENT_INDEX(5); 5588 if (exec) 5589 dw2 |= EOP_EXEC; 5590 5591 amdgpu_ring_write(ring, dw2); 5592 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5593 5594 /* 5595 * the address should be Qword aligned if 64bit write, Dword 5596 * aligned if only send 32bit data low (discard data high) 5597 */ 5598 if (write64bit) 5599 BUG_ON(addr & 0x7); 5600 else 5601 BUG_ON(addr & 0x3); 5602 amdgpu_ring_write(ring, lower_32_bits(addr)); 5603 amdgpu_ring_write(ring, upper_32_bits(addr)); 5604 amdgpu_ring_write(ring, lower_32_bits(seq)); 5605 amdgpu_ring_write(ring, upper_32_bits(seq)); 5606 amdgpu_ring_write(ring, 0); 5607 } 5608 5609 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5610 { 5611 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5612 uint32_t seq = ring->fence_drv.sync_seq; 5613 uint64_t addr = ring->fence_drv.gpu_addr; 5614 5615 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5616 lower_32_bits(addr), upper_32_bits(addr), 5617 seq, 0xffffffff, 4); 5618 } 5619 5620 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5621 unsigned vmid, uint64_t pd_addr) 5622 { 5623 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5624 5625 /* compute doesn't have PFP */ 5626 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5627 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5628 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5629 amdgpu_ring_write(ring, 0x0); 5630 } 5631 } 5632 5633 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5634 { 5635 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5636 } 5637 5638 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5639 { 5640 u64 wptr; 5641 5642 /* XXX check if swapping is necessary on BE */ 5643 if (ring->use_doorbell) 5644 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5645 else 5646 BUG(); 5647 return wptr; 5648 } 5649 5650 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5651 { 5652 struct amdgpu_device *adev = ring->adev; 5653 5654 /* XXX check if swapping is necessary on BE */ 5655 if (ring->use_doorbell) { 5656 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5657 WDOORBELL64(ring->doorbell_index, ring->wptr); 5658 } else{ 5659 BUG(); /* only DOORBELL method supported on gfx9 now */ 5660 } 5661 } 5662 5663 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5664 u64 seq, unsigned int flags) 5665 { 5666 struct amdgpu_device *adev = ring->adev; 5667 5668 /* we only allocate 32bit for each seq wb address */ 5669 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5670 5671 /* write fence seq to the "addr" */ 5672 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5673 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5674 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5675 amdgpu_ring_write(ring, lower_32_bits(addr)); 5676 amdgpu_ring_write(ring, upper_32_bits(addr)); 5677 amdgpu_ring_write(ring, lower_32_bits(seq)); 5678 5679 if (flags & AMDGPU_FENCE_FLAG_INT) { 5680 /* set register to trigger INT */ 5681 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5682 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5683 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5684 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5685 amdgpu_ring_write(ring, 0); 5686 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5687 } 5688 } 5689 5690 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5691 { 5692 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5693 amdgpu_ring_write(ring, 0); 5694 } 5695 5696 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5697 { 5698 struct 
amdgpu_device *adev = ring->adev; 5699 struct v9_ce_ib_state ce_payload = {0}; 5700 uint64_t offset, ce_payload_gpu_addr; 5701 void *ce_payload_cpu_addr; 5702 int cnt; 5703 5704 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5705 5706 if (ring->is_mes_queue) { 5707 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5708 gfx[0].gfx_meta_data) + 5709 offsetof(struct v9_gfx_meta_data, ce_payload); 5710 ce_payload_gpu_addr = 5711 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5712 ce_payload_cpu_addr = 5713 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5714 } else { 5715 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5716 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5717 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5718 } 5719 5720 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5721 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5722 WRITE_DATA_DST_SEL(8) | 5723 WR_CONFIRM) | 5724 WRITE_DATA_CACHE_POLICY(0)); 5725 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5726 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5727 5728 amdgpu_ring_ib_on_emit_ce(ring); 5729 5730 if (resume) 5731 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5732 sizeof(ce_payload) >> 2); 5733 else 5734 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5735 sizeof(ce_payload) >> 2); 5736 } 5737 5738 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5739 { 5740 int i, r = 0; 5741 struct amdgpu_device *adev = ring->adev; 5742 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5743 struct amdgpu_ring *kiq_ring = &kiq->ring; 5744 unsigned long flags; 5745 5746 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5747 return -EINVAL; 5748 5749 spin_lock_irqsave(&kiq->ring_lock, flags); 5750 5751 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5752 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5753 return -ENOMEM; 5754 } 5755 5756 /* assert preemption condition */ 5757 amdgpu_ring_set_preempt_cond_exec(ring, false); 5758 5759 ring->trail_seq += 1; 5760 amdgpu_ring_alloc(ring, 13); 5761 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5762 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5763 5764 /* assert IB preemption, emit the trailing fence */ 5765 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5766 ring->trail_fence_gpu_addr, 5767 ring->trail_seq); 5768 5769 amdgpu_ring_commit(kiq_ring); 5770 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5771 5772 /* poll the trailing fence */ 5773 for (i = 0; i < adev->usec_timeout; i++) { 5774 if (ring->trail_seq == 5775 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5776 break; 5777 udelay(1); 5778 } 5779 5780 if (i >= adev->usec_timeout) { 5781 r = -EINVAL; 5782 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5783 } 5784 5785 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5786 amdgpu_ring_emit_wreg(ring, 5787 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5788 0x0); 5789 amdgpu_ring_commit(ring); 5790 5791 /* deassert preemption condition */ 5792 amdgpu_ring_set_preempt_cond_exec(ring, true); 5793 return r; 5794 } 5795 5796 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5797 { 5798 struct amdgpu_device *adev = ring->adev; 5799 struct v9_de_ib_state de_payload = {0}; 5800 uint64_t offset, gds_addr, de_payload_gpu_addr; 5801 void *de_payload_cpu_addr; 5802 int cnt; 5803 5804 if (ring->is_mes_queue) { 5805 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5806 gfx[0].gfx_meta_data) + 5807 
offsetof(struct v9_gfx_meta_data, de_payload); 5808 de_payload_gpu_addr = 5809 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5810 de_payload_cpu_addr = 5811 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5812 5813 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5814 gfx[0].gds_backup) + 5815 offsetof(struct v9_gfx_meta_data, de_payload); 5816 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5817 } else { 5818 offset = offsetof(struct v9_gfx_meta_data, de_payload); 5819 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5820 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5821 5822 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5823 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5824 PAGE_SIZE); 5825 } 5826 5827 if (usegds) { 5828 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5829 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5830 } 5831 5832 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5833 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5834 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5835 WRITE_DATA_DST_SEL(8) | 5836 WR_CONFIRM) | 5837 WRITE_DATA_CACHE_POLICY(0)); 5838 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5839 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5840 5841 amdgpu_ring_ib_on_emit_de(ring); 5842 if (resume) 5843 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5844 sizeof(de_payload) >> 2); 5845 else 5846 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5847 sizeof(de_payload) >> 2); 5848 } 5849 5850 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5851 bool secure) 5852 { 5853 uint32_t v = secure ? FRAME_TMZ : 0; 5854 5855 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5856 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5857 } 5858 5859 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5860 { 5861 uint32_t dw2 = 0; 5862 5863 gfx_v9_0_ring_emit_ce_meta(ring, 5864 (!amdgpu_sriov_vf(ring->adev) && 5865 flags & AMDGPU_IB_PREEMPTED) ? true : false); 5866 5867 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5868 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5869 /* set load_global_config & load_global_uconfig */ 5870 dw2 |= 0x8001; 5871 /* set load_cs_sh_regs */ 5872 dw2 |= 0x01000000; 5873 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5874 dw2 |= 0x10002; 5875 5876 /* set load_ce_ram if preamble presented */ 5877 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5878 dw2 |= 0x10000000; 5879 } else { 5880 /* still load_ce_ram if this is the first time preamble presented 5881 * although there is no context switch happens. 
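 * (The 0x10000000 set just below is the same load_ce_ram bit used in the
 * AMDGPU_HAVE_CTX_SWITCH path above.)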
5882 */ 5883 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5884 dw2 |= 0x10000000; 5885 } 5886 5887 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5888 amdgpu_ring_write(ring, dw2); 5889 amdgpu_ring_write(ring, 0); 5890 } 5891 5892 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5893 uint64_t addr) 5894 { 5895 unsigned ret; 5896 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5897 amdgpu_ring_write(ring, lower_32_bits(addr)); 5898 amdgpu_ring_write(ring, upper_32_bits(addr)); 5899 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5900 amdgpu_ring_write(ring, 0); 5901 ret = ring->wptr & ring->buf_mask; 5902 /* patch dummy value later */ 5903 amdgpu_ring_write(ring, 0); 5904 return ret; 5905 } 5906 5907 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5908 uint32_t reg_val_offs) 5909 { 5910 struct amdgpu_device *adev = ring->adev; 5911 5912 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5913 amdgpu_ring_write(ring, 0 | /* src: register*/ 5914 (5 << 8) | /* dst: memory */ 5915 (1 << 20)); /* write confirm */ 5916 amdgpu_ring_write(ring, reg); 5917 amdgpu_ring_write(ring, 0); 5918 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5919 reg_val_offs * 4)); 5920 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5921 reg_val_offs * 4)); 5922 } 5923 5924 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5925 uint32_t val) 5926 { 5927 uint32_t cmd = 0; 5928 5929 switch (ring->funcs->type) { 5930 case AMDGPU_RING_TYPE_GFX: 5931 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5932 break; 5933 case AMDGPU_RING_TYPE_KIQ: 5934 cmd = (1 << 16); /* no inc addr */ 5935 break; 5936 default: 5937 cmd = WR_CONFIRM; 5938 break; 5939 } 5940 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5941 amdgpu_ring_write(ring, cmd); 5942 amdgpu_ring_write(ring, reg); 5943 amdgpu_ring_write(ring, 0); 5944 amdgpu_ring_write(ring, val); 5945 } 5946 5947 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5948 uint32_t val, uint32_t mask) 5949 { 5950 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5951 } 5952 5953 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5954 uint32_t reg0, uint32_t reg1, 5955 uint32_t ref, uint32_t mask) 5956 { 5957 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5958 struct amdgpu_device *adev = ring->adev; 5959 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5960 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5961 5962 if (fw_version_ok) 5963 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5964 ref, mask, 0x20); 5965 else 5966 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5967 ref, mask); 5968 } 5969 5970 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5971 { 5972 struct amdgpu_device *adev = ring->adev; 5973 uint32_t value = 0; 5974 5975 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5976 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5977 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5978 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5979 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5980 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5981 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5982 } 5983 5984 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5985 enum amdgpu_interrupt_state state) 5986 { 5987 switch (state) { 5988 case AMDGPU_IRQ_STATE_DISABLE: 5989 case AMDGPU_IRQ_STATE_ENABLE: 5990 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5991 TIME_STAMP_INT_ENABLE, 5992 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5993 break; 5994 default: 5995 break; 5996 } 5997 } 5998 5999 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6000 int me, int pipe, 6001 enum amdgpu_interrupt_state state) 6002 { 6003 u32 mec_int_cntl, mec_int_cntl_reg; 6004 6005 /* 6006 * amdgpu controls only the first MEC. That's why this function only 6007 * handles the setting of interrupts for this specific MEC. All other 6008 * pipes' interrupts are set by amdkfd. 6009 */ 6010 6011 if (me == 1) { 6012 switch (pipe) { 6013 case 0: 6014 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 6015 break; 6016 case 1: 6017 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 6018 break; 6019 case 2: 6020 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 6021 break; 6022 case 3: 6023 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 6024 break; 6025 default: 6026 DRM_DEBUG("invalid pipe %d\n", pipe); 6027 return; 6028 } 6029 } else { 6030 DRM_DEBUG("invalid me %d\n", me); 6031 return; 6032 } 6033 6034 switch (state) { 6035 case AMDGPU_IRQ_STATE_DISABLE: 6036 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 6037 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6038 TIME_STAMP_INT_ENABLE, 0); 6039 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6040 break; 6041 case AMDGPU_IRQ_STATE_ENABLE: 6042 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6043 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6044 TIME_STAMP_INT_ENABLE, 1); 6045 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6046 break; 6047 default: 6048 break; 6049 } 6050 } 6051 6052 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, 6053 int me, int pipe) 6054 { 6055 /* 6056 * amdgpu controls only the first MEC. That's why this function only 6057 * handles the setting of interrupts for this specific MEC. All other 6058 * pipes' interrupts are set by amdkfd. 
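 * For anything outside MEC1 this helper therefore returns 0, and the callers
 * in the priv_reg/bad_op fault-state handlers skip such pipes.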
6059 */ 6060 if (me != 1) 6061 return 0; 6062 6063 switch (pipe) { 6064 case 0: 6065 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 6066 case 1: 6067 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 6068 case 2: 6069 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 6070 case 3: 6071 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 6072 default: 6073 return 0; 6074 } 6075 } 6076 6077 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6078 struct amdgpu_irq_src *source, 6079 unsigned type, 6080 enum amdgpu_interrupt_state state) 6081 { 6082 u32 cp_int_cntl_reg, cp_int_cntl; 6083 int i, j; 6084 6085 switch (state) { 6086 case AMDGPU_IRQ_STATE_DISABLE: 6087 case AMDGPU_IRQ_STATE_ENABLE: 6088 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6089 PRIV_REG_INT_ENABLE, 6090 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6091 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6092 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6093 /* MECs start at 1 */ 6094 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6095 6096 if (cp_int_cntl_reg) { 6097 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6098 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6099 PRIV_REG_INT_ENABLE, 6100 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6101 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6102 } 6103 } 6104 } 6105 break; 6106 default: 6107 break; 6108 } 6109 6110 return 0; 6111 } 6112 6113 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6114 struct amdgpu_irq_src *source, 6115 unsigned type, 6116 enum amdgpu_interrupt_state state) 6117 { 6118 u32 cp_int_cntl_reg, cp_int_cntl; 6119 int i, j; 6120 6121 switch (state) { 6122 case AMDGPU_IRQ_STATE_DISABLE: 6123 case AMDGPU_IRQ_STATE_ENABLE: 6124 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6125 OPCODE_ERROR_INT_ENABLE, 6126 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6127 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6128 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6129 /* MECs start at 1 */ 6130 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6131 6132 if (cp_int_cntl_reg) { 6133 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6134 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6135 OPCODE_ERROR_INT_ENABLE, 6136 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6137 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6138 } 6139 } 6140 } 6141 break; 6142 default: 6143 break; 6144 } 6145 6146 return 0; 6147 } 6148 6149 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6150 struct amdgpu_irq_src *source, 6151 unsigned type, 6152 enum amdgpu_interrupt_state state) 6153 { 6154 switch (state) { 6155 case AMDGPU_IRQ_STATE_DISABLE: 6156 case AMDGPU_IRQ_STATE_ENABLE: 6157 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6158 PRIV_INSTR_INT_ENABLE, 6159 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6160 break; 6161 default: 6162 break; 6163 } 6164 6165 return 0; 6166 } 6167 6168 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 6169 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6170 CP_ECC_ERROR_INT_ENABLE, 1) 6171 6172 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 6173 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6174 CP_ECC_ERROR_INT_ENABLE, 0) 6175 6176 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 6177 struct amdgpu_irq_src *source, 6178 unsigned type, 6179 enum amdgpu_interrupt_state state) 6180 { 6181 switch (state) { 6182 case AMDGPU_IRQ_STATE_DISABLE: 6183 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6184 CP_ECC_ERROR_INT_ENABLE, 0); 6185 DISABLE_ECC_ON_ME_PIPE(1, 0); 6186 DISABLE_ECC_ON_ME_PIPE(1, 1); 6187 DISABLE_ECC_ON_ME_PIPE(1, 2); 6188 DISABLE_ECC_ON_ME_PIPE(1, 3); 6189 break; 6190 6191 case AMDGPU_IRQ_STATE_ENABLE: 6192 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6193 CP_ECC_ERROR_INT_ENABLE, 1); 6194 ENABLE_ECC_ON_ME_PIPE(1, 0); 6195 ENABLE_ECC_ON_ME_PIPE(1, 1); 6196 ENABLE_ECC_ON_ME_PIPE(1, 2); 6197 ENABLE_ECC_ON_ME_PIPE(1, 3); 6198 break; 6199 default: 6200 break; 6201 } 6202 6203 return 0; 6204 } 6205 6206 6207 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6208 struct amdgpu_irq_src *src, 6209 unsigned type, 6210 enum amdgpu_interrupt_state state) 6211 { 6212 switch (type) { 6213 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6214 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6215 break; 6216 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6217 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6218 break; 6219 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6220 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6221 break; 6222 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6223 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6224 break; 6225 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6226 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6227 break; 6228 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6229 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6230 break; 6231 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6232 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6233 break; 6234 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6235 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6236 break; 6237 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6238 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6239 break; 6240 default: 6241 break; 6242 } 6243 return 0; 6244 } 6245 6246 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6247 struct amdgpu_irq_src *source, 6248 struct amdgpu_iv_entry *entry) 6249 { 6250 int i; 6251 u8 me_id, pipe_id, queue_id; 6252 struct amdgpu_ring *ring; 6253 6254 DRM_DEBUG("IH: CP EOP\n"); 6255 me_id = (entry->ring_id & 0x0c) >> 2; 6256 pipe_id = (entry->ring_id & 0x03) >> 0; 6257 queue_id = (entry->ring_id & 0x70) >> 4; 6258 6259 switch (me_id) { 6260 case 0: 6261 if (adev->gfx.num_gfx_rings) { 6262 if (!adev->gfx.mcbp) { 6263 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6264 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6265 /* Fence signals are handled on the software rings*/ 6266 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6267 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6268 } 6269 } 6270 break; 6271 case 1: 6272 case 2: 6273 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6274 ring = &adev->gfx.compute_ring[i]; 6275 /* Per-queue interrupt is supported for MEC starting 
from VI. 6276 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6277 */ 6278 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6279 amdgpu_fence_process(ring); 6280 } 6281 break; 6282 } 6283 return 0; 6284 } 6285 6286 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6287 struct amdgpu_iv_entry *entry) 6288 { 6289 u8 me_id, pipe_id, queue_id; 6290 struct amdgpu_ring *ring; 6291 int i; 6292 6293 me_id = (entry->ring_id & 0x0c) >> 2; 6294 pipe_id = (entry->ring_id & 0x03) >> 0; 6295 queue_id = (entry->ring_id & 0x70) >> 4; 6296 6297 switch (me_id) { 6298 case 0: 6299 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6300 break; 6301 case 1: 6302 case 2: 6303 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6304 ring = &adev->gfx.compute_ring[i]; 6305 if (ring->me == me_id && ring->pipe == pipe_id && 6306 ring->queue == queue_id) 6307 drm_sched_fault(&ring->sched); 6308 } 6309 break; 6310 } 6311 } 6312 6313 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6314 struct amdgpu_irq_src *source, 6315 struct amdgpu_iv_entry *entry) 6316 { 6317 DRM_ERROR("Illegal register access in command stream\n"); 6318 gfx_v9_0_fault(adev, entry); 6319 return 0; 6320 } 6321 6322 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, 6323 struct amdgpu_irq_src *source, 6324 struct amdgpu_iv_entry *entry) 6325 { 6326 DRM_ERROR("Illegal opcode in command stream\n"); 6327 gfx_v9_0_fault(adev, entry); 6328 return 0; 6329 } 6330 6331 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6332 struct amdgpu_irq_src *source, 6333 struct amdgpu_iv_entry *entry) 6334 { 6335 DRM_ERROR("Illegal instruction in command stream\n"); 6336 gfx_v9_0_fault(adev, entry); 6337 return 0; 6338 } 6339 6340 6341 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6342 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6343 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6344 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6345 }, 6346 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6347 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6348 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6349 }, 6350 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6351 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6352 0, 0 6353 }, 6354 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6355 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6356 0, 0 6357 }, 6358 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6359 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6360 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6361 }, 6362 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6363 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6364 0, 0 6365 }, 6366 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6367 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6368 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6369 }, 6370 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6371 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6372 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6373 }, 6374 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6375 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6376 0, 0 6377 }, 6378 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6379 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6380 0, 0 6381 }, 6382 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6383 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6384 0, 0 6385 }, 6386 { "GDS_MEM", 
SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6387 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6388 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6389 }, 6390 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6391 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6392 0, 0 6393 }, 6394 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6395 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6396 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6397 }, 6398 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6399 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6400 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6401 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6402 }, 6403 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6404 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6405 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6406 0, 0 6407 }, 6408 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6409 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6410 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6411 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6412 }, 6413 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6414 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6415 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6416 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6417 }, 6418 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6419 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6420 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6421 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6422 }, 6423 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6424 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6425 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6426 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6427 }, 6428 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6429 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6430 0, 0 6431 }, 6432 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6433 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6434 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6435 }, 6436 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6437 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6438 0, 0 6439 }, 6440 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6441 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6442 0, 0 6443 }, 6444 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6445 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6446 0, 0 6447 }, 6448 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6449 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6450 0, 0 6451 }, 6452 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6453 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6454 0, 0 6455 }, 6456 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6457 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6458 0, 0 6459 }, 6460 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6461 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6462 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6463 }, 6464 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6465 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6466 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6467 }, 6468 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6469 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6470 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6471 }, 6472 { "TCC_LOW_RATE_TAG", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6473 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6474 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6475 }, 6476 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6477 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6478 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6479 }, 6480 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6481 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6482 0, 0 6483 }, 6484 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6485 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6486 0, 0 6487 }, 6488 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6489 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6490 0, 0 6491 }, 6492 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6493 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6494 0, 0 6495 }, 6496 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6497 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6498 0, 0 6499 }, 6500 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6501 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6502 0, 0 6503 }, 6504 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6505 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6506 0, 0 6507 }, 6508 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6509 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6510 0, 0 6511 }, 6512 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6513 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6514 0, 0 6515 }, 6516 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6517 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6518 0, 0 6519 }, 6520 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6521 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6522 0, 0 6523 }, 6524 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6525 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6526 0, 0 6527 }, 6528 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6529 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6530 0, 0 6531 }, 6532 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6533 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6534 0, 0 6535 }, 6536 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6537 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6538 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6539 }, 6540 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6541 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6542 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6543 }, 6544 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6545 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6546 0, 0 6547 }, 6548 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6549 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6550 0, 0 6551 }, 6552 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6553 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6554 0, 0 6555 }, 6556 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6557 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6558 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6559 }, 6560 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6561 
SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6562 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6563 }, 6564 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6565 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6566 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6567 }, 6568 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6569 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6570 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6571 }, 6572 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6573 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6574 0, 0 6575 }, 6576 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6577 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6578 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6579 }, 6580 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6581 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6582 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6583 }, 6584 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6585 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6586 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6587 }, 6588 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6589 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6590 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6591 }, 6592 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6593 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6594 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6595 }, 6596 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6597 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6598 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6599 }, 6600 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6601 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6602 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6603 }, 6604 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6605 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6606 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6607 }, 6608 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6609 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6610 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6611 }, 6612 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6613 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6614 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6615 }, 6616 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6617 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6618 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6619 }, 6620 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6621 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6622 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6623 }, 6624 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6625 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6626 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6627 }, 6628 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6629 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6630 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6631 }, 6632 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6633 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6634 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6635 }, 6636 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6637 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6638 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6639 }, 6640 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6641 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6642 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6643 }, 6644 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6645 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6646 0, 0 6647 }, 6648 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6649 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6650 0, 0 6651 }, 6652 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6653 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6654 0, 0 6655 }, 6656 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6657 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6658 0, 0 6659 }, 6660 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6661 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6662 0, 0 6663 }, 6664 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6665 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6666 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6667 }, 6668 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6669 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6670 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6671 }, 6672 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6673 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6674 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6675 }, 6676 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6677 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6678 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6679 }, 6680 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6681 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6682 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6683 }, 6684 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6685 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6686 0, 0 6687 }, 6688 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6689 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6690 0, 0 6691 }, 6692 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6693 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6694 0, 0 6695 }, 6696 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6697 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6698 0, 0 6699 }, 6700 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6701 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6702 0, 0 6703 }, 6704 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6705 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6706 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6707 }, 6708 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6709 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6710 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6711 }, 6712 { "EA_DRAMWR_DATAMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6713 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6714 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6715 }, 6716 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6717 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6718 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6719 }, 6720 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6721 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6722 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6723 }, 6724 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6725 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6726 0, 0 6727 }, 6728 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6729 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6730 0, 0 6731 }, 6732 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6733 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6734 0, 0 6735 }, 6736 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6737 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6738 0, 0 6739 }, 6740 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6741 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6742 0, 0 6743 }, 6744 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6745 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6746 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6747 }, 6748 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6749 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6750 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6751 }, 6752 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6753 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6754 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6755 }, 6756 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6757 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6758 0, 0 6759 }, 6760 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6761 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6762 0, 0 6763 }, 6764 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6765 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6766 0, 0 6767 }, 6768 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6769 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6770 0, 0 6771 }, 6772 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6773 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6774 0, 0 6775 }, 6776 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6777 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6778 0, 0 6779 } 6780 }; 6781 6782 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6783 void *inject_if, uint32_t instance_mask) 6784 { 6785 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6786 int ret; 6787 struct ta_ras_trigger_error_input block_info = { 0 }; 6788 6789 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6790 return -EINVAL; 6791 6792 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6793 return -EINVAL; 6794 6795 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6796 return -EPERM; 6797 6798 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6799 info->head.type)) { 6800 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6801 ras_gfx_subblocks[info->head.sub_block_index].name, 6802 info->head.type); 6803 return -EPERM; 6804 } 6805 6806 if 
(!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6807 info->head.type)) { 6808 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6809 ras_gfx_subblocks[info->head.sub_block_index].name, 6810 info->head.type); 6811 return -EPERM; 6812 } 6813 6814 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6815 block_info.sub_block_index = 6816 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6817 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6818 block_info.address = info->address; 6819 block_info.value = info->value; 6820 6821 mutex_lock(&adev->grbm_idx_mutex); 6822 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); 6823 mutex_unlock(&adev->grbm_idx_mutex); 6824 6825 return ret; 6826 } 6827 6828 static const char * const vml2_mems[] = { 6829 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6830 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6831 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6832 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6833 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6834 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6835 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6836 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6837 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6838 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6839 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6840 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6841 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6842 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6843 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6844 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6845 }; 6846 6847 static const char * const vml2_walker_mems[] = { 6848 "UTC_VML2_CACHE_PDE0_MEM0", 6849 "UTC_VML2_CACHE_PDE0_MEM1", 6850 "UTC_VML2_CACHE_PDE1_MEM0", 6851 "UTC_VML2_CACHE_PDE1_MEM1", 6852 "UTC_VML2_CACHE_PDE2_MEM0", 6853 "UTC_VML2_CACHE_PDE2_MEM1", 6854 "UTC_VML2_RDIF_LOG_FIFO", 6855 }; 6856 6857 static const char * const atc_l2_cache_2m_mems[] = { 6858 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6859 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6860 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6861 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6862 }; 6863 6864 static const char *atc_l2_cache_4k_mems[] = { 6865 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6866 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6867 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6868 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6869 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6870 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6871 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6872 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6873 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6874 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6875 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6876 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6877 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6878 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6879 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6880 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6881 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6882 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6883 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6884 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6885 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6886 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6887 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6888 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6889 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6890 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6891 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6892 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6893 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6894 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6895 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6896 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6897 }; 6898 6899 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6900 struct 
ras_err_data *err_data) 6901 { 6902 uint32_t i, data; 6903 uint32_t sec_count, ded_count; 6904 6905 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6906 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6907 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6908 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6909 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6910 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6911 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6912 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6913 6914 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6915 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6916 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6917 6918 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6919 if (sec_count) { 6920 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6921 "SEC %d\n", i, vml2_mems[i], sec_count); 6922 err_data->ce_count += sec_count; 6923 } 6924 6925 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6926 if (ded_count) { 6927 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6928 "DED %d\n", i, vml2_mems[i], ded_count); 6929 err_data->ue_count += ded_count; 6930 } 6931 } 6932 6933 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6934 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6935 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6936 6937 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6938 SEC_COUNT); 6939 if (sec_count) { 6940 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6941 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6942 err_data->ce_count += sec_count; 6943 } 6944 6945 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6946 DED_COUNT); 6947 if (ded_count) { 6948 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6949 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6950 err_data->ue_count += ded_count; 6951 } 6952 } 6953 6954 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6955 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6956 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6957 6958 sec_count = (data & 0x00006000L) >> 0xd; 6959 if (sec_count) { 6960 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6961 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6962 sec_count); 6963 err_data->ce_count += sec_count; 6964 } 6965 } 6966 6967 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6968 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6969 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6970 6971 sec_count = (data & 0x00006000L) >> 0xd; 6972 if (sec_count) { 6973 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6974 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6975 sec_count); 6976 err_data->ce_count += sec_count; 6977 } 6978 6979 ded_count = (data & 0x00018000L) >> 0xf; 6980 if (ded_count) { 6981 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6982 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6983 ded_count); 6984 err_data->ue_count += ded_count; 6985 } 6986 } 6987 6988 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6989 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6990 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6991 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6992 6993 return 0; 6994 } 6995 6996 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6997 const struct soc15_reg_entry *reg, 6998 uint32_t se_id, uint32_t inst_id, uint32_t value, 6999 uint32_t *sec_count, uint32_t *ded_count) 7000 { 7001 uint32_t i; 7002 uint32_t sec_cnt, ded_cnt; 7003 7004 for (i 
= 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 7005 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 7006 gfx_v9_0_ras_fields[i].seg != reg->seg || 7007 gfx_v9_0_ras_fields[i].inst != reg->inst) 7008 continue; 7009 7010 sec_cnt = (value & 7011 gfx_v9_0_ras_fields[i].sec_count_mask) >> 7012 gfx_v9_0_ras_fields[i].sec_count_shift; 7013 if (sec_cnt) { 7014 dev_info(adev->dev, "GFX SubBlock %s, " 7015 "Instance[%d][%d], SEC %d\n", 7016 gfx_v9_0_ras_fields[i].name, 7017 se_id, inst_id, 7018 sec_cnt); 7019 *sec_count += sec_cnt; 7020 } 7021 7022 ded_cnt = (value & 7023 gfx_v9_0_ras_fields[i].ded_count_mask) >> 7024 gfx_v9_0_ras_fields[i].ded_count_shift; 7025 if (ded_cnt) { 7026 dev_info(adev->dev, "GFX SubBlock %s, " 7027 "Instance[%d][%d], DED %d\n", 7028 gfx_v9_0_ras_fields[i].name, 7029 se_id, inst_id, 7030 ded_cnt); 7031 *ded_count += ded_cnt; 7032 } 7033 } 7034 7035 return 0; 7036 } 7037 7038 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 7039 { 7040 int i, j, k; 7041 7042 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7043 return; 7044 7045 /* read back registers to clear the counters */ 7046 mutex_lock(&adev->grbm_idx_mutex); 7047 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7048 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7049 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7050 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 7051 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7052 } 7053 } 7054 } 7055 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 7056 mutex_unlock(&adev->grbm_idx_mutex); 7057 7058 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7059 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 7060 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7061 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 7062 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7063 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 7064 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7065 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 7066 7067 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 7068 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 7069 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 7070 } 7071 7072 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 7073 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 7074 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 7075 } 7076 7077 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 7078 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 7079 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 7080 } 7081 7082 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 7083 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 7084 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 7085 } 7086 7087 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7088 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7089 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7090 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7091 } 7092 7093 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 7094 void *ras_error_status) 7095 { 7096 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 7097 uint32_t sec_count = 0, ded_count = 0; 7098 uint32_t i, j, k; 7099 uint32_t reg_value; 7100 7101 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7102 return; 7103 7104 err_data->ue_count = 0; 7105 err_data->ce_count = 0; 7106 7107 mutex_lock(&adev->grbm_idx_mutex); 7108 7109 for (i = 0; i < 
ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7110 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7111 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7112 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); 7113 reg_value = 7114 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7115 if (reg_value) 7116 gfx_v9_0_ras_error_count(adev, 7117 &gfx_v9_0_edc_counter_regs[i], 7118 j, k, reg_value, 7119 &sec_count, &ded_count); 7120 } 7121 } 7122 } 7123 7124 err_data->ce_count += sec_count; 7125 err_data->ue_count += ded_count; 7126 7127 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7128 mutex_unlock(&adev->grbm_idx_mutex); 7129 7130 gfx_v9_0_query_utc_edc_status(adev, err_data); 7131 } 7132 7133 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 7134 { 7135 const unsigned int cp_coher_cntl = 7136 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 7137 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 7138 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 7139 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 7140 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 7141 7142 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */ 7143 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 7144 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 7145 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 7146 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 7147 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 7148 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 7149 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 7150 } 7151 7152 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 7153 uint32_t pipe, bool enable) 7154 { 7155 struct amdgpu_device *adev = ring->adev; 7156 uint32_t val; 7157 uint32_t wcl_cs_reg; 7158 7159 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ 7160 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 7161 7162 switch (pipe) { 7163 case 0: 7164 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 7165 break; 7166 case 1: 7167 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 7168 break; 7169 case 2: 7170 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 7171 break; 7172 case 3: 7173 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 7174 break; 7175 default: 7176 DRM_DEBUG("invalid pipe %d\n", pipe); 7177 return; 7178 } 7179 7180 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 7181 7182 } 7183 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 7184 { 7185 struct amdgpu_device *adev = ring->adev; 7186 uint32_t val; 7187 int i; 7188 7189 7190 /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit 7191 * number of gfx waves. Setting 5 bit will make sure gfx only gets 7192 * around 25% of gpu resources. 7193 */ 7194 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 7195 amdgpu_ring_emit_wreg(ring, 7196 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 7197 val); 7198 7199 /* Restrict waves for normal/low priority compute queues as well 7200 * to get best QoS for high priority compute jobs. 7201 * 7202 * amdgpu controls only 1st ME(0-3 CS pipes). 
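* The loop below therefore skips the ring's own pipe and applies the
* CS wave limit only to the remaining CS pipes.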
7203 */ 7204 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 7205 if (i != ring->pipe) 7206 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 7207 7208 } 7209 } 7210 7211 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 7212 { 7213 /* Header itself is a NOP packet */ 7214 if (num_nop == 1) { 7215 amdgpu_ring_write(ring, ring->funcs->nop); 7216 return; 7217 } 7218 7219 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 7220 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 7221 7222 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 7223 amdgpu_ring_insert_nop(ring, num_nop - 1); 7224 } 7225 7226 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 7227 { 7228 struct amdgpu_device *adev = ring->adev; 7229 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7230 struct amdgpu_ring *kiq_ring = &kiq->ring; 7231 unsigned long flags; 7232 u32 tmp; 7233 int r; 7234 7235 if (amdgpu_sriov_vf(adev)) 7236 return -EINVAL; 7237 7238 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7239 return -EINVAL; 7240 7241 spin_lock_irqsave(&kiq->ring_lock, flags); 7242 7243 if (amdgpu_ring_alloc(kiq_ring, 5)) { 7244 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7245 return -ENOMEM; 7246 } 7247 7248 tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); 7249 gfx_v9_0_ring_emit_wreg(kiq_ring, 7250 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); 7251 amdgpu_ring_commit(kiq_ring); 7252 7253 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7254 7255 r = amdgpu_ring_test_ring(kiq_ring); 7256 if (r) 7257 return r; 7258 7259 if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) 7260 return -ENOMEM; 7261 gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 7262 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); 7263 gfx_v9_0_ring_emit_reg_wait(ring, 7264 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); 7265 gfx_v9_0_ring_emit_wreg(ring, 7266 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); 7267 7268 return amdgpu_ring_test_ring(ring); 7269 } 7270 7271 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, 7272 unsigned int vmid) 7273 { 7274 struct amdgpu_device *adev = ring->adev; 7275 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7276 struct amdgpu_ring *kiq_ring = &kiq->ring; 7277 unsigned long flags; 7278 int i, r; 7279 7280 if (amdgpu_sriov_vf(adev)) 7281 return -EINVAL; 7282 7283 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7284 return -EINVAL; 7285 7286 spin_lock_irqsave(&kiq->ring_lock, flags); 7287 7288 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 7289 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7290 return -ENOMEM; 7291 } 7292 7293 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 7294 0, 0); 7295 amdgpu_ring_commit(kiq_ring); 7296 7297 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7298 7299 r = amdgpu_ring_test_ring(kiq_ring); 7300 if (r) 7301 return r; 7302 7303 /* make sure dequeue is complete*/ 7304 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 7305 mutex_lock(&adev->srbm_mutex); 7306 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 7307 for (i = 0; i < adev->usec_timeout; i++) { 7308 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 7309 break; 7310 udelay(1); 7311 } 7312 if (i >= adev->usec_timeout) 7313 r = -ETIMEDOUT; 7314 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7315 mutex_unlock(&adev->srbm_mutex); 7316 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 7317 if (r) { 7318 dev_err(adev->dev, "fail to wait on hqd deactive\n"); 7319 return r; 7320 } 7321 7322 r = 
amdgpu_bo_reserve(ring->mqd_obj, false); 7323 if (unlikely(r != 0)){ 7324 dev_err(adev->dev, "fail to resv mqd_obj\n"); 7325 return r; 7326 } 7327 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 7328 if (!r) { 7329 r = gfx_v9_0_kcq_init_queue(ring, true); 7330 amdgpu_bo_kunmap(ring->mqd_obj); 7331 ring->mqd_ptr = NULL; 7332 } 7333 amdgpu_bo_unreserve(ring->mqd_obj); 7334 if (r) { 7335 dev_err(adev->dev, "fail to unresv mqd_obj\n"); 7336 return r; 7337 } 7338 spin_lock_irqsave(&kiq->ring_lock, flags); 7339 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 7340 if (r) { 7341 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7342 return -ENOMEM; 7343 } 7344 kiq->pmf->kiq_map_queues(kiq_ring, ring); 7345 amdgpu_ring_commit(kiq_ring); 7346 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7347 r = amdgpu_ring_test_ring(kiq_ring); 7348 if (r) { 7349 DRM_ERROR("fail to remap queue\n"); 7350 return r; 7351 } 7352 return amdgpu_ring_test_ring(ring); 7353 } 7354 7355 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7356 { 7357 struct amdgpu_device *adev = ip_block->adev; 7358 uint32_t i, j, k, reg, index = 0; 7359 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7360 7361 if (!adev->gfx.ip_dump_core) 7362 return; 7363 7364 for (i = 0; i < reg_count; i++) 7365 drm_printf(p, "%-50s \t 0x%08x\n", 7366 gc_reg_list_9[i].reg_name, 7367 adev->gfx.ip_dump_core[i]); 7368 7369 /* print compute queue registers for all instances */ 7370 if (!adev->gfx.ip_dump_compute_queues) 7371 return; 7372 7373 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7374 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7375 adev->gfx.mec.num_mec, 7376 adev->gfx.mec.num_pipe_per_mec, 7377 adev->gfx.mec.num_queue_per_pipe); 7378 7379 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7380 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7381 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7382 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7383 for (reg = 0; reg < reg_count; reg++) { 7384 drm_printf(p, "%-50s \t 0x%08x\n", 7385 gc_cp_reg_list_9[reg].reg_name, 7386 adev->gfx.ip_dump_compute_queues[index + reg]); 7387 } 7388 index += reg_count; 7389 } 7390 } 7391 } 7392 7393 } 7394 7395 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) 7396 { 7397 struct amdgpu_device *adev = ip_block->adev; 7398 uint32_t i, j, k, reg, index = 0; 7399 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7400 7401 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) 7402 return; 7403 7404 amdgpu_gfx_off_ctrl(adev, false); 7405 for (i = 0; i < reg_count; i++) 7406 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); 7407 amdgpu_gfx_off_ctrl(adev, true); 7408 7409 /* dump compute queue registers for all instances */ 7410 if (!adev->gfx.ip_dump_compute_queues) 7411 return; 7412 7413 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7414 amdgpu_gfx_off_ctrl(adev, false); 7415 mutex_lock(&adev->srbm_mutex); 7416 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7417 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7418 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7419 /* ME0 is for GFX so start from 1 for CP */ 7420 soc15_grbm_select(adev, 1 + i, j, k, 0, 0); 7421 7422 for (reg = 0; reg < reg_count; reg++) { 7423 adev->gfx.ip_dump_compute_queues[index + reg] = 7424 RREG32(SOC15_REG_ENTRY_OFFSET( 7425 gc_cp_reg_list_9[reg])); 7426 } 7427 index += reg_count; 7428 } 7429 } 7430 } 7431 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7432 
mutex_unlock(&adev->srbm_mutex); 7433 amdgpu_gfx_off_ctrl(adev, true); 7434 7435 } 7436 7437 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7438 { 7439 /* Emit the cleaner shader */ 7440 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7441 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7442 } 7443 7444 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) 7445 { 7446 struct amdgpu_device *adev = ring->adev; 7447 struct amdgpu_ip_block *gfx_block = 7448 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7449 7450 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7451 7452 /* Raven and PCO APUs seem to have stability issues 7453 * with compute and gfxoff and gfx pg. Disable gfx pg during 7454 * submission and allow again afterwards. 7455 */ 7456 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7457 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); 7458 } 7459 7460 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) 7461 { 7462 struct amdgpu_device *adev = ring->adev; 7463 struct amdgpu_ip_block *gfx_block = 7464 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7465 7466 /* Raven and PCO APUs seem to have stability issues 7467 * with compute and gfxoff and gfx pg. Disable gfx pg during 7468 * submission and allow again afterwards. 7469 */ 7470 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7471 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); 7472 7473 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7474 } 7475 7476 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7477 .name = "gfx_v9_0", 7478 .early_init = gfx_v9_0_early_init, 7479 .late_init = gfx_v9_0_late_init, 7480 .sw_init = gfx_v9_0_sw_init, 7481 .sw_fini = gfx_v9_0_sw_fini, 7482 .hw_init = gfx_v9_0_hw_init, 7483 .hw_fini = gfx_v9_0_hw_fini, 7484 .suspend = gfx_v9_0_suspend, 7485 .resume = gfx_v9_0_resume, 7486 .is_idle = gfx_v9_0_is_idle, 7487 .wait_for_idle = gfx_v9_0_wait_for_idle, 7488 .soft_reset = gfx_v9_0_soft_reset, 7489 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7490 .set_powergating_state = gfx_v9_0_set_powergating_state, 7491 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7492 .dump_ip_state = gfx_v9_ip_dump, 7493 .print_ip_state = gfx_v9_ip_print, 7494 }; 7495 7496 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7497 .type = AMDGPU_RING_TYPE_GFX, 7498 .align_mask = 0xff, 7499 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7500 .support_64bit_ptrs = true, 7501 .secure_submission_supported = true, 7502 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7503 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7504 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7505 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7506 5 + /* COND_EXEC */ 7507 7 + /* PIPELINE_SYNC */ 7508 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7509 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7510 2 + /* VM_FLUSH */ 7511 8 + /* FENCE for VM_FLUSH */ 7512 20 + /* GDS switch */ 7513 4 + /* double SWITCH_BUFFER, 7514 the first COND_EXEC jump to the place just 7515 prior to this double SWITCH_BUFFER */ 7516 5 + /* COND_EXEC */ 7517 7 + /* HDP_flush */ 7518 4 + /* VGT_flush */ 7519 14 + /* CE_META */ 7520 31 + /* DE_META */ 7521 3 + /* CNTX_CTRL */ 7522 5 + /* HDP_INVL */ 7523 8 + 8 + /* FENCE x2 */ 7524 2 + /* SWITCH_BUFFER */ 7525 7 + /* gfx_v9_0_emit_mem_sync */ 7526 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7527 .emit_ib_size = 4, /* 
gfx_v9_0_ring_emit_ib_gfx */ 7528 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7529 .emit_fence = gfx_v9_0_ring_emit_fence, 7530 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7531 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7532 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7533 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7534 .test_ring = gfx_v9_0_ring_test_ring, 7535 .insert_nop = gfx_v9_ring_insert_nop, 7536 .pad_ib = amdgpu_ring_generic_pad_ib, 7537 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7538 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7539 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7540 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7541 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7542 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7543 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7544 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7545 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7546 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7547 .reset = gfx_v9_0_reset_kgq, 7548 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7549 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7550 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7551 }; 7552 7553 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7554 .type = AMDGPU_RING_TYPE_GFX, 7555 .align_mask = 0xff, 7556 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7557 .support_64bit_ptrs = true, 7558 .secure_submission_supported = true, 7559 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7560 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7561 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7562 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7563 5 + /* COND_EXEC */ 7564 7 + /* PIPELINE_SYNC */ 7565 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7566 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7567 2 + /* VM_FLUSH */ 7568 8 + /* FENCE for VM_FLUSH */ 7569 20 + /* GDS switch */ 7570 4 + /* double SWITCH_BUFFER, 7571 * the first COND_EXEC jump to the place just 7572 * prior to this double SWITCH_BUFFER 7573 */ 7574 5 + /* COND_EXEC */ 7575 7 + /* HDP_flush */ 7576 4 + /* VGT_flush */ 7577 14 + /* CE_META */ 7578 31 + /* DE_META */ 7579 3 + /* CNTX_CTRL */ 7580 5 + /* HDP_INVL */ 7581 8 + 8 + /* FENCE x2 */ 7582 2 + /* SWITCH_BUFFER */ 7583 7 + /* gfx_v9_0_emit_mem_sync */ 7584 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7585 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7586 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7587 .emit_fence = gfx_v9_0_ring_emit_fence, 7588 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7589 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7590 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7591 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7592 .test_ring = gfx_v9_0_ring_test_ring, 7593 .test_ib = gfx_v9_0_ring_test_ib, 7594 .insert_nop = gfx_v9_ring_insert_nop, 7595 .pad_ib = amdgpu_ring_generic_pad_ib, 7596 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7597 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7598 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7599 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7600 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7601 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7602 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7603 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7604 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7605 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7606 .patch_de = gfx_v9_0_ring_patch_de_meta, 7607 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7608 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7609 
.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7610 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7611 }; 7612 7613 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7614 .type = AMDGPU_RING_TYPE_COMPUTE, 7615 .align_mask = 0xff, 7616 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7617 .support_64bit_ptrs = true, 7618 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7619 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7620 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7621 .emit_frame_size = 7622 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7623 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7624 5 + /* hdp invalidate */ 7625 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7626 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7627 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7628 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7629 7 + /* gfx_v9_0_emit_mem_sync */ 7630 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7631 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7632 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7633 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7634 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7635 .emit_fence = gfx_v9_0_ring_emit_fence, 7636 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7637 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7638 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7639 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7640 .test_ring = gfx_v9_0_ring_test_ring, 7641 .test_ib = gfx_v9_0_ring_test_ib, 7642 .insert_nop = gfx_v9_ring_insert_nop, 7643 .pad_ib = amdgpu_ring_generic_pad_ib, 7644 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7645 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7646 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7647 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7648 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7649 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7650 .reset = gfx_v9_0_reset_kcq, 7651 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7652 .begin_use = gfx_v9_0_ring_begin_use_compute, 7653 .end_use = gfx_v9_0_ring_end_use_compute, 7654 }; 7655 7656 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7657 .type = AMDGPU_RING_TYPE_KIQ, 7658 .align_mask = 0xff, 7659 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7660 .support_64bit_ptrs = true, 7661 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7662 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7663 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7664 .emit_frame_size = 7665 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7666 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7667 5 + /* hdp invalidate */ 7668 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7669 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7670 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7671 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7672 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7673 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7674 .test_ring = gfx_v9_0_ring_test_ring, 7675 .insert_nop = amdgpu_ring_insert_nop, 7676 .pad_ib = amdgpu_ring_generic_pad_ib, 7677 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7678 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7679 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7680 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7681 }; 7682 7683 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7684 { 7685 int i; 7686 7687 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7688 7689 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7690 adev->gfx.gfx_ring[i].funcs 
= &gfx_v9_0_ring_funcs_gfx; 7691 7692 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 7693 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 7694 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 7695 } 7696 7697 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7698 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7699 } 7700 7701 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7702 .set = gfx_v9_0_set_eop_interrupt_state, 7703 .process = gfx_v9_0_eop_irq, 7704 }; 7705 7706 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7707 .set = gfx_v9_0_set_priv_reg_fault_state, 7708 .process = gfx_v9_0_priv_reg_irq, 7709 }; 7710 7711 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { 7712 .set = gfx_v9_0_set_bad_op_fault_state, 7713 .process = gfx_v9_0_bad_op_irq, 7714 }; 7715 7716 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7717 .set = gfx_v9_0_set_priv_inst_fault_state, 7718 .process = gfx_v9_0_priv_inst_irq, 7719 }; 7720 7721 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7722 .set = gfx_v9_0_set_cp_ecc_error_state, 7723 .process = amdgpu_gfx_cp_ecc_error_irq, 7724 }; 7725 7726 7727 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 7728 { 7729 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7730 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 7731 7732 adev->gfx.priv_reg_irq.num_types = 1; 7733 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 7734 7735 adev->gfx.bad_op_irq.num_types = 1; 7736 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; 7737 7738 adev->gfx.priv_inst_irq.num_types = 1; 7739 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7740 7741 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 7742 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7743 } 7744 7745 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7746 { 7747 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7748 case IP_VERSION(9, 0, 1): 7749 case IP_VERSION(9, 2, 1): 7750 case IP_VERSION(9, 4, 0): 7751 case IP_VERSION(9, 2, 2): 7752 case IP_VERSION(9, 1, 0): 7753 case IP_VERSION(9, 4, 1): 7754 case IP_VERSION(9, 3, 0): 7755 case IP_VERSION(9, 4, 2): 7756 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7757 break; 7758 default: 7759 break; 7760 } 7761 } 7762 7763 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7764 { 7765 /* init asci gds info */ 7766 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7767 case IP_VERSION(9, 0, 1): 7768 case IP_VERSION(9, 2, 1): 7769 case IP_VERSION(9, 4, 0): 7770 adev->gds.gds_size = 0x10000; 7771 break; 7772 case IP_VERSION(9, 2, 2): 7773 case IP_VERSION(9, 1, 0): 7774 case IP_VERSION(9, 4, 1): 7775 adev->gds.gds_size = 0x1000; 7776 break; 7777 case IP_VERSION(9, 4, 2): 7778 /* aldebaran removed all the GDS internal memory, 7779 * only support GWS opcode in kernel, like barrier 7780 * semaphore.etc */ 7781 adev->gds.gds_size = 0; 7782 break; 7783 default: 7784 adev->gds.gds_size = 0x10000; 7785 break; 7786 } 7787 7788 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7789 case IP_VERSION(9, 0, 1): 7790 case IP_VERSION(9, 4, 0): 7791 adev->gds.gds_compute_max_wave_id = 0x7ff; 7792 break; 7793 case IP_VERSION(9, 2, 1): 7794 adev->gds.gds_compute_max_wave_id = 0x27f; 7795 break; 7796 case IP_VERSION(9, 2, 2): 7797 case IP_VERSION(9, 1, 0): 7798 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 7799 adev->gds.gds_compute_max_wave_id = 0x77; /* 
raven2 */ 7800 else 7801 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7802 break; 7803 case IP_VERSION(9, 4, 1): 7804 adev->gds.gds_compute_max_wave_id = 0xfff; 7805 break; 7806 case IP_VERSION(9, 4, 2): 7807 /* deprecated for Aldebaran, no usage at all */ 7808 adev->gds.gds_compute_max_wave_id = 0; 7809 break; 7810 default: 7811 /* this really depends on the chip */ 7812 adev->gds.gds_compute_max_wave_id = 0x7ff; 7813 break; 7814 } 7815 7816 adev->gds.gws_size = 64; 7817 adev->gds.oa_size = 16; 7818 } 7819 7820 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7821 u32 bitmap) 7822 { 7823 u32 data; 7824 7825 if (!bitmap) 7826 return; 7827 7828 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7829 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7830 7831 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7832 } 7833 7834 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7835 { 7836 u32 data, mask; 7837 7838 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7839 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7840 7841 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7842 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7843 7844 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7845 7846 return (~data) & mask; 7847 } 7848 7849 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7850 struct amdgpu_cu_info *cu_info) 7851 { 7852 int i, j, k, counter, active_cu_number = 0; 7853 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7854 unsigned disable_masks[4 * 4]; 7855 7856 if (!adev || !cu_info) 7857 return -EINVAL; 7858 7859 /* 7860 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7861 */ 7862 if (adev->gfx.config.max_shader_engines * 7863 adev->gfx.config.max_sh_per_se > 16) 7864 return -EINVAL; 7865 7866 amdgpu_gfx_parse_disable_cu(disable_masks, 7867 adev->gfx.config.max_shader_engines, 7868 adev->gfx.config.max_sh_per_se); 7869 7870 mutex_lock(&adev->grbm_idx_mutex); 7871 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7872 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7873 mask = 1; 7874 ao_bitmap = 0; 7875 counter = 0; 7876 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 7877 gfx_v9_0_set_user_cu_inactive_bitmap( 7878 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7879 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7880 7881 /* 7882 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7883 * 4x4 size array, and it's usually suitable for Vega 7884 * ASICs which has 4*2 SE/SH layout. 7885 * But for Arcturus, SE/SH layout is changed to 8*1. 
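* (i.e. 8 shader engines with a single shader array each)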
7886 * To mostly reduce the impact, we make it compatible 7887 * with current bitmap array as below: 7888 * SE4,SH0 --> bitmap[0][1] 7889 * SE5,SH0 --> bitmap[1][1] 7890 * SE6,SH0 --> bitmap[2][1] 7891 * SE7,SH0 --> bitmap[3][1] 7892 */ 7893 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; 7894 7895 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7896 if (bitmap & mask) { 7897 if (counter < adev->gfx.config.max_cu_per_sh) 7898 ao_bitmap |= mask; 7899 counter ++; 7900 } 7901 mask <<= 1; 7902 } 7903 active_cu_number += counter; 7904 if (i < 2 && j < 2) 7905 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7906 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7907 } 7908 } 7909 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7910 mutex_unlock(&adev->grbm_idx_mutex); 7911 7912 cu_info->number = active_cu_number; 7913 cu_info->ao_cu_mask = ao_cu_mask; 7914 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7915 7916 return 0; 7917 } 7918 7919 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7920 { 7921 .type = AMD_IP_BLOCK_TYPE_GFX, 7922 .major = 9, 7923 .minor = 0, 7924 .rev = 0, 7925 .funcs = &gfx_v9_0_ip_funcs, 7926 }; 7927