/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS	1
#define GFX9_NUM_SW_GFX_RINGS	2
#define GFX9_MEC_HPD_SIZE	4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmGCEA_PROBE_MAP	0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX	0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT			0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_1_ARCT			0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_2_ARCT			0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_3_ARCT			0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_4_ARCT			0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX	0
#define mmTCP_CHAN_STEER_5_ARCT			0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX	0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir		0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX	1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir		0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX	1

/* GC status and ring registers captured for the GFX IP state dump */
static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), 181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), 182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), 183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), 184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), 185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), 186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), 187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), 188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), 189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), 190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), 191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), 192 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), 193 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), 194 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), 195 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), 196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), 197 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), 198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), 199 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL), 200 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), 201 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), 202 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), 203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS), 204 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS), 205 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS), 206 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS), 207 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), 208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), 209 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS), 210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), 211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), 212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), 213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), 214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), 215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), 216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), 217 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), 218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), 219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), 220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), 221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), 222 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), 223 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), 224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), 225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), 226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), 227 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), 228 /* cp header registers */ 229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), 230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), 231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), 232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), 233 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), 234 /* SE status registers */ 235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), 236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), 237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), 238 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) 239 }; 240 241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { 242 /* compute queue registers */ 243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), 244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE), 245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), 246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), 247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), 248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), 249 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), 250 SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_HQD_PQ_BASE_HI), 251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), 252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), 256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), 257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), 258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), 259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), 260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), 262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), 263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), 264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), 265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), 266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), 267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), 268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), 269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), 270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), 271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), 272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), 273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), 274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), 275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), 276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), 277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), 278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), 279 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), 280 }; 281 282 enum ta_ras_gfx_subblock { 283 /*CPC*/ 284 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 285 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 286 TA_RAS_BLOCK__GFX_CPC_UCODE, 287 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 288 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 289 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 290 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 291 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 292 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 293 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 294 /* CPF*/ 295 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 297 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 298 TA_RAS_BLOCK__GFX_CPF_TAG, 299 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 300 /* CPG*/ 301 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 302 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 303 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 304 TA_RAS_BLOCK__GFX_CPG_TAG, 305 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 306 /* GDS*/ 307 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 308 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 309 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 311 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 312 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 313 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 314 /* SPI*/ 315 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 316 /* SQ*/ 317 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 318 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 319 TA_RAS_BLOCK__GFX_SQ_LDS_D, 320 TA_RAS_BLOCK__GFX_SQ_LDS_I, 321 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 322 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 323 /* SQC (3 ranges)*/ 324 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 325 /* SQC range 0*/ 326 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 327 
TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 328 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 330 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 332 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 334 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 335 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 336 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 337 /* SQC range 1*/ 338 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 339 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 340 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 343 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 348 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 349 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 350 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 351 /* SQC range 2*/ 352 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 353 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 354 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 357 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 362 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 363 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 364 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 365 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 366 /* TA*/ 367 TA_RAS_BLOCK__GFX_TA_INDEX_START, 368 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 369 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 370 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 371 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 372 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 373 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 374 /* TCA*/ 375 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 376 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 377 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 378 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 379 /* TCC (5 sub-ranges)*/ 380 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 381 /* TCC range 0*/ 382 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 386 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 388 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 389 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 390 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 391 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 392 /* TCC range 1*/ 393 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 394 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 395 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 396 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 397 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 398 /* TCC range 2*/ 399 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 400 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 401 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 402 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 403 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 404 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 405 
TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 406 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 407 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 408 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 409 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 410 /* TCC range 3*/ 411 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 413 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 414 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 415 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 416 /* TCC range 4*/ 417 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 418 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 419 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 420 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 421 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 422 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 423 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 424 /* TCI*/ 425 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 426 /* TCP*/ 427 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 428 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 429 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 430 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 431 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 432 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 434 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 435 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 436 /* TD*/ 437 TA_RAS_BLOCK__GFX_TD_INDEX_START, 438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 439 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 440 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 441 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 442 /* EA (3 sub-ranges)*/ 443 TA_RAS_BLOCK__GFX_EA_INDEX_START, 444 /* EA range 0*/ 445 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 446 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 447 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 448 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 449 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 450 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 451 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 452 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 453 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 454 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 455 /* EA range 1*/ 456 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 457 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 458 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 459 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 460 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 461 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 462 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 463 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 464 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 465 /* EA range 2*/ 466 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 467 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 468 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 469 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 470 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 471 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 472 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 473 /* UTC VM L2 bank*/ 474 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 475 /* UTC VM walker*/ 476 TA_RAS_BLOCK__UTC_VML2_WALKER, 477 /* UTC ATC L2 2MB cache*/ 478 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 479 /* UTC ATC L2 4KB cache*/ 480 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 481 TA_RAS_BLOCK__GFX_MAX 482 }; 483 484 struct ras_gfx_subblock { 485 unsigned char *name; 486 int ta_subblock; 487 int hw_supported_error_type; 488 int sw_supported_error_type; 489 }; 490 491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 492 [AMDGPU_RAS_BLOCK__##subblock] = { \ 
493 #subblock, \ 494 TA_RAS_BLOCK__##subblock, \ 495 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 496 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 497 } 498 499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 501 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 510 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 513 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 516 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 517 0), 518 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 519 0), 520 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 521 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 525 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 527 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 528 0, 0), 529 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 530 0), 531 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 532 0, 0), 533 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 534 0), 535 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 536 0, 0), 537 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 538 0), 539 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 540 1), 541 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 542 0, 0, 0), 543 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 544 0), 545 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 546 0), 547 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 548 0), 549 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 550 0), 551 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 552 0), 553 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 554 0, 0), 555 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 556 0), 557 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 558 0), 559 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 560 0, 0, 0), 561 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 562 0), 563 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 564 0), 565 
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 566 0), 567 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 568 0), 569 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 570 0), 571 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 572 0, 0), 573 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 574 0), 575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 579 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 581 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 583 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 584 1), 585 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 586 1), 587 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 588 1), 589 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 590 0), 591 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 592 0), 593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 604 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 605 0), 606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 607 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 608 0), 609 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 610 0, 0), 611 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 612 0), 613 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 620 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 623 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 
0), 627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 642 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 644 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 646 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 647 }; 648 649 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 650 { 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 671 }; 672 673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 674 { 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 693 }; 694 695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 696 { 697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 708 }; 709 710 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 711 { 712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 729 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 730 SOC15_REG_GOLDEN_VALUE(GC, 
0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 735 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 736 }; 737 738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 739 { 740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 746 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 747 }; 748 749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 750 { 751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 769 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 770 }; 771 772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 773 { 774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 783 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 784 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 785 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 786 }; 787 788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 789 { 790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 792 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 793 }; 794 795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 796 { 797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 812 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 813 }; 814 815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 816 { 817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 829 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 830 }; 831 832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 833 { 834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 839 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 840 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), 841 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), 842 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), 843 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), 844 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000) 845 }; 846 847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = { 848 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)}, 849 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)}, 850 }; 851 852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 853 { 854 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 855 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 856 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 857 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 858 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 859 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 860 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 861 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 862 }; 863 864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 865 { 866 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 867 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 868 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 869 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 870 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 871 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 872 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 873 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 874 }; 875 876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 880 881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 886 struct amdgpu_cu_info *cu_info); 887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); 889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 891 void *ras_error_status); 892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 893 void *inject_if, uint32_t instance_mask); 894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); 895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 896 unsigned int vmid); 897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 899 900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, 901 uint64_t queue_mask) 902 { 903 struct amdgpu_device *adev = kiq_ring->adev; 904 u64 shader_mc_addr; 905 906 /* Cleaner shader MC address */ 907 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 908 909 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 910 amdgpu_ring_write(kiq_ring, 911 PACKET3_SET_RESOURCES_VMID_MASK(0) | 912 /* 
vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}


static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
					uint32_t xcc_id, uint32_t vmid)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	unsigned i;

	/* enter safe mode */
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
		/* wait till dequeue takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout)
			dev_err(adev->dev, "fail to wait on hqd deactive\n");
	} else {
		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
	}

	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* exit safe mode */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
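		/* note: the Vega12-specific settings below are programmed after
		 * (and therefore override) the common GC 9.2.1 settings above */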
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ?
WR_CONFIRM : 0)); 1140 amdgpu_ring_write(ring, reg); 1141 amdgpu_ring_write(ring, 0); 1142 amdgpu_ring_write(ring, val); 1143 } 1144 1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 1146 int mem_space, int opt, uint32_t addr0, 1147 uint32_t addr1, uint32_t ref, uint32_t mask, 1148 uint32_t inv) 1149 { 1150 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 1151 amdgpu_ring_write(ring, 1152 /* memory (1) or register (0) */ 1153 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 1154 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 1155 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 1156 WAIT_REG_MEM_ENGINE(eng_sel))); 1157 1158 if (mem_space) 1159 BUG_ON(addr0 & 0x3); /* Dword align */ 1160 amdgpu_ring_write(ring, addr0); 1161 amdgpu_ring_write(ring, addr1); 1162 amdgpu_ring_write(ring, ref); 1163 amdgpu_ring_write(ring, mask); 1164 amdgpu_ring_write(ring, inv); /* poll interval */ 1165 } 1166 1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 1168 { 1169 struct amdgpu_device *adev = ring->adev; 1170 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1171 uint32_t tmp = 0; 1172 unsigned i; 1173 int r; 1174 1175 WREG32(scratch, 0xCAFEDEAD); 1176 r = amdgpu_ring_alloc(ring, 3); 1177 if (r) 1178 return r; 1179 1180 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1181 amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); 1182 amdgpu_ring_write(ring, 0xDEADBEEF); 1183 amdgpu_ring_commit(ring); 1184 1185 for (i = 0; i < adev->usec_timeout; i++) { 1186 tmp = RREG32(scratch); 1187 if (tmp == 0xDEADBEEF) 1188 break; 1189 udelay(1); 1190 } 1191 1192 if (i >= adev->usec_timeout) 1193 r = -ETIMEDOUT; 1194 return r; 1195 } 1196 1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1198 { 1199 struct amdgpu_device *adev = ring->adev; 1200 struct amdgpu_ib ib; 1201 struct dma_fence *f = NULL; 1202 1203 unsigned index; 1204 uint64_t gpu_addr; 1205 uint32_t tmp; 1206 long r; 1207 1208 r = amdgpu_device_wb_get(adev, &index); 1209 if (r) 1210 return r; 1211 1212 gpu_addr = adev->wb.gpu_addr + (index * 4); 1213 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 1214 memset(&ib, 0, sizeof(ib)); 1215 1216 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 1217 if (r) 1218 goto err1; 1219 1220 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 1221 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1222 ib.ptr[2] = lower_32_bits(gpu_addr); 1223 ib.ptr[3] = upper_32_bits(gpu_addr); 1224 ib.ptr[4] = 0xDEADBEEF; 1225 ib.length_dw = 5; 1226 1227 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1228 if (r) 1229 goto err2; 1230 1231 r = dma_fence_wait_timeout(f, false, timeout); 1232 if (r == 0) { 1233 r = -ETIMEDOUT; 1234 goto err2; 1235 } else if (r < 0) { 1236 goto err2; 1237 } 1238 1239 tmp = adev->wb.wb[index]; 1240 if (tmp == 0xDEADBEEF) 1241 r = 0; 1242 else 1243 r = -EINVAL; 1244 1245 err2: 1246 amdgpu_ib_free(&ib, NULL); 1247 dma_fence_put(f); 1248 err1: 1249 amdgpu_device_wb_free(adev, index); 1250 return r; 1251 } 1252 1253 1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 1255 { 1256 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1257 amdgpu_ucode_release(&adev->gfx.me_fw); 1258 amdgpu_ucode_release(&adev->gfx.ce_fw); 1259 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1260 amdgpu_ucode_release(&adev->gfx.mec_fw); 1261 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1262 1263 kfree(adev->gfx.rlc.register_list_format); 1264 } 1265 1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 1267 { 1268 
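	/* me/mec_fw_write_wait track whether the loaded CP firmware is new
	 * enough to handle combined register write-and-wait sequences; start
	 * pessimistic and enable per IP version below once the firmware and
	 * feature versions qualify. */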
adev->gfx.me_fw_write_wait = false; 1269 adev->gfx.mec_fw_write_wait = false; 1270 1271 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1272 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) && 1273 ((adev->gfx.mec_fw_version < 0x000001a5) || 1274 (adev->gfx.mec_feature_version < 46) || 1275 (adev->gfx.pfp_fw_version < 0x000000b7) || 1276 (adev->gfx.pfp_feature_version < 46))) 1277 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1278 1279 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1280 case IP_VERSION(9, 0, 1): 1281 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1282 (adev->gfx.me_feature_version >= 42) && 1283 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1284 (adev->gfx.pfp_feature_version >= 42)) 1285 adev->gfx.me_fw_write_wait = true; 1286 1287 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1288 (adev->gfx.mec_feature_version >= 42)) 1289 adev->gfx.mec_fw_write_wait = true; 1290 break; 1291 case IP_VERSION(9, 2, 1): 1292 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1293 (adev->gfx.me_feature_version >= 44) && 1294 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1295 (adev->gfx.pfp_feature_version >= 44)) 1296 adev->gfx.me_fw_write_wait = true; 1297 1298 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1299 (adev->gfx.mec_feature_version >= 44)) 1300 adev->gfx.mec_fw_write_wait = true; 1301 break; 1302 case IP_VERSION(9, 4, 0): 1303 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1304 (adev->gfx.me_feature_version >= 44) && 1305 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1306 (adev->gfx.pfp_feature_version >= 44)) 1307 adev->gfx.me_fw_write_wait = true; 1308 1309 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1310 (adev->gfx.mec_feature_version >= 44)) 1311 adev->gfx.mec_fw_write_wait = true; 1312 break; 1313 case IP_VERSION(9, 1, 0): 1314 case IP_VERSION(9, 2, 2): 1315 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1316 (adev->gfx.me_feature_version >= 42) && 1317 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1318 (adev->gfx.pfp_feature_version >= 42)) 1319 adev->gfx.me_fw_write_wait = true; 1320 1321 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1322 (adev->gfx.mec_feature_version >= 42)) 1323 adev->gfx.mec_fw_write_wait = true; 1324 break; 1325 default: 1326 adev->gfx.me_fw_write_wait = true; 1327 adev->gfx.mec_fw_write_wait = true; 1328 break; 1329 } 1330 } 1331 1332 struct amdgpu_gfxoff_quirk { 1333 u16 chip_vendor; 1334 u16 chip_device; 1335 u16 subsys_vendor; 1336 u16 subsys_device; 1337 u8 revision; 1338 }; 1339 1340 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1341 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1342 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1343 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1344 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1345 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1346 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1347 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1348 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1349 /* https://bbs.openkylin.top/t/topic/171497 */ 1350 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 }, 1351 /* HP 705G4 DM with R5 2400G */ 1352 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 }, 1353 { 0, 0, 0, 0, 0 }, 1354 }; 1355 1356 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1357 { 1358 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1359 1360 while (p && p->chip_device != 0) { 1361 if (pdev->vendor == p->chip_vendor && 1362 pdev->device == p->chip_device && 1363 
pdev->subsystem_vendor == p->subsys_vendor && 1364 pdev->subsystem_device == p->subsys_device && 1365 pdev->revision == p->revision) { 1366 return true; 1367 } 1368 ++p; 1369 } 1370 return false; 1371 } 1372 1373 static bool is_raven_kicker(struct amdgpu_device *adev) 1374 { 1375 if (adev->pm.fw_version >= 0x41e2b) 1376 return true; 1377 else 1378 return false; 1379 } 1380 1381 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1382 { 1383 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1384 (adev->gfx.me_fw_version >= 0x000000a5) && 1385 (adev->gfx.me_feature_version >= 52)) 1386 return true; 1387 else 1388 return false; 1389 } 1390 1391 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1392 { 1393 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1394 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1395 1396 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1397 case IP_VERSION(9, 0, 1): 1398 case IP_VERSION(9, 2, 1): 1399 case IP_VERSION(9, 4, 0): 1400 break; 1401 case IP_VERSION(9, 2, 2): 1402 case IP_VERSION(9, 1, 0): 1403 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1404 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1405 ((!is_raven_kicker(adev) && 1406 adev->gfx.rlc_fw_version < 531) || 1407 (adev->gfx.rlc_feature_version < 1) || 1408 !adev->gfx.rlc.is_rlc_v2_1)) 1409 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1410 1411 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1412 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1413 AMD_PG_SUPPORT_CP | 1414 AMD_PG_SUPPORT_RLC_SMU_HS; 1415 break; 1416 case IP_VERSION(9, 3, 0): 1417 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1418 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1419 AMD_PG_SUPPORT_CP | 1420 AMD_PG_SUPPORT_RLC_SMU_HS; 1421 break; 1422 default: 1423 break; 1424 } 1425 } 1426 1427 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1428 char *chip_name) 1429 { 1430 int err; 1431 1432 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 1433 AMDGPU_UCODE_REQUIRED, 1434 "amdgpu/%s_pfp.bin", chip_name); 1435 if (err) 1436 goto out; 1437 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1438 1439 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 1440 AMDGPU_UCODE_REQUIRED, 1441 "amdgpu/%s_me.bin", chip_name); 1442 if (err) 1443 goto out; 1444 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1445 1446 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 1447 AMDGPU_UCODE_REQUIRED, 1448 "amdgpu/%s_ce.bin", chip_name); 1449 if (err) 1450 goto out; 1451 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1452 1453 out: 1454 if (err) { 1455 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1456 amdgpu_ucode_release(&adev->gfx.me_fw); 1457 amdgpu_ucode_release(&adev->gfx.ce_fw); 1458 } 1459 return err; 1460 } 1461 1462 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1463 char *chip_name) 1464 { 1465 int err; 1466 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1467 uint16_t version_major; 1468 uint16_t version_minor; 1469 uint32_t smu_version; 1470 1471 /* 1472 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1473 * instead of picasso_rlc.bin. 
1474 * Judgment method: 1475 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1476 * or revision >= 0xD8 && revision <= 0xDF 1477 * otherwise is PCO FP5 1478 */ 1479 if (!strcmp(chip_name, "picasso") && 1480 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1481 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1482 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1483 AMDGPU_UCODE_REQUIRED, 1484 "amdgpu/%s_rlc_am4.bin", chip_name); 1485 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1486 (smu_version >= 0x41e2b)) 1487 /** 1488 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1489 */ 1490 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1491 AMDGPU_UCODE_REQUIRED, 1492 "amdgpu/%s_kicker_rlc.bin", chip_name); 1493 else 1494 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1495 AMDGPU_UCODE_REQUIRED, 1496 "amdgpu/%s_rlc.bin", chip_name); 1497 if (err) 1498 goto out; 1499 1500 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1501 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1502 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1503 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1504 out: 1505 if (err) 1506 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1507 1508 return err; 1509 } 1510 1511 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1512 { 1513 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1514 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1515 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1516 return false; 1517 1518 return true; 1519 } 1520 1521 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1522 char *chip_name) 1523 { 1524 int err; 1525 1526 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1527 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1528 AMDGPU_UCODE_REQUIRED, 1529 "amdgpu/%s_sjt_mec.bin", chip_name); 1530 else 1531 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1532 AMDGPU_UCODE_REQUIRED, 1533 "amdgpu/%s_mec.bin", chip_name); 1534 if (err) 1535 goto out; 1536 1537 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1538 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1539 1540 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1541 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1542 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1543 AMDGPU_UCODE_REQUIRED, 1544 "amdgpu/%s_sjt_mec2.bin", chip_name); 1545 else 1546 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1547 AMDGPU_UCODE_REQUIRED, 1548 "amdgpu/%s_mec2.bin", chip_name); 1549 if (!err) { 1550 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1551 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1552 } else { 1553 err = 0; 1554 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1555 } 1556 } else { 1557 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1558 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1559 } 1560 1561 gfx_v9_0_check_if_need_gfxoff(adev); 1562 gfx_v9_0_check_fw_write_wait(adev); 1563 1564 out: 1565 if (err) 1566 amdgpu_ucode_release(&adev->gfx.mec_fw); 1567 return err; 1568 } 1569 1570 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1571 { 1572 char ucode_prefix[30]; 1573 int r; 1574 1575 DRM_DEBUG("\n"); 1576 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, 
ucode_prefix, sizeof(ucode_prefix)); 1577 1578 /* No CPG in Arcturus */ 1579 if (adev->gfx.num_gfx_rings) { 1580 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1581 if (r) 1582 return r; 1583 } 1584 1585 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1586 if (r) 1587 return r; 1588 1589 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1590 if (r) 1591 return r; 1592 1593 return r; 1594 } 1595 1596 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1597 { 1598 u32 count = 0; 1599 const struct cs_section_def *sect = NULL; 1600 const struct cs_extent_def *ext = NULL; 1601 1602 /* begin clear state */ 1603 count += 2; 1604 /* context control state */ 1605 count += 3; 1606 1607 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1608 for (ext = sect->section; ext->extent != NULL; ++ext) { 1609 if (sect->id == SECT_CONTEXT) 1610 count += 2 + ext->reg_count; 1611 else 1612 return 0; 1613 } 1614 } 1615 1616 /* end clear state */ 1617 count += 2; 1618 /* clear state */ 1619 count += 2; 1620 1621 return count; 1622 } 1623 1624 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1625 volatile u32 *buffer) 1626 { 1627 u32 count = 0, i; 1628 const struct cs_section_def *sect = NULL; 1629 const struct cs_extent_def *ext = NULL; 1630 1631 if (adev->gfx.rlc.cs_data == NULL) 1632 return; 1633 if (buffer == NULL) 1634 return; 1635 1636 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1637 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1638 1639 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1640 buffer[count++] = cpu_to_le32(0x80000000); 1641 buffer[count++] = cpu_to_le32(0x80000000); 1642 1643 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1644 for (ext = sect->section; ext->extent != NULL; ++ext) { 1645 if (sect->id == SECT_CONTEXT) { 1646 buffer[count++] = 1647 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1648 buffer[count++] = cpu_to_le32(ext->reg_index - 1649 PACKET3_SET_CONTEXT_REG_START); 1650 for (i = 0; i < ext->reg_count; i++) 1651 buffer[count++] = cpu_to_le32(ext->extent[i]); 1652 } 1653 } 1654 } 1655 1656 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1657 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1658 1659 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1660 buffer[count++] = cpu_to_le32(0); 1661 } 1662 1663 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1664 { 1665 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1666 uint32_t pg_always_on_cu_num = 2; 1667 uint32_t always_on_cu_num; 1668 uint32_t i, j, k; 1669 uint32_t mask, cu_bitmap, counter; 1670 1671 if (adev->flags & AMD_IS_APU) 1672 always_on_cu_num = 4; 1673 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1674 always_on_cu_num = 8; 1675 else 1676 always_on_cu_num = 12; 1677 1678 mutex_lock(&adev->grbm_idx_mutex); 1679 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1680 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1681 mask = 1; 1682 cu_bitmap = 0; 1683 counter = 0; 1684 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1685 1686 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1687 if (cu_info->bitmap[0][i][j] & mask) { 1688 if (counter == pg_always_on_cu_num) 1689 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1690 if (counter < always_on_cu_num) 1691 cu_bitmap |= mask; 1692 else 1693 break; 1694 counter++; 1695 } 1696 mask <<= 1; 1697 
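				/* advance to the next CU's enable bit within this SH */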
} 1698 1699 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1700 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1701 } 1702 } 1703 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1704 mutex_unlock(&adev->grbm_idx_mutex); 1705 } 1706 1707 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1708 { 1709 uint32_t data; 1710 1711 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1712 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1713 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1716 1717 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1718 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1719 1720 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1721 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1722 1723 mutex_lock(&adev->grbm_idx_mutex); 1724 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1725 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1726 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1727 1728 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1729 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1730 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1731 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1732 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1733 1734 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1735 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1736 data &= 0x0000FFFF; 1737 data |= 0x00C00000; 1738 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1739 1740 /* 1741 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1742 * programmed in gfx_v9_0_init_always_on_cu_mask() 1743 */ 1744 1745 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1746 * but used for RLC_LB_CNTL configuration */ 1747 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1748 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1749 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1750 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1751 mutex_unlock(&adev->grbm_idx_mutex); 1752 1753 gfx_v9_0_init_always_on_cu_mask(adev); 1754 } 1755 1756 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1757 { 1758 uint32_t data; 1759 1760 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1761 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1762 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1763 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1764 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1765 1766 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1767 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1768 1769 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1770 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1771 1772 mutex_lock(&adev->grbm_idx_mutex); 1773 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1774 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1775 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1776 1777 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1778 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1779 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1780 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1781 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1782 1783 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1784 data = 
RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1785 data &= 0x0000FFFF; 1786 data |= 0x00C00000; 1787 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1788 1789 /* 1790 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1791 * programmed in gfx_v9_0_init_always_on_cu_mask() 1792 */ 1793 1794 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1795 * but used for RLC_LB_CNTL configuration */ 1796 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1797 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1798 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1799 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1800 mutex_unlock(&adev->grbm_idx_mutex); 1801 1802 gfx_v9_0_init_always_on_cu_mask(adev); 1803 } 1804 1805 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1806 { 1807 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1808 } 1809 1810 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1811 { 1812 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1813 return 5; 1814 else 1815 return 4; 1816 } 1817 1818 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 1819 { 1820 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 1821 1822 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 1823 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1824 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); 1825 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); 1826 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); 1827 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); 1828 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); 1829 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); 1830 adev->gfx.rlc.rlcg_reg_access_supported = true; 1831 } 1832 1833 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1834 { 1835 const struct cs_section_def *cs_data; 1836 int r; 1837 1838 adev->gfx.rlc.cs_data = gfx9_cs_data; 1839 1840 cs_data = adev->gfx.rlc.cs_data; 1841 1842 if (cs_data) { 1843 /* init clear state block */ 1844 r = amdgpu_gfx_rlc_init_csb(adev); 1845 if (r) 1846 return r; 1847 } 1848 1849 if (adev->flags & AMD_IS_APU) { 1850 /* TODO: double check the cp_table_size for RV */ 1851 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1852 r = amdgpu_gfx_rlc_init_cpt(adev); 1853 if (r) 1854 return r; 1855 } 1856 1857 return 0; 1858 } 1859 1860 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1861 { 1862 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1863 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1864 } 1865 1866 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1867 { 1868 int r; 1869 u32 *hpd; 1870 const __le32 *fw_data; 1871 unsigned fw_size; 1872 u32 *fw; 1873 size_t mec_hpd_size; 1874 1875 const struct gfx_firmware_header_v1_0 *mec_hdr; 1876 1877 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1878 1879 /* take ownership of the relevant compute queues */ 1880 amdgpu_gfx_compute_queue_acquire(adev); 1881 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1882 if (mec_hpd_size) { 1883 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1884 AMDGPU_GEM_DOMAIN_VRAM | 1885 AMDGPU_GEM_DOMAIN_GTT, 1886 &adev->gfx.mec.hpd_eop_obj, 1887 &adev->gfx.mec.hpd_eop_gpu_addr, 1888 (void **)&hpd); 1889 if (r) { 1890 dev_warn(adev->dev, "(%d) 
create HDP EOP bo failed\n", r); 1891 gfx_v9_0_mec_fini(adev); 1892 return r; 1893 } 1894 1895 memset(hpd, 0, mec_hpd_size); 1896 1897 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1898 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1899 } 1900 1901 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1902 1903 fw_data = (const __le32 *) 1904 (adev->gfx.mec_fw->data + 1905 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1906 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 1907 1908 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1909 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1910 &adev->gfx.mec.mec_fw_obj, 1911 &adev->gfx.mec.mec_fw_gpu_addr, 1912 (void **)&fw); 1913 if (r) { 1914 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1915 gfx_v9_0_mec_fini(adev); 1916 return r; 1917 } 1918 1919 memcpy(fw, fw_data, fw_size); 1920 1921 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1922 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1923 1924 return 0; 1925 } 1926 1927 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1928 { 1929 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1930 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1931 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1932 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1933 (SQ_IND_INDEX__FORCE_READ_MASK)); 1934 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1935 } 1936 1937 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1938 uint32_t wave, uint32_t thread, 1939 uint32_t regno, uint32_t num, uint32_t *out) 1940 { 1941 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1942 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1943 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1944 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1945 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1946 (SQ_IND_INDEX__FORCE_READ_MASK) | 1947 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1948 while (num--) 1949 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1950 } 1951 1952 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1953 { 1954 /* type 1 wave data */ 1955 dst[(*no_fields)++] = 1; 1956 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1965 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1966 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1967 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1968 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1969 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1970 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1971 } 1972 1973 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1974 uint32_t wave, uint32_t start, 1975 uint32_t 
size, uint32_t *dst) 1976 { 1977 wave_read_regs( 1978 adev, simd, wave, 0, 1979 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1980 } 1981 1982 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1983 uint32_t wave, uint32_t thread, 1984 uint32_t start, uint32_t size, 1985 uint32_t *dst) 1986 { 1987 wave_read_regs( 1988 adev, simd, wave, thread, 1989 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1990 } 1991 1992 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1993 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1994 { 1995 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1996 } 1997 1998 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1999 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2000 .select_se_sh = &gfx_v9_0_select_se_sh, 2001 .read_wave_data = &gfx_v9_0_read_wave_data, 2002 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 2003 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2004 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2005 }; 2006 2007 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 2008 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2009 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2010 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2011 }; 2012 2013 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 2014 .ras_block = { 2015 .hw_ops = &gfx_v9_0_ras_ops, 2016 }, 2017 }; 2018 2019 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2020 { 2021 u32 gb_addr_config; 2022 int err; 2023 2024 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2025 case IP_VERSION(9, 0, 1): 2026 adev->gfx.config.max_hw_contexts = 8; 2027 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2028 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2029 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2030 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2031 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2032 break; 2033 case IP_VERSION(9, 2, 1): 2034 adev->gfx.config.max_hw_contexts = 8; 2035 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2036 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2037 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2038 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2039 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2040 DRM_INFO("fix gfx.config for vega12\n"); 2041 break; 2042 case IP_VERSION(9, 4, 0): 2043 adev->gfx.ras = &gfx_v9_0_ras; 2044 adev->gfx.config.max_hw_contexts = 8; 2045 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2046 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2047 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2048 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2049 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2050 gb_addr_config &= ~0xf3e777ff; 2051 gb_addr_config |= 0x22014042; 2052 /* check vbios table if gpu info is not available */ 2053 err = amdgpu_atomfirmware_get_gfx_info(adev); 2054 if (err) 2055 return err; 2056 break; 2057 case IP_VERSION(9, 2, 2): 2058 case IP_VERSION(9, 1, 0): 2059 adev->gfx.config.max_hw_contexts = 8; 2060 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2061 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2062 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2063 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2064 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2065 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2066 else 2067 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2068 break; 2069 case IP_VERSION(9, 4, 1): 2070 adev->gfx.ras = &gfx_v9_4_ras; 2071 
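		/* Arcturus (gfx v9.4.1) hooks up the dedicated gfx_v9_4 RAS block */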
adev->gfx.config.max_hw_contexts = 8; 2072 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2073 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2074 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2075 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2076 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2077 gb_addr_config &= ~0xf3e777ff; 2078 gb_addr_config |= 0x22014042; 2079 break; 2080 case IP_VERSION(9, 3, 0): 2081 adev->gfx.config.max_hw_contexts = 8; 2082 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2083 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2084 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2085 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2086 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2087 gb_addr_config &= ~0xf3e777ff; 2088 gb_addr_config |= 0x22010042; 2089 break; 2090 case IP_VERSION(9, 4, 2): 2091 adev->gfx.ras = &gfx_v9_4_2_ras; 2092 adev->gfx.config.max_hw_contexts = 8; 2093 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2094 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2095 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2096 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2097 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2098 gb_addr_config &= ~0xf3e777ff; 2099 gb_addr_config |= 0x22014042; 2100 /* check vbios table if gpu info is not available */ 2101 err = amdgpu_atomfirmware_get_gfx_info(adev); 2102 if (err) 2103 return err; 2104 break; 2105 default: 2106 BUG(); 2107 break; 2108 } 2109 2110 adev->gfx.config.gb_addr_config = gb_addr_config; 2111 2112 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2113 REG_GET_FIELD( 2114 adev->gfx.config.gb_addr_config, 2115 GB_ADDR_CONFIG, 2116 NUM_PIPES); 2117 2118 adev->gfx.config.max_tile_pipes = 2119 adev->gfx.config.gb_addr_config_fields.num_pipes; 2120 2121 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2122 REG_GET_FIELD( 2123 adev->gfx.config.gb_addr_config, 2124 GB_ADDR_CONFIG, 2125 NUM_BANKS); 2126 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2127 REG_GET_FIELD( 2128 adev->gfx.config.gb_addr_config, 2129 GB_ADDR_CONFIG, 2130 MAX_COMPRESSED_FRAGS); 2131 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2132 REG_GET_FIELD( 2133 adev->gfx.config.gb_addr_config, 2134 GB_ADDR_CONFIG, 2135 NUM_RB_PER_SE); 2136 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2137 REG_GET_FIELD( 2138 adev->gfx.config.gb_addr_config, 2139 GB_ADDR_CONFIG, 2140 NUM_SHADER_ENGINES); 2141 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2142 REG_GET_FIELD( 2143 adev->gfx.config.gb_addr_config, 2144 GB_ADDR_CONFIG, 2145 PIPE_INTERLEAVE_SIZE)); 2146 2147 return 0; 2148 } 2149 2150 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2151 int mec, int pipe, int queue) 2152 { 2153 unsigned irq_type; 2154 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2155 unsigned int hw_prio; 2156 2157 ring = &adev->gfx.compute_ring[ring_id]; 2158 2159 /* mec0 is me1 */ 2160 ring->me = mec + 1; 2161 ring->pipe = pipe; 2162 ring->queue = queue; 2163 2164 ring->ring_obj = NULL; 2165 ring->use_doorbell = true; 2166 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2167 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2168 + (ring_id * GFX9_MEC_HPD_SIZE); 2169 ring->vm_hub = AMDGPU_GFXHUB(0); 2170 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2171 2172 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2173 + ((ring->me - 1) * 
adev->gfx.mec.num_pipe_per_mec) 2174 + ring->pipe; 2175 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 2176 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2177 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2178 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2179 hw_prio, NULL); 2180 } 2181 2182 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2183 { 2184 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2185 uint32_t *ptr; 2186 uint32_t inst; 2187 2188 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2189 if (!ptr) { 2190 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2191 adev->gfx.ip_dump_core = NULL; 2192 } else { 2193 adev->gfx.ip_dump_core = ptr; 2194 } 2195 2196 /* Allocate memory for compute queue registers for all the instances */ 2197 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2198 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2199 adev->gfx.mec.num_queue_per_pipe; 2200 2201 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2202 if (!ptr) { 2203 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2204 adev->gfx.ip_dump_compute_queues = NULL; 2205 } else { 2206 adev->gfx.ip_dump_compute_queues = ptr; 2207 } 2208 } 2209 2210 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) 2211 { 2212 int i, j, k, r, ring_id; 2213 int xcc_id = 0; 2214 struct amdgpu_ring *ring; 2215 struct amdgpu_device *adev = ip_block->adev; 2216 unsigned int hw_prio; 2217 2218 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2219 case IP_VERSION(9, 0, 1): 2220 case IP_VERSION(9, 2, 1): 2221 case IP_VERSION(9, 4, 0): 2222 case IP_VERSION(9, 2, 2): 2223 case IP_VERSION(9, 1, 0): 2224 case IP_VERSION(9, 4, 1): 2225 case IP_VERSION(9, 3, 0): 2226 case IP_VERSION(9, 4, 2): 2227 adev->gfx.mec.num_mec = 2; 2228 break; 2229 default: 2230 adev->gfx.mec.num_mec = 1; 2231 break; 2232 } 2233 2234 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2235 case IP_VERSION(9, 4, 2): 2236 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; 2237 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); 2238 if (adev->gfx.mec_fw_version >= 88) { 2239 adev->gfx.enable_cleaner_shader = true; 2240 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 2241 if (r) { 2242 adev->gfx.enable_cleaner_shader = false; 2243 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 2244 } 2245 } 2246 break; 2247 default: 2248 adev->gfx.enable_cleaner_shader = false; 2249 break; 2250 } 2251 2252 adev->gfx.mec.num_pipe_per_mec = 4; 2253 adev->gfx.mec.num_queue_per_pipe = 8; 2254 2255 /* EOP Event */ 2256 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2257 if (r) 2258 return r; 2259 2260 /* Bad opcode Event */ 2261 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 2262 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, 2263 &adev->gfx.bad_op_irq); 2264 if (r) 2265 return r; 2266 2267 /* Privileged reg */ 2268 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2269 &adev->gfx.priv_reg_irq); 2270 if (r) 2271 return r; 2272 2273 /* Privileged inst */ 2274 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2275 &adev->gfx.priv_inst_irq); 2276 if (r) 2277 return r; 2278 2279 /* ECC error */ 2280 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2281 &adev->gfx.cp_ecc_error_irq); 2282 if (r) 2283 return 
r; 2284 2285 /* FUE error */ 2286 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2287 &adev->gfx.cp_ecc_error_irq); 2288 if (r) 2289 return r; 2290 2291 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2292 2293 if (adev->gfx.rlc.funcs) { 2294 if (adev->gfx.rlc.funcs->init) { 2295 r = adev->gfx.rlc.funcs->init(adev); 2296 if (r) { 2297 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2298 return r; 2299 } 2300 } 2301 } 2302 2303 r = gfx_v9_0_mec_init(adev); 2304 if (r) { 2305 DRM_ERROR("Failed to init MEC BOs!\n"); 2306 return r; 2307 } 2308 2309 /* set up the gfx ring */ 2310 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2311 ring = &adev->gfx.gfx_ring[i]; 2312 ring->ring_obj = NULL; 2313 if (!i) 2314 sprintf(ring->name, "gfx"); 2315 else 2316 sprintf(ring->name, "gfx_%d", i); 2317 ring->use_doorbell = true; 2318 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2319 2320 /* disable scheduler on the real ring */ 2321 ring->no_scheduler = adev->gfx.mcbp; 2322 ring->vm_hub = AMDGPU_GFXHUB(0); 2323 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2324 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2325 AMDGPU_RING_PRIO_DEFAULT, NULL); 2326 if (r) 2327 return r; 2328 } 2329 2330 /* set up the software rings */ 2331 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2332 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2333 ring = &adev->gfx.sw_gfx_ring[i]; 2334 ring->ring_obj = NULL; 2335 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2336 ring->use_doorbell = true; 2337 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2338 ring->is_sw_ring = true; 2339 hw_prio = amdgpu_sw_ring_priority(i); 2340 ring->vm_hub = AMDGPU_GFXHUB(0); 2341 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2342 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2343 NULL); 2344 if (r) 2345 return r; 2346 ring->wptr = 0; 2347 } 2348 2349 /* init the muxer and add software rings */ 2350 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2351 GFX9_NUM_SW_GFX_RINGS); 2352 if (r) { 2353 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2354 return r; 2355 } 2356 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2357 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2358 &adev->gfx.sw_gfx_ring[i]); 2359 if (r) { 2360 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2361 return r; 2362 } 2363 } 2364 } 2365 2366 /* set up the compute queues - allocate horizontally across pipes */ 2367 ring_id = 0; 2368 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2369 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2370 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2371 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2372 k, j)) 2373 continue; 2374 2375 r = gfx_v9_0_compute_ring_init(adev, 2376 ring_id, 2377 i, k, j); 2378 if (r) 2379 return r; 2380 2381 ring_id++; 2382 } 2383 } 2384 } 2385 2386 /* TODO: Add queue reset mask when FW fully supports it */ 2387 adev->gfx.gfx_supported_reset = 2388 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 2389 adev->gfx.compute_supported_reset = 2390 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 2391 2392 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2393 if (r) { 2394 DRM_ERROR("Failed to init KIQ BOs!\n"); 2395 return r; 2396 } 2397 2398 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2399 if (r) 2400 return r; 2401 2402 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2403 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2404 if (r) 2405 
return r; 2406 2407 adev->gfx.ce_ram_size = 0x8000; 2408 2409 r = gfx_v9_0_gpu_early_init(adev); 2410 if (r) 2411 return r; 2412 2413 if (amdgpu_gfx_ras_sw_init(adev)) { 2414 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2415 return -EINVAL; 2416 } 2417 2418 gfx_v9_0_alloc_ip_dump(adev); 2419 2420 r = amdgpu_gfx_sysfs_init(adev); 2421 if (r) 2422 return r; 2423 2424 return 0; 2425 } 2426 2427 2428 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) 2429 { 2430 int i; 2431 struct amdgpu_device *adev = ip_block->adev; 2432 2433 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2434 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2435 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2436 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2437 } 2438 2439 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2440 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2441 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2442 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2443 2444 amdgpu_gfx_mqd_sw_fini(adev, 0); 2445 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2446 amdgpu_gfx_kiq_fini(adev, 0); 2447 2448 amdgpu_gfx_cleaner_shader_sw_fini(adev); 2449 2450 gfx_v9_0_mec_fini(adev); 2451 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2452 &adev->gfx.rlc.clear_state_gpu_addr, 2453 (void **)&adev->gfx.rlc.cs_ptr); 2454 if (adev->flags & AMD_IS_APU) { 2455 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2456 &adev->gfx.rlc.cp_table_gpu_addr, 2457 (void **)&adev->gfx.rlc.cp_table_ptr); 2458 } 2459 gfx_v9_0_free_microcode(adev); 2460 2461 amdgpu_gfx_sysfs_fini(adev); 2462 2463 kfree(adev->gfx.ip_dump_core); 2464 kfree(adev->gfx.ip_dump_compute_queues); 2465 2466 return 0; 2467 } 2468 2469 2470 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2471 { 2472 /* TODO */ 2473 } 2474 2475 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2476 u32 instance, int xcc_id) 2477 { 2478 u32 data; 2479 2480 if (instance == 0xffffffff) 2481 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2482 else 2483 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2484 2485 if (se_num == 0xffffffff) 2486 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2487 else 2488 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2489 2490 if (sh_num == 0xffffffff) 2491 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2492 else 2493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2494 2495 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2496 } 2497 2498 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2499 { 2500 u32 data, mask; 2501 2502 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2503 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2504 2505 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2506 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2507 2508 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2509 adev->gfx.config.max_sh_per_se); 2510 2511 return (~data) & mask; 2512 } 2513 2514 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2515 { 2516 int i, j; 2517 u32 data; 2518 u32 active_rbs = 0; 2519 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2520 adev->gfx.config.max_sh_per_se; 2521 2522 mutex_lock(&adev->grbm_idx_mutex); 2523 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2524 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2525 
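			/* select this SE/SH and read back its active render backend bitmap */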
amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2526 data = gfx_v9_0_get_rb_active_bitmap(adev); 2527 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2528 rb_bitmap_width_per_sh); 2529 } 2530 } 2531 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2532 mutex_unlock(&adev->grbm_idx_mutex); 2533 2534 adev->gfx.config.backend_enable_mask = active_rbs; 2535 adev->gfx.config.num_rbs = hweight32(active_rbs); 2536 } 2537 2538 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2539 uint32_t first_vmid, 2540 uint32_t last_vmid) 2541 { 2542 uint32_t data; 2543 uint32_t trap_config_vmid_mask = 0; 2544 int i; 2545 2546 /* Calculate trap config vmid mask */ 2547 for (i = first_vmid; i < last_vmid; i++) 2548 trap_config_vmid_mask |= (1 << i); 2549 2550 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2551 VMID_SEL, trap_config_vmid_mask); 2552 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 2553 TRAP_EN, 1); 2554 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2555 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2556 2557 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2558 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2559 } 2560 2561 #define DEFAULT_SH_MEM_BASES (0x6000) 2562 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2563 { 2564 int i; 2565 uint32_t sh_mem_config; 2566 uint32_t sh_mem_bases; 2567 2568 /* 2569 * Configure apertures: 2570 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2571 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2572 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2573 */ 2574 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2575 2576 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2577 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2578 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2579 2580 mutex_lock(&adev->srbm_mutex); 2581 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2582 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2583 /* CP and shaders */ 2584 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2585 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2586 } 2587 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2588 mutex_unlock(&adev->srbm_mutex); 2589 2590 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2591 access. These should be enabled by FW for target VMIDs. */ 2592 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2593 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2594 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2597 } 2598 } 2599 2600 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2601 { 2602 int vmid; 2603 2604 /* 2605 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2606 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2607 * the driver can enable them for graphics. VMID0 should maintain 2608 * access so that HWS firmware can save/restore entries. 
2609 */ 2610 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2611 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2612 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2613 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2614 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2615 } 2616 } 2617 2618 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2619 { 2620 uint32_t tmp; 2621 2622 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2623 case IP_VERSION(9, 4, 1): 2624 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2625 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2626 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2627 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2628 break; 2629 default: 2630 break; 2631 } 2632 } 2633 2634 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2635 { 2636 u32 tmp; 2637 int i; 2638 2639 if (!amdgpu_sriov_vf(adev) || 2640 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) { 2641 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2642 } 2643 2644 gfx_v9_0_tiling_mode_table_init(adev); 2645 2646 if (adev->gfx.num_gfx_rings) 2647 gfx_v9_0_setup_rb(adev); 2648 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2649 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2650 2651 /* XXX SH_MEM regs */ 2652 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2653 mutex_lock(&adev->srbm_mutex); 2654 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2655 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2656 /* CP and shaders */ 2657 if (i == 0) { 2658 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2659 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2660 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2661 !!adev->gmc.noretry); 2662 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2663 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2664 } else { 2665 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2666 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2667 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2668 !!adev->gmc.noretry); 2669 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2670 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2671 (adev->gmc.private_aperture_start >> 48)); 2672 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2673 (adev->gmc.shared_aperture_start >> 48)); 2674 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2675 } 2676 } 2677 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2678 2679 mutex_unlock(&adev->srbm_mutex); 2680 2681 gfx_v9_0_init_compute_vmid(adev); 2682 gfx_v9_0_init_gds_vmid(adev); 2683 gfx_v9_0_init_sq_config(adev); 2684 } 2685 2686 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2687 { 2688 u32 i, j, k; 2689 u32 mask; 2690 2691 mutex_lock(&adev->grbm_idx_mutex); 2692 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2693 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2694 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2695 for (k = 0; k < adev->usec_timeout; k++) { 2696 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2697 break; 2698 udelay(1); 2699 } 2700 if (k == adev->usec_timeout) { 2701 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2702 0xffffffff, 0xffffffff, 0); 2703 mutex_unlock(&adev->grbm_idx_mutex); 2704 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2705 i, j); 2706 return; 2707 } 2708 } 2709 } 2710 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2711 mutex_unlock(&adev->grbm_idx_mutex); 2712 2713 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2714 
RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2715 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2716 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2717 for (k = 0; k < adev->usec_timeout; k++) { 2718 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2719 break; 2720 udelay(1); 2721 } 2722 } 2723 2724 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2725 bool enable) 2726 { 2727 u32 tmp; 2728 2729 /* These interrupts should be enabled to drive DS clock */ 2730 2731 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2732 2733 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2734 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2735 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2736 if (adev->gfx.num_gfx_rings) 2737 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2738 2739 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2740 } 2741 2742 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2743 { 2744 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2745 /* csib */ 2746 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2747 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2748 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2749 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2750 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2751 adev->gfx.rlc.clear_state_size); 2752 } 2753 2754 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2755 int indirect_offset, 2756 int list_size, 2757 int *unique_indirect_regs, 2758 int unique_indirect_reg_count, 2759 int *indirect_start_offsets, 2760 int *indirect_start_offsets_count, 2761 int max_start_offsets_count) 2762 { 2763 int idx; 2764 2765 for (; indirect_offset < list_size; indirect_offset++) { 2766 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2767 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2768 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2769 2770 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2771 indirect_offset += 2; 2772 2773 /* look for the matching indice */ 2774 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2775 if (unique_indirect_regs[idx] == 2776 register_list_format[indirect_offset] || 2777 !unique_indirect_regs[idx]) 2778 break; 2779 } 2780 2781 BUG_ON(idx >= unique_indirect_reg_count); 2782 2783 if (!unique_indirect_regs[idx]) 2784 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2785 2786 indirect_offset++; 2787 } 2788 } 2789 } 2790 2791 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2792 { 2793 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2794 int unique_indirect_reg_count = 0; 2795 2796 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2797 int indirect_start_offsets_count = 0; 2798 2799 int list_size = 0; 2800 int i = 0, j = 0; 2801 u32 tmp = 0; 2802 2803 u32 *register_list_format = 2804 kmemdup(adev->gfx.rlc.register_list_format, 2805 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2806 if (!register_list_format) 2807 return -ENOMEM; 2808 2809 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2810 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2811 gfx_v9_1_parse_ind_reg_list(register_list_format, 2812 
adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2813 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2814 unique_indirect_regs, 2815 unique_indirect_reg_count, 2816 indirect_start_offsets, 2817 &indirect_start_offsets_count, 2818 ARRAY_SIZE(indirect_start_offsets)); 2819 2820 /* enable auto inc in case it is disabled */ 2821 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2822 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2823 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2824 2825 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2826 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2827 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2828 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2829 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2830 adev->gfx.rlc.register_restore[i]); 2831 2832 /* load indirect register */ 2833 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2834 adev->gfx.rlc.reg_list_format_start); 2835 2836 /* direct register portion */ 2837 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2838 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2839 register_list_format[i]); 2840 2841 /* indirect register portion */ 2842 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2843 if (register_list_format[i] == 0xFFFFFFFF) { 2844 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2845 continue; 2846 } 2847 2848 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2849 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2850 2851 for (j = 0; j < unique_indirect_reg_count; j++) { 2852 if (register_list_format[i] == unique_indirect_regs[j]) { 2853 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2854 break; 2855 } 2856 } 2857 2858 BUG_ON(j >= unique_indirect_reg_count); 2859 2860 i++; 2861 } 2862 2863 /* set save/restore list size */ 2864 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2865 list_size = list_size >> 1; 2866 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2867 adev->gfx.rlc.reg_restore_list_size); 2868 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2869 2870 /* write the starting offsets to RLC scratch ram */ 2871 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2872 adev->gfx.rlc.starting_offsets_start); 2873 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2874 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2875 indirect_start_offsets[i]); 2876 2877 /* load unique indirect regs*/ 2878 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2879 if (unique_indirect_regs[i] != 0) { 2880 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2881 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2882 unique_indirect_regs[i] & 0x3FFFF); 2883 2884 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2885 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2886 unique_indirect_regs[i] >> 20); 2887 } 2888 } 2889 2890 kfree(register_list_format); 2891 return 0; 2892 } 2893 2894 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2895 { 2896 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2897 } 2898 2899 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2900 bool enable) 2901 { 2902 uint32_t data = 0; 2903 uint32_t default_data = 0; 2904 2905 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2906 if (enable) { 2907 /* enable GFXIP control over CGPG */ 2908 data |= 
PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2909 if(default_data != data) 2910 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2911 2912 /* update status */ 2913 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2914 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2915 if(default_data != data) 2916 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2917 } else { 2918 /* restore GFXIP control over GCPG */ 2919 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2920 if(default_data != data) 2921 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2922 } 2923 } 2924 2925 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2926 { 2927 uint32_t data = 0; 2928 2929 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2930 AMD_PG_SUPPORT_GFX_SMG | 2931 AMD_PG_SUPPORT_GFX_DMG)) { 2932 /* init IDLE_POLL_COUNT = 60 */ 2933 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2934 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2935 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2936 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2937 2938 /* init RLC PG Delay */ 2939 data = 0; 2940 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2941 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2942 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2943 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2944 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2945 2946 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2947 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2948 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2949 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2950 2951 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2952 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2953 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2954 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2955 2956 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2957 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2958 2959 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2960 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2961 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2962 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) 2963 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2964 } 2965 } 2966 2967 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2968 bool enable) 2969 { 2970 uint32_t data = 0; 2971 uint32_t default_data = 0; 2972 2973 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2974 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2975 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2976 enable ? 1 : 0); 2977 if (default_data != data) 2978 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2979 } 2980 2981 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2982 bool enable) 2983 { 2984 uint32_t data = 0; 2985 uint32_t default_data = 0; 2986 2987 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2988 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2989 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2990 enable ? 
1 : 0); 2991 if(default_data != data) 2992 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2993 } 2994 2995 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2996 bool enable) 2997 { 2998 uint32_t data = 0; 2999 uint32_t default_data = 0; 3000 3001 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3002 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3003 CP_PG_DISABLE, 3004 enable ? 0 : 1); 3005 if(default_data != data) 3006 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3007 } 3008 3009 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 3010 bool enable) 3011 { 3012 uint32_t data, default_data; 3013 3014 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3015 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3016 GFX_POWER_GATING_ENABLE, 3017 enable ? 1 : 0); 3018 if(default_data != data) 3019 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3020 } 3021 3022 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3023 bool enable) 3024 { 3025 uint32_t data, default_data; 3026 3027 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3028 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3029 GFX_PIPELINE_PG_ENABLE, 3030 enable ? 1 : 0); 3031 if(default_data != data) 3032 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3033 3034 if (!enable) 3035 /* read any GFX register to wake up GFX */ 3036 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3037 } 3038 3039 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3040 bool enable) 3041 { 3042 uint32_t data, default_data; 3043 3044 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3045 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3046 STATIC_PER_CU_PG_ENABLE, 3047 enable ? 1 : 0); 3048 if(default_data != data) 3049 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3050 } 3051 3052 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3053 bool enable) 3054 { 3055 uint32_t data, default_data; 3056 3057 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3058 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3059 DYN_PER_CU_PG_ENABLE, 3060 enable ? 1 : 0); 3061 if(default_data != data) 3062 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3063 } 3064 3065 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3066 { 3067 gfx_v9_0_init_csb(adev); 3068 3069 /* 3070 * Rlc save restore list is workable since v2_1. 3071 * And it's needed by gfxoff feature. 
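 * The list lets the RLC save/restore machine reload GFX register state when
 * the block comes back from power gating, which gfxoff relies on.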
3072 */ 3073 if (adev->gfx.rlc.is_rlc_v2_1) { 3074 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3075 IP_VERSION(9, 2, 1) || 3076 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3077 gfx_v9_1_init_rlc_save_restore_list(adev); 3078 gfx_v9_0_enable_save_restore_machine(adev); 3079 } 3080 3081 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3082 AMD_PG_SUPPORT_GFX_SMG | 3083 AMD_PG_SUPPORT_GFX_DMG | 3084 AMD_PG_SUPPORT_CP | 3085 AMD_PG_SUPPORT_GDS | 3086 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3087 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3088 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3089 gfx_v9_0_init_gfx_power_gating(adev); 3090 } 3091 } 3092 3093 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3094 { 3095 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3096 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3097 gfx_v9_0_wait_for_rlc_serdes(adev); 3098 } 3099 3100 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3101 { 3102 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3103 udelay(50); 3104 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3105 udelay(50); 3106 } 3107 3108 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3109 { 3110 #ifdef AMDGPU_RLC_DEBUG_RETRY 3111 u32 rlc_ucode_ver; 3112 #endif 3113 3114 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3115 udelay(50); 3116 3117 /* carrizo do enable cp interrupt after cp inited */ 3118 if (!(adev->flags & AMD_IS_APU)) { 3119 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3120 udelay(50); 3121 } 3122 3123 #ifdef AMDGPU_RLC_DEBUG_RETRY 3124 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3125 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3126 if(rlc_ucode_ver == 0x108) { 3127 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3128 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3129 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3130 * default is 0x9C4 to create a 100us interval */ 3131 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3132 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3133 * to disable the page fault retry interrupts, default is 3134 * 0x100 (256) */ 3135 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3136 } 3137 #endif 3138 } 3139 3140 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3141 { 3142 const struct rlc_firmware_header_v2_0 *hdr; 3143 const __le32 *fw_data; 3144 unsigned i, fw_size; 3145 3146 if (!adev->gfx.rlc_fw) 3147 return -EINVAL; 3148 3149 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3150 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3151 3152 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3153 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3154 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3155 3156 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3157 RLCG_UCODE_LOADING_START_ADDRESS); 3158 for (i = 0; i < fw_size; i++) 3159 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3160 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3161 3162 return 0; 3163 } 3164 3165 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3166 { 3167 int r; 3168 3169 if (amdgpu_sriov_vf(adev)) { 3170 gfx_v9_0_init_csb(adev); 3171 return 0; 3172 } 3173 3174 adev->gfx.rlc.funcs->stop(adev); 3175 3176 /* disable CG */ 3177 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3178 3179 gfx_v9_0_init_pg(adev); 3180 3181 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3182 /* legacy rlc firmware loading */ 3183 r = 
gfx_v9_0_rlc_load_microcode(adev); 3184 if (r) 3185 return r; 3186 } 3187 3188 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3189 case IP_VERSION(9, 2, 2): 3190 case IP_VERSION(9, 1, 0): 3191 gfx_v9_0_init_lbpw(adev); 3192 if (amdgpu_lbpw == 0) 3193 gfx_v9_0_enable_lbpw(adev, false); 3194 else 3195 gfx_v9_0_enable_lbpw(adev, true); 3196 break; 3197 case IP_VERSION(9, 4, 0): 3198 gfx_v9_4_init_lbpw(adev); 3199 if (amdgpu_lbpw > 0) 3200 gfx_v9_0_enable_lbpw(adev, true); 3201 else 3202 gfx_v9_0_enable_lbpw(adev, false); 3203 break; 3204 default: 3205 break; 3206 } 3207 3208 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3209 3210 adev->gfx.rlc.funcs->start(adev); 3211 3212 return 0; 3213 } 3214 3215 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3216 { 3217 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3218 3219 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1); 3220 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1); 3221 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1); 3222 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1); 3223 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1); 3224 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1); 3225 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1); 3226 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1); 3227 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1); 3228 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3229 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3230 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3231 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3232 udelay(50); 3233 } 3234 3235 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3236 { 3237 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3238 const struct gfx_firmware_header_v1_0 *ce_hdr; 3239 const struct gfx_firmware_header_v1_0 *me_hdr; 3240 const __le32 *fw_data; 3241 unsigned i, fw_size; 3242 3243 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3244 return -EINVAL; 3245 3246 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3247 adev->gfx.pfp_fw->data; 3248 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3249 adev->gfx.ce_fw->data; 3250 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3251 adev->gfx.me_fw->data; 3252 3253 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3254 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3255 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3256 3257 gfx_v9_0_cp_gfx_enable(adev, false); 3258 3259 /* PFP */ 3260 fw_data = (const __le32 *) 3261 (adev->gfx.pfp_fw->data + 3262 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3263 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3264 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3265 for (i = 0; i < fw_size; i++) 3266 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3267 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3268 3269 /* CE */ 3270 fw_data = (const __le32 *) 3271 (adev->gfx.ce_fw->data + 3272 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3273 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3274 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3275 for (i = 0; i < fw_size; i++) 3276 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3277 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3278 3279 
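/* The ME microcode below is streamed through the CP_ME_RAM_WADDR/DATA window
 * rather than a dedicated *_UCODE_ADDR/DATA pair as used for PFP and CE above.
 */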
/* ME */ 3280 fw_data = (const __le32 *) 3281 (adev->gfx.me_fw->data + 3282 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3283 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3284 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3285 for (i = 0; i < fw_size; i++) 3286 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3287 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3288 3289 return 0; 3290 } 3291 3292 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3293 { 3294 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3295 const struct cs_section_def *sect = NULL; 3296 const struct cs_extent_def *ext = NULL; 3297 int r, i, tmp; 3298 3299 /* init the CP */ 3300 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3301 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3302 3303 gfx_v9_0_cp_gfx_enable(adev, true); 3304 3305 /* Now only limit the quirk on the APU gfx9 series and already 3306 * confirmed that the APU gfx10/gfx11 needn't such update. 3307 */ 3308 if (adev->flags & AMD_IS_APU && 3309 adev->in_s3 && !pm_resume_via_firmware()) { 3310 DRM_INFO("Will skip the CSB packet resubmit\n"); 3311 return 0; 3312 } 3313 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3314 if (r) { 3315 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3316 return r; 3317 } 3318 3319 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3320 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3321 3322 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3323 amdgpu_ring_write(ring, 0x80000000); 3324 amdgpu_ring_write(ring, 0x80000000); 3325 3326 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3327 for (ext = sect->section; ext->extent != NULL; ++ext) { 3328 if (sect->id == SECT_CONTEXT) { 3329 amdgpu_ring_write(ring, 3330 PACKET3(PACKET3_SET_CONTEXT_REG, 3331 ext->reg_count)); 3332 amdgpu_ring_write(ring, 3333 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3334 for (i = 0; i < ext->reg_count; i++) 3335 amdgpu_ring_write(ring, ext->extent[i]); 3336 } 3337 } 3338 } 3339 3340 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3341 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3342 3343 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3344 amdgpu_ring_write(ring, 0); 3345 3346 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3347 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3348 amdgpu_ring_write(ring, 0x8000); 3349 amdgpu_ring_write(ring, 0x8000); 3350 3351 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3352 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3353 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3354 amdgpu_ring_write(ring, tmp); 3355 amdgpu_ring_write(ring, 0); 3356 3357 amdgpu_ring_commit(ring); 3358 3359 return 0; 3360 } 3361 3362 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3363 { 3364 struct amdgpu_ring *ring; 3365 u32 tmp; 3366 u32 rb_bufsz; 3367 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3368 3369 /* Set the write pointer delay */ 3370 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3371 3372 /* set the RB to use vmid 0 */ 3373 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3374 3375 /* Set ring buffer size */ 3376 ring = &adev->gfx.gfx_ring[0]; 3377 rb_bufsz = order_base_2(ring->ring_size / 8); 3378 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3379 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3380 #ifdef __BIG_ENDIAN 3381 tmp = 
REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3382 #endif 3383 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3384 3385 /* Initialize the ring buffer's write pointers */ 3386 ring->wptr = 0; 3387 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3388 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3389 3390 /* set the wb address whether it's enabled or not */ 3391 rptr_addr = ring->rptr_gpu_addr; 3392 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3393 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3394 3395 wptr_gpu_addr = ring->wptr_gpu_addr; 3396 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3397 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3398 3399 mdelay(1); 3400 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3401 3402 rb_addr = ring->gpu_addr >> 8; 3403 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3404 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3405 3406 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3407 if (ring->use_doorbell) { 3408 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3409 DOORBELL_OFFSET, ring->doorbell_index); 3410 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3411 DOORBELL_EN, 1); 3412 } else { 3413 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3414 } 3415 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3416 3417 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3418 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3419 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3420 3421 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3422 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3423 3424 3425 /* start the ring */ 3426 gfx_v9_0_cp_gfx_start(adev); 3427 3428 return 0; 3429 } 3430 3431 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3432 { 3433 if (enable) { 3434 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3435 } else { 3436 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3437 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | 3438 CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | 3439 CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | 3440 CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | 3441 CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | 3442 CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | 3443 CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | 3444 CP_MEC_CNTL__MEC_ME1_HALT_MASK | 3445 CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3446 adev->gfx.kiq[0].ring.sched.ready = false; 3447 } 3448 udelay(50); 3449 } 3450 3451 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3452 { 3453 const struct gfx_firmware_header_v1_0 *mec_hdr; 3454 const __le32 *fw_data; 3455 unsigned i; 3456 u32 tmp; 3457 3458 if (!adev->gfx.mec_fw) 3459 return -EINVAL; 3460 3461 gfx_v9_0_cp_compute_enable(adev, false); 3462 3463 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3464 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3465 3466 fw_data = (const __le32 *) 3467 (adev->gfx.mec_fw->data + 3468 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3469 tmp = 0; 3470 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3471 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3472 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3473 3474 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3475 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3476 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3477 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3478 3479 /* MEC1 */ 3480 
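/* Only the MEC jump table (jt_offset/jt_size dwords) is written through the
 * ME1_UCODE_ADDR/DATA window below; the ucode body itself is expected to be
 * fetched through the CPC instruction cache base programmed above.
 */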
WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3481 mec_hdr->jt_offset); 3482 for (i = 0; i < mec_hdr->jt_size; i++) 3483 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3484 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3485 3486 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3487 adev->gfx.mec_fw_version); 3488 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3489 3490 return 0; 3491 } 3492 3493 /* KIQ functions */ 3494 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3495 { 3496 uint32_t tmp; 3497 struct amdgpu_device *adev = ring->adev; 3498 3499 /* tell RLC which is KIQ queue */ 3500 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3501 tmp &= 0xffffff00; 3502 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3503 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); 3504 } 3505 3506 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3507 { 3508 struct amdgpu_device *adev = ring->adev; 3509 3510 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3511 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3512 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3513 mqd->cp_hqd_queue_priority = 3514 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3515 } 3516 } 3517 } 3518 3519 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3520 { 3521 struct amdgpu_device *adev = ring->adev; 3522 struct v9_mqd *mqd = ring->mqd_ptr; 3523 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3524 uint32_t tmp; 3525 3526 mqd->header = 0xC0310800; 3527 mqd->compute_pipelinestat_enable = 0x00000001; 3528 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3529 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3530 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3531 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3532 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3533 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3534 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3535 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3536 mqd->compute_misc_reserved = 0x00000003; 3537 3538 mqd->dynamic_cu_mask_addr_lo = 3539 lower_32_bits(ring->mqd_gpu_addr 3540 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3541 mqd->dynamic_cu_mask_addr_hi = 3542 upper_32_bits(ring->mqd_gpu_addr 3543 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3544 3545 eop_base_addr = ring->eop_gpu_addr >> 8; 3546 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3547 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3548 3549 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3550 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3551 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3552 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3553 3554 mqd->cp_hqd_eop_control = tmp; 3555 3556 /* enable doorbell? 
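 * (the doorbell setup is only staged in the MQD here; the HQD registers are
 * programmed later, either directly for the KIQ in gfx_v9_0_kiq_init_register()
 * or by the CP firmware when the queue is mapped)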
*/ 3557 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3558 3559 if (ring->use_doorbell) { 3560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3561 DOORBELL_OFFSET, ring->doorbell_index); 3562 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3563 DOORBELL_EN, 1); 3564 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3565 DOORBELL_SOURCE, 0); 3566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3567 DOORBELL_HIT, 0); 3568 } else { 3569 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3570 DOORBELL_EN, 0); 3571 } 3572 3573 mqd->cp_hqd_pq_doorbell_control = tmp; 3574 3575 /* disable the queue if it's active */ 3576 ring->wptr = 0; 3577 mqd->cp_hqd_dequeue_request = 0; 3578 mqd->cp_hqd_pq_rptr = 0; 3579 mqd->cp_hqd_pq_wptr_lo = 0; 3580 mqd->cp_hqd_pq_wptr_hi = 0; 3581 3582 /* set the pointer to the MQD */ 3583 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3584 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3585 3586 /* set MQD vmid to 0 */ 3587 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3588 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3589 mqd->cp_mqd_control = tmp; 3590 3591 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3592 hqd_gpu_addr = ring->gpu_addr >> 8; 3593 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3594 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3595 3596 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3597 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3598 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3599 (order_base_2(ring->ring_size / 4) - 1)); 3600 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3601 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3602 #ifdef __BIG_ENDIAN 3603 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3604 #endif 3605 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3606 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3607 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3608 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3609 mqd->cp_hqd_pq_control = tmp; 3610 3611 /* set the wb address whether it's enabled or not */ 3612 wb_gpu_addr = ring->rptr_gpu_addr; 3613 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3614 mqd->cp_hqd_pq_rptr_report_addr_hi = 3615 upper_32_bits(wb_gpu_addr) & 0xffff; 3616 3617 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3618 wb_gpu_addr = ring->wptr_gpu_addr; 3619 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3620 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3621 3622 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3623 ring->wptr = 0; 3624 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3625 3626 /* set the vmid for the queue */ 3627 mqd->cp_hqd_vmid = 0; 3628 3629 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3630 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3631 mqd->cp_hqd_persistent_state = tmp; 3632 3633 /* set MIN_IB_AVAIL_SIZE */ 3634 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3635 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3636 mqd->cp_hqd_ib_control = tmp; 3637 3638 /* set static priority for a queue/ring */ 3639 gfx_v9_0_mqd_set_priority(ring, mqd); 3640 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3641 3642 /* map_queues packet doesn't need activate the queue, 3643 * so only kiq need set this field. 
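 * Queues brought up through the KIQ MAP_QUEUES packet are activated by the
 * packet itself, so cp_hqd_active is only set here for the KIQ ring.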
3644 */ 3645 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3646 mqd->cp_hqd_active = 1; 3647 3648 return 0; 3649 } 3650 3651 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3652 { 3653 struct amdgpu_device *adev = ring->adev; 3654 struct v9_mqd *mqd = ring->mqd_ptr; 3655 int j; 3656 3657 /* disable wptr polling */ 3658 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3659 3660 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3661 mqd->cp_hqd_eop_base_addr_lo); 3662 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3663 mqd->cp_hqd_eop_base_addr_hi); 3664 3665 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3666 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3667 mqd->cp_hqd_eop_control); 3668 3669 /* enable doorbell? */ 3670 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3671 mqd->cp_hqd_pq_doorbell_control); 3672 3673 /* disable the queue if it's active */ 3674 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3675 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3676 for (j = 0; j < adev->usec_timeout; j++) { 3677 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3678 break; 3679 udelay(1); 3680 } 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3682 mqd->cp_hqd_dequeue_request); 3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3684 mqd->cp_hqd_pq_rptr); 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3686 mqd->cp_hqd_pq_wptr_lo); 3687 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3688 mqd->cp_hqd_pq_wptr_hi); 3689 } 3690 3691 /* set the pointer to the MQD */ 3692 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3693 mqd->cp_mqd_base_addr_lo); 3694 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3695 mqd->cp_mqd_base_addr_hi); 3696 3697 /* set MQD vmid to 0 */ 3698 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3699 mqd->cp_mqd_control); 3700 3701 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3702 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3703 mqd->cp_hqd_pq_base_lo); 3704 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3705 mqd->cp_hqd_pq_base_hi); 3706 3707 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3708 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3709 mqd->cp_hqd_pq_control); 3710 3711 /* set the wb address whether it's enabled or not */ 3712 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3713 mqd->cp_hqd_pq_rptr_report_addr_lo); 3714 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3715 mqd->cp_hqd_pq_rptr_report_addr_hi); 3716 3717 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3718 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3719 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3720 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3721 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3722 3723 /* enable the doorbell if requested */ 3724 if (ring->use_doorbell) { 3725 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3726 (adev->doorbell_index.kiq * 2) << 2); 3727 /* If GC has entered CGPG, ringing doorbell > first page 3728 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3729 * workaround this issue. And this change has to align with firmware 3730 * update. 
3731 */ 3732 if (check_if_enlarge_doorbell_range(adev)) 3733 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3734 (adev->doorbell.size - 4)); 3735 else 3736 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3737 (adev->doorbell_index.userqueue_end * 2) << 2); 3738 } 3739 3740 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3741 mqd->cp_hqd_pq_doorbell_control); 3742 3743 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3744 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3745 mqd->cp_hqd_pq_wptr_lo); 3746 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3747 mqd->cp_hqd_pq_wptr_hi); 3748 3749 /* set the vmid for the queue */ 3750 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3751 3752 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3753 mqd->cp_hqd_persistent_state); 3754 3755 /* activate the queue */ 3756 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3757 mqd->cp_hqd_active); 3758 3759 if (ring->use_doorbell) 3760 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3761 3762 return 0; 3763 } 3764 3765 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3766 { 3767 struct amdgpu_device *adev = ring->adev; 3768 int j; 3769 3770 /* disable the queue if it's active */ 3771 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3772 3773 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3774 3775 for (j = 0; j < adev->usec_timeout; j++) { 3776 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3777 break; 3778 udelay(1); 3779 } 3780 3781 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3782 DRM_DEBUG("KIQ dequeue request failed.\n"); 3783 3784 /* Manual disable if dequeue request times out */ 3785 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3786 } 3787 3788 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3789 0); 3790 } 3791 3792 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3794 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3795 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3796 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3797 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3798 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3799 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3800 3801 return 0; 3802 } 3803 3804 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3805 { 3806 struct amdgpu_device *adev = ring->adev; 3807 struct v9_mqd *mqd = ring->mqd_ptr; 3808 struct v9_mqd *tmp_mqd; 3809 3810 gfx_v9_0_kiq_setting(ring); 3811 3812 /* GPU could be in bad state during probe, driver trigger the reset 3813 * after load the SMU, in this case , the mqd is not be initialized. 3814 * driver need to re-init the mqd. 
3815 * check mqd->cp_hqd_pq_control since this value should not be 0 3816 */ 3817 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 3818 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3819 /* for GPU_RESET case , reset MQD to a clean status */ 3820 if (adev->gfx.kiq[0].mqd_backup) 3821 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 3822 3823 /* reset ring buffer */ 3824 ring->wptr = 0; 3825 amdgpu_ring_clear_ring(ring); 3826 3827 mutex_lock(&adev->srbm_mutex); 3828 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3829 gfx_v9_0_kiq_init_register(ring); 3830 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3831 mutex_unlock(&adev->srbm_mutex); 3832 } else { 3833 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3834 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3835 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3836 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 3837 amdgpu_ring_clear_ring(ring); 3838 mutex_lock(&adev->srbm_mutex); 3839 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3840 gfx_v9_0_mqd_init(ring); 3841 gfx_v9_0_kiq_init_register(ring); 3842 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3843 mutex_unlock(&adev->srbm_mutex); 3844 3845 if (adev->gfx.kiq[0].mqd_backup) 3846 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 3847 } 3848 3849 return 0; 3850 } 3851 3852 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) 3853 { 3854 struct amdgpu_device *adev = ring->adev; 3855 struct v9_mqd *mqd = ring->mqd_ptr; 3856 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3857 struct v9_mqd *tmp_mqd; 3858 3859 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3860 * is not be initialized before 3861 */ 3862 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3863 3864 if (!restore && (!tmp_mqd->cp_hqd_pq_control || 3865 (!amdgpu_in_reset(adev) && !adev->in_suspend))) { 3866 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3867 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3868 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3869 mutex_lock(&adev->srbm_mutex); 3870 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3871 gfx_v9_0_mqd_init(ring); 3872 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3873 mutex_unlock(&adev->srbm_mutex); 3874 3875 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3876 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3877 } else { 3878 /* restore MQD to a clean status */ 3879 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3880 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3881 /* reset ring buffer */ 3882 ring->wptr = 0; 3883 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 3884 amdgpu_ring_clear_ring(ring); 3885 } 3886 3887 return 0; 3888 } 3889 3890 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3891 { 3892 gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 3893 return 0; 3894 } 3895 3896 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3897 { 3898 int i, r; 3899 3900 gfx_v9_0_cp_compute_enable(adev, true); 3901 3902 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3903 r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 3904 if (r) 3905 return r; 3906 } 3907 3908 return amdgpu_gfx_enable_kcq(adev, 0); 3909 } 3910 3911 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3912 { 3913 int r, i; 3914 struct amdgpu_ring 
*ring; 3915 3916 if (!(adev->flags & AMD_IS_APU)) 3917 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3918 3919 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3920 if (adev->gfx.num_gfx_rings) { 3921 /* legacy firmware loading */ 3922 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3923 if (r) 3924 return r; 3925 } 3926 3927 r = gfx_v9_0_cp_compute_load_microcode(adev); 3928 if (r) 3929 return r; 3930 } 3931 3932 if (adev->gfx.num_gfx_rings) 3933 gfx_v9_0_cp_gfx_enable(adev, false); 3934 gfx_v9_0_cp_compute_enable(adev, false); 3935 3936 r = gfx_v9_0_kiq_resume(adev); 3937 if (r) 3938 return r; 3939 3940 if (adev->gfx.num_gfx_rings) { 3941 r = gfx_v9_0_cp_gfx_resume(adev); 3942 if (r) 3943 return r; 3944 } 3945 3946 r = gfx_v9_0_kcq_resume(adev); 3947 if (r) 3948 return r; 3949 3950 if (adev->gfx.num_gfx_rings) { 3951 ring = &adev->gfx.gfx_ring[0]; 3952 r = amdgpu_ring_test_helper(ring); 3953 if (r) 3954 return r; 3955 } 3956 3957 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3958 ring = &adev->gfx.compute_ring[i]; 3959 amdgpu_ring_test_helper(ring); 3960 } 3961 3962 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3963 3964 return 0; 3965 } 3966 3967 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3968 { 3969 u32 tmp; 3970 3971 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 3972 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 3973 return; 3974 3975 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3976 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3977 adev->df.hash_status.hash_64k); 3978 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3979 adev->df.hash_status.hash_2m); 3980 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3981 adev->df.hash_status.hash_1g); 3982 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3983 } 3984 3985 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3986 { 3987 if (adev->gfx.num_gfx_rings) 3988 gfx_v9_0_cp_gfx_enable(adev, enable); 3989 gfx_v9_0_cp_compute_enable(adev, enable); 3990 } 3991 3992 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block) 3993 { 3994 int r; 3995 struct amdgpu_device *adev = ip_block->adev; 3996 3997 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 3998 adev->gfx.cleaner_shader_ptr); 3999 4000 if (!amdgpu_sriov_vf(adev)) 4001 gfx_v9_0_init_golden_registers(adev); 4002 4003 gfx_v9_0_constants_init(adev); 4004 4005 gfx_v9_0_init_tcp_config(adev); 4006 4007 r = adev->gfx.rlc.funcs->resume(adev); 4008 if (r) 4009 return r; 4010 4011 r = gfx_v9_0_cp_resume(adev); 4012 if (r) 4013 return r; 4014 4015 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) && 4016 !amdgpu_sriov_vf(adev)) 4017 gfx_v9_4_2_set_power_brake_sequence(adev); 4018 4019 return r; 4020 } 4021 4022 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) 4023 { 4024 struct amdgpu_device *adev = ip_block->adev; 4025 4026 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4027 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4028 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4029 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4030 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4031 4032 /* DF freeze and kcq disable will fail */ 4033 if (!amdgpu_ras_intr_triggered()) 4034 /* disable KCQ to avoid CPC touch memory not valid anymore */ 4035 amdgpu_gfx_disable_kcq(adev, 0); 4036 4037 if (amdgpu_sriov_vf(adev)) { 4038 gfx_v9_0_cp_gfx_enable(adev, false); 4039 /* must disable polling for SRIOV when hw finished, otherwise 4040 
* CPC engine may still keep fetching WB address which is already 4041 * invalid after sw finished and trigger DMAR reading error in 4042 * hypervisor side. 4043 */ 4044 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4045 return 0; 4046 } 4047 4048 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4049 * otherwise KIQ is hanging when binding back 4050 */ 4051 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4052 mutex_lock(&adev->srbm_mutex); 4053 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 4054 adev->gfx.kiq[0].ring.pipe, 4055 adev->gfx.kiq[0].ring.queue, 0, 0); 4056 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 4057 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 4058 mutex_unlock(&adev->srbm_mutex); 4059 } 4060 4061 gfx_v9_0_cp_enable(adev, false); 4062 4063 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 4064 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 4065 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 4066 dev_dbg(adev->dev, "Skipping RLC halt\n"); 4067 return 0; 4068 } 4069 4070 adev->gfx.rlc.funcs->stop(adev); 4071 return 0; 4072 } 4073 4074 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block) 4075 { 4076 return gfx_v9_0_hw_fini(ip_block); 4077 } 4078 4079 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block) 4080 { 4081 return gfx_v9_0_hw_init(ip_block); 4082 } 4083 4084 static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block) 4085 { 4086 struct amdgpu_device *adev = ip_block->adev; 4087 4088 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4089 GRBM_STATUS, GUI_ACTIVE)) 4090 return false; 4091 else 4092 return true; 4093 } 4094 4095 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4096 { 4097 unsigned i; 4098 struct amdgpu_device *adev = ip_block->adev; 4099 4100 for (i = 0; i < adev->usec_timeout; i++) { 4101 if (gfx_v9_0_is_idle(ip_block)) 4102 return 0; 4103 udelay(1); 4104 } 4105 return -ETIMEDOUT; 4106 } 4107 4108 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) 4109 { 4110 u32 grbm_soft_reset = 0; 4111 u32 tmp; 4112 struct amdgpu_device *adev = ip_block->adev; 4113 4114 /* GRBM_STATUS */ 4115 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4116 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4117 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4118 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4119 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4120 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4121 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4122 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4123 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4124 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4125 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4126 } 4127 4128 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4129 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4130 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4131 } 4132 4133 /* GRBM_STATUS2 */ 4134 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4135 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4136 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4137 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4138 4139 4140 if (grbm_soft_reset) { 4141 /* stop the rlc */ 4142 adev->gfx.rlc.funcs->stop(adev); 4143 4144 if (adev->gfx.num_gfx_rings) 4145 /* Disable GFX parsing/prefetching */ 4146 gfx_v9_0_cp_gfx_enable(adev, false); 4147 4148 /* Disable MEC parsing/prefetching */ 4149 
gfx_v9_0_cp_compute_enable(adev, false); 4150 4151 if (grbm_soft_reset) { 4152 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4153 tmp |= grbm_soft_reset; 4154 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4155 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4156 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4157 4158 udelay(50); 4159 4160 tmp &= ~grbm_soft_reset; 4161 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4162 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4163 } 4164 4165 /* Wait a little for things to settle down */ 4166 udelay(50); 4167 } 4168 return 0; 4169 } 4170 4171 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4172 { 4173 signed long r, cnt = 0; 4174 unsigned long flags; 4175 uint32_t seq, reg_val_offs = 0; 4176 uint64_t value = 0; 4177 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 4178 struct amdgpu_ring *ring = &kiq->ring; 4179 4180 BUG_ON(!ring->funcs->emit_rreg); 4181 4182 spin_lock_irqsave(&kiq->ring_lock, flags); 4183 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4184 pr_err("critical bug! too many kiq readers\n"); 4185 goto failed_unlock; 4186 } 4187 amdgpu_ring_alloc(ring, 32); 4188 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4189 amdgpu_ring_write(ring, 9 | /* src: register*/ 4190 (5 << 8) | /* dst: memory */ 4191 (1 << 16) | /* count sel */ 4192 (1 << 20)); /* write confirm */ 4193 amdgpu_ring_write(ring, 0); 4194 amdgpu_ring_write(ring, 0); 4195 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4196 reg_val_offs * 4)); 4197 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4198 reg_val_offs * 4)); 4199 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4200 if (r) 4201 goto failed_undo; 4202 4203 amdgpu_ring_commit(ring); 4204 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4205 4206 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4207 4208 /* don't wait anymore for gpu reset case because this way may 4209 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4210 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4211 * never return if we keep waiting in virt_kiq_rreg, which cause 4212 * gpu_recover() hang there. 
4213 * 4214 * also don't wait anymore for IRQ context 4215 * */ 4216 if (r < 1 && (amdgpu_in_reset(adev))) 4217 goto failed_kiq_read; 4218 4219 might_sleep(); 4220 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4221 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4222 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4223 } 4224 4225 if (cnt > MAX_KIQ_REG_TRY) 4226 goto failed_kiq_read; 4227 4228 mb(); 4229 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4230 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4231 amdgpu_device_wb_free(adev, reg_val_offs); 4232 return value; 4233 4234 failed_undo: 4235 amdgpu_ring_undo(ring); 4236 failed_unlock: 4237 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4238 failed_kiq_read: 4239 if (reg_val_offs) 4240 amdgpu_device_wb_free(adev, reg_val_offs); 4241 pr_err("failed to read gpu clock\n"); 4242 return ~0; 4243 } 4244 4245 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4246 { 4247 uint64_t clock, clock_lo, clock_hi, hi_check; 4248 4249 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4250 case IP_VERSION(9, 3, 0): 4251 preempt_disable(); 4252 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4253 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4254 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4255 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4256 * roughly every 42 seconds. 4257 */ 4258 if (hi_check != clock_hi) { 4259 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4260 clock_hi = hi_check; 4261 } 4262 preempt_enable(); 4263 clock = clock_lo | (clock_hi << 32ULL); 4264 break; 4265 default: 4266 amdgpu_gfx_off_ctrl(adev, false); 4267 mutex_lock(&adev->gfx.gpu_clock_mutex); 4268 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4269 IP_VERSION(9, 0, 1) && 4270 amdgpu_sriov_runtime(adev)) { 4271 clock = gfx_v9_0_kiq_read_clock(adev); 4272 } else { 4273 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4274 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4275 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4276 } 4277 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4278 amdgpu_gfx_off_ctrl(adev, true); 4279 break; 4280 } 4281 return clock; 4282 } 4283 4284 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4285 uint32_t vmid, 4286 uint32_t gds_base, uint32_t gds_size, 4287 uint32_t gws_base, uint32_t gws_size, 4288 uint32_t oa_base, uint32_t oa_size) 4289 { 4290 struct amdgpu_device *adev = ring->adev; 4291 4292 /* GDS Base */ 4293 gfx_v9_0_write_data_to_reg(ring, 0, false, 4294 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4295 gds_base); 4296 4297 /* GDS Size */ 4298 gfx_v9_0_write_data_to_reg(ring, 0, false, 4299 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4300 gds_size); 4301 4302 /* GWS */ 4303 gfx_v9_0_write_data_to_reg(ring, 0, false, 4304 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4305 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4306 4307 /* OA */ 4308 gfx_v9_0_write_data_to_reg(ring, 0, false, 4309 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4310 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4311 } 4312 4313 static const u32 vgpr_init_compute_shader[] = 4314 { 4315 0xb07c0000, 0xbe8000ff, 4316 0x000000f8, 0xbf110800, 4317 0x7e000280, 0x7e020280, 4318 0x7e040280, 0x7e060280, 4319 0x7e080280, 0x7e0a0280, 4320 0x7e0c0280, 0x7e0e0280, 4321 0x80808800, 0xbe803200, 4322 0xbf84fff5, 0xbf9c0000, 4323 0xd28c0001, 
0x0001007f, 4324 0xd28d0001, 0x0002027e, 4325 0x10020288, 0xb8810904, 4326 0xb7814000, 0xd1196a01, 4327 0x00000301, 0xbe800087, 4328 0xbefc00c1, 0xd89c4000, 4329 0x00020201, 0xd89cc080, 4330 0x00040401, 0x320202ff, 4331 0x00000800, 0x80808100, 4332 0xbf84fff8, 0x7e020280, 4333 0xbf810000, 0x00000000, 4334 }; 4335 4336 static const u32 sgpr_init_compute_shader[] = 4337 { 4338 0xb07c0000, 0xbe8000ff, 4339 0x0000005f, 0xbee50080, 4340 0xbe812c65, 0xbe822c65, 4341 0xbe832c65, 0xbe842c65, 4342 0xbe852c65, 0xb77c0005, 4343 0x80808500, 0xbf84fff8, 4344 0xbe800080, 0xbf810000, 4345 }; 4346 4347 static const u32 vgpr_init_compute_shader_arcturus[] = { 4348 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4349 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4350 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4351 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4352 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4353 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4354 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4355 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4356 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4357 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4358 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4359 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4360 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4361 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4362 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4363 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4364 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4365 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4366 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4367 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4368 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4369 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4370 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4371 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4372 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4373 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4374 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4375 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4376 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4377 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4378 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4379 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4380 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4381 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4382 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4383 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4384 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4385 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4386 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4387 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4388 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4389 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4390 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4391 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4392 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4393 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4394 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4395 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4396 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4397 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4398 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4399 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4400 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4401 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4402 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4403 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4404 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4405 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4406 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4407 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4408 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4409 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4410 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4411 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4412 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4413 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4414 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4415 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4416 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4417 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4418 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4419 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4420 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4421 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4422 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4423 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4424 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4425 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4426 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4427 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4428 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4429 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4430 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4431 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4432 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4433 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4434 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4435 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4436 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4437 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4438 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4439 0xbf84fff8, 0xbf810000, 4440 }; 4441 4442 /* When below register arrays changed, please update gpr_reg_size, 4443 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4444 to cover all gfx9 ASICs */ 4445 static const struct soc15_reg_entry vgpr_init_regs[] = { 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4460 }; 4461 4462 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4463 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4472 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4477 }; 4478 4479 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4487 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4490 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4491 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4492 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4493 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4494 }; 4495 4496 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4509 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4510 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4511 }; 4512 4513 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4514 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4515 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4516 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4517 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4518 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4519 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4520 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4521 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4522 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4523 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4524 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4525 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4526 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4527 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4528 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4529 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4530 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4531 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4532 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4533 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4534 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4535 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4536 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4537 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4538 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4539 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4540 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4541 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4542 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4543 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4544 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4545 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4546 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4547 }; 4548 4549 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4550 { 4551 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4552 int i, r; 4553 4554 /* only support when RAS is enabled */ 4555 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4556 return 0; 4557 4558 r = amdgpu_ring_alloc(ring, 7); 4559 if (r) { 4560 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4561 ring->name, r); 4562 return r; 4563 } 4564 4565 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4566 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4567 4568 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4569 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4570 PACKET3_DMA_DATA_DST_SEL(1) | 4571 PACKET3_DMA_DATA_SRC_SEL(2) | 4572 PACKET3_DMA_DATA_ENGINE(0))); 4573 amdgpu_ring_write(ring, 0); 4574 amdgpu_ring_write(ring, 0); 4575 amdgpu_ring_write(ring, 0); 4576 amdgpu_ring_write(ring, 0); 4577 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4578 adev->gds.gds_size); 4579 4580 amdgpu_ring_commit(ring); 4581 4582 for (i = 0; i < adev->usec_timeout; i++) { 4583 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4584 break; 4585 udelay(1); 4586 } 4587 4588 if (i >= adev->usec_timeout) 4589 r = -ETIMEDOUT; 4590 4591 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4592 4593 return r; 4594 } 4595 4596 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4597 { 4598 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4599 struct amdgpu_ib ib; 4600 struct dma_fence *f = NULL; 4601 int r, i; 4602 unsigned total_size, vgpr_offset, sgpr_offset; 4603 u64 gpu_addr; 4604 4605 int compute_dim_x = adev->gfx.config.max_shader_engines * 4606 adev->gfx.config.max_cu_per_sh * 4607 adev->gfx.config.max_sh_per_se; 4608 int sgpr_work_group_size = 5; 4609 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4610 int vgpr_init_shader_size; 4611 const u32 *vgpr_init_shader_ptr; 4612 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4613 4614 /* only support when RAS is enabled */ 4615 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4616 return 0; 4617 4618 /* bail if the compute ring is not ready */ 4619 if (!ring->sched.ready) 4620 return 0; 4621 4622 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4623 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4624 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4625 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4626 } else { 4627 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4628 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4629 vgpr_init_regs_ptr = vgpr_init_regs; 4630 } 4631 4632 total_size = 4633 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4634 total_size += 4635 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4636 total_size += 4637 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4638 total_size = ALIGN(total_size, 256); 4639 vgpr_offset = total_size; 4640 total_size += ALIGN(vgpr_init_shader_size, 256); 4641 sgpr_offset = total_size; 4642 total_size += sizeof(sgpr_init_compute_shader); 4643 4644 /* allocate an indirect buffer to put the commands in */ 4645 memset(&ib, 0, sizeof(ib)); 4646 r = amdgpu_ib_get(adev, NULL, total_size, 4647 AMDGPU_IB_POOL_DIRECT, &ib); 4648 if (r) { 4649 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4650 return r; 4651 } 4652 4653 /* load the compute 
shaders */ 4654 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4655 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4656 4657 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4658 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4659 4660 /* init the ib length to 0 */ 4661 ib.length_dw = 0; 4662 4663 /* VGPR */ 4664 /* write the register state for the compute dispatch */ 4665 for (i = 0; i < gpr_reg_size; i++) { 4666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4667 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4668 - PACKET3_SET_SH_REG_START; 4669 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4670 } 4671 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4672 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4673 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4674 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4675 - PACKET3_SET_SH_REG_START; 4676 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4677 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4678 4679 /* write dispatch packet */ 4680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4681 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4682 ib.ptr[ib.length_dw++] = 1; /* y */ 4683 ib.ptr[ib.length_dw++] = 1; /* z */ 4684 ib.ptr[ib.length_dw++] = 4685 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4686 4687 /* write CS partial flush packet */ 4688 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4689 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4690 4691 /* SGPR1 */ 4692 /* write the register state for the compute dispatch */ 4693 for (i = 0; i < gpr_reg_size; i++) { 4694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4695 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4696 - PACKET3_SET_SH_REG_START; 4697 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4698 } 4699 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4700 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4701 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4702 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4703 - PACKET3_SET_SH_REG_START; 4704 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4705 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4706 4707 /* write dispatch packet */ 4708 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4709 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4710 ib.ptr[ib.length_dw++] = 1; /* y */ 4711 ib.ptr[ib.length_dw++] = 1; /* z */ 4712 ib.ptr[ib.length_dw++] = 4713 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4714 4715 /* write CS partial flush packet */ 4716 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4717 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4718 4719 /* SGPR2 */ 4720 /* write the register state for the compute dispatch */ 4721 for (i = 0; i < gpr_reg_size; i++) { 4722 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4723 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4724 - PACKET3_SET_SH_REG_START; 4725 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4726 } 4727 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4728 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4729 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4730 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 4731 - PACKET3_SET_SH_REG_START; 4732 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4733 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4734 4735 /* write dispatch packet */ 4736 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4737 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4738 ib.ptr[ib.length_dw++] = 1; /* y */ 4739 ib.ptr[ib.length_dw++] = 1; /* z */ 4740 ib.ptr[ib.length_dw++] = 4741 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4742 4743 /* write CS partial flush packet */ 4744 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4745 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4746 4747 /* schedule the ib on the ring */ 4748 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4749 if (r) { 4750 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4751 goto fail; 4752 } 4753 4754 /* wait for the GPU to finish processing the IB */ 4755 r = dma_fence_wait(f, false); 4756 if (r) { 4757 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4758 goto fail; 4759 } 4760 4761 fail: 4762 amdgpu_ib_free(&ib, NULL); 4763 dma_fence_put(f); 4764 4765 return r; 4766 } 4767 4768 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block) 4769 { 4770 struct amdgpu_device *adev = ip_block->adev; 4771 4772 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 4773 4774 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 4775 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4776 adev->gfx.num_gfx_rings = 0; 4777 else 4778 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4779 adev->gfx.xcc_mask = 1; 4780 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4781 AMDGPU_MAX_COMPUTE_RINGS); 4782 gfx_v9_0_set_kiq_pm4_funcs(adev); 4783 gfx_v9_0_set_ring_funcs(adev); 4784 gfx_v9_0_set_irq_funcs(adev); 4785 gfx_v9_0_set_gds_init(adev); 4786 gfx_v9_0_set_rlc_funcs(adev); 4787 4788 /* init rlcg reg access ctrl */ 4789 gfx_v9_0_init_rlcg_reg_access_ctrl(adev); 4790 4791 return gfx_v9_0_init_microcode(adev); 4792 } 4793 4794 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block) 4795 { 4796 struct amdgpu_device *adev = ip_block->adev; 4797 int r; 4798 4799 /* 4800 * Temporary workaround: on several boards the CP firmware fails to 4801 * update its read pointer while CPDMA is writing the clearing operation 4802 * to GDS during the suspend/resume sequence, so just 4803 * limit this operation to the cold boot sequence.
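 * The clear itself is done by the gfx_v9_0_do_edc_gds_workarounds() call
 * below: it programs GDS_VMID0_BASE/SIZE, emits a CP DMA_DATA packet that
 * writes zeros across the whole VMID0 GDS aperture, busy-waits for the
 * compute ring to drain, and then sets GDS_VMID0_SIZE back to 0.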
4804 */ 4805 if ((!adev->in_suspend) && 4806 (adev->gds.gds_size)) { 4807 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4808 if (r) 4809 return r; 4810 } 4811 4812 /* requires IBs so do in late init after IB pool is initialized */ 4813 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4814 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4815 else 4816 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4817 4818 if (r) 4819 return r; 4820 4821 if (adev->gfx.ras && 4822 adev->gfx.ras->enable_watchdog_timer) 4823 adev->gfx.ras->enable_watchdog_timer(adev); 4824 4825 return 0; 4826 } 4827 4828 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block) 4829 { 4830 struct amdgpu_device *adev = ip_block->adev; 4831 int r; 4832 4833 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4834 if (r) 4835 return r; 4836 4837 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4838 if (r) 4839 return r; 4840 4841 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 4842 if (r) 4843 return r; 4844 4845 r = gfx_v9_0_ecc_late_init(ip_block); 4846 if (r) 4847 return r; 4848 4849 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4850 gfx_v9_4_2_debug_trap_config_init(adev, 4851 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4852 else 4853 gfx_v9_0_debug_trap_config_init(adev, 4854 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4855 4856 return 0; 4857 } 4858 4859 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4860 { 4861 uint32_t rlc_setting; 4862 4863 /* if RLC is not enabled, do nothing */ 4864 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4865 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4866 return false; 4867 4868 return true; 4869 } 4870 4871 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4872 { 4873 uint32_t data; 4874 unsigned i; 4875 4876 data = RLC_SAFE_MODE__CMD_MASK; 4877 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4878 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4879 4880 /* wait for RLC_SAFE_MODE */ 4881 for (i = 0; i < adev->usec_timeout; i++) { 4882 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4883 break; 4884 udelay(1); 4885 } 4886 } 4887 4888 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4889 { 4890 uint32_t data; 4891 4892 data = RLC_SAFE_MODE__CMD_MASK; 4893 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4894 } 4895 4896 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4897 bool enable) 4898 { 4899 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4900 4901 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4902 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4903 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4904 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4905 } else { 4906 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4907 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4908 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4909 } 4910 4911 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4912 } 4913 4914 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4915 bool enable) 4916 { 4917 /* TODO: double check if we need to perform under safe mode */ 4918 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4919 4920 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4921 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4922 else 4923 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4924 4925 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4926 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4927 else 4928 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4929 4930 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4931 } 4932 4933 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4934 bool enable) 4935 { 4936 uint32_t data, def; 4937 4938 /* It is disabled by HW by default */ 4939 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4940 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4941 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4942 4943 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4944 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4945 4946 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4947 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4948 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4949 4950 /* only for Vega10 & Raven1 */ 4951 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4952 4953 if (def != data) 4954 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4955 4956 /* MGLS is a global flag to control all MGLS in GFX */ 4957 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4958 /* 2 - RLC memory Light sleep */ 4959 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4960 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4961 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4962 if (def != data) 4963 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4964 } 4965 /* 3 - CP memory Light sleep */ 4966 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4967 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4968 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4969 if (def != data) 4970 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4971 } 4972 } 4973 } else { 4974 /* 1 - MGCG_OVERRIDE */ 4975 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4976 4977 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4978 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4979 4980 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4981 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4982 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4983 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4984 4985 if (def != data) 4986 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4987 4988 /* 2 - disable MGLS in RLC */ 4989 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4990 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4991 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4992 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4993 } 4994 4995 /* 3 - disable MGLS in CP */ 4996 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4997 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4998 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4999 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5000 } 5001 } 5002 } 5003 5004 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 5005 bool enable) 5006 { 5007 uint32_t data, def; 5008 5009 if (!adev->gfx.num_gfx_rings) 5010 return; 5011 5012 /* Enable 3D CGCG/CGLS */ 5013 if (enable) { 5014 /* write cmd to clear cgcg/cgls ov */ 5015 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5016 /* unset CGCG override */ 5017 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5018 /* update CGCG and CGLS override bits */ 5019 if (def != data) 5020 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5021 5022 /* enable 3Dcgcg FSM(0x0000363f) */ 5023 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5024 5025 if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5026 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5027 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5028 else 5029 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5030 5031 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5032 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5033 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5034 if (def != data) 5035 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5036 5037 /* set IDLE_POLL_COUNT(0x00900100) */ 5038 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5039 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5040 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5041 if (def != data) 5042 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5043 } else { 5044 /* Disable CGCG/CGLS */ 5045 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5046 /* disable cgcg, cgls should be disabled */ 5047 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5048 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5049 /* disable cgcg and cgls in FSM */ 5050 if (def != data) 5051 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5052 } 5053 } 5054 5055 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5056 bool enable) 5057 { 5058 uint32_t def, data; 5059 5060 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5061 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5062 /* unset CGCG override */ 5063 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5064 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5065 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5066 else 5067 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5068 /* update CGCG and CGLS override bits */ 5069 if (def != data) 5070 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5071 5072 /* enable cgcg FSM(0x0000363F) */ 5073 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5074 5075 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 5076 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5077 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5078 else 5079 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5080 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5081 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5082 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5083 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5084 if (def != data) 5085 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5086 5087 /* set IDLE_POLL_COUNT(0x00900100) */ 5088 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5089 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5090 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5091 if (def != data) 5092 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5093 } else { 5094 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5095 /* reset CGCG/CGLS bits */ 5096 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5097 /* disable cgcg and cgls in FSM */ 5098 if (def != data) 5099 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5100 } 5101 } 5102 5103 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5104 bool enable) 5105 { 5106 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5107 if (enable) { 5108 /* CGCG/CGLS should be enabled after MGCG/MGLS 5109 * === MGCG + MGLS === 5110 */ 5111 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5112 /* === CGCG /CGLS for GFX 3D Only 
=== */ 5113 gfx_v9_0_update_3d_clock_gating(adev, enable); 5114 /* === CGCG + CGLS === */ 5115 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5116 } else { 5117 /* CGCG/CGLS should be disabled before MGCG/MGLS 5118 * === CGCG + CGLS === 5119 */ 5120 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5121 /* === CGCG /CGLS for GFX 3D Only === */ 5122 gfx_v9_0_update_3d_clock_gating(adev, enable); 5123 /* === MGCG + MGLS === */ 5124 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5125 } 5126 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5127 return 0; 5128 } 5129 5130 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5131 unsigned int vmid) 5132 { 5133 u32 reg, data; 5134 5135 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5136 if (amdgpu_sriov_is_pp_one_vf(adev)) 5137 data = RREG32_NO_KIQ(reg); 5138 else 5139 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5140 5141 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5142 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5143 5144 if (amdgpu_sriov_is_pp_one_vf(adev)) 5145 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5146 else 5147 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5148 } 5149 5150 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5151 { 5152 amdgpu_gfx_off_ctrl(adev, false); 5153 5154 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5155 5156 amdgpu_gfx_off_ctrl(adev, true); 5157 } 5158 5159 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5160 uint32_t offset, 5161 struct soc15_reg_rlcg *entries, int arr_size) 5162 { 5163 int i; 5164 uint32_t reg; 5165 5166 if (!entries) 5167 return false; 5168 5169 for (i = 0; i < arr_size; i++) { 5170 const struct soc15_reg_rlcg *entry; 5171 5172 entry = &entries[i]; 5173 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5174 if (offset == reg) 5175 return true; 5176 } 5177 5178 return false; 5179 } 5180 5181 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5182 { 5183 return gfx_v9_0_check_rlcg_range(adev, offset, 5184 (void *)rlcg_access_gc_9_0, 5185 ARRAY_SIZE(rlcg_access_gc_9_0)); 5186 } 5187 5188 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5189 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5190 .set_safe_mode = gfx_v9_0_set_safe_mode, 5191 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5192 .init = gfx_v9_0_rlc_init, 5193 .get_csb_size = gfx_v9_0_get_csb_size, 5194 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5195 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5196 .resume = gfx_v9_0_rlc_resume, 5197 .stop = gfx_v9_0_rlc_stop, 5198 .reset = gfx_v9_0_rlc_reset, 5199 .start = gfx_v9_0_rlc_start, 5200 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5201 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5202 }; 5203 5204 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5205 enum amd_powergating_state state) 5206 { 5207 struct amdgpu_device *adev = ip_block->adev; 5208 bool enable = (state == AMD_PG_STATE_GATE); 5209 5210 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5211 case IP_VERSION(9, 2, 2): 5212 case IP_VERSION(9, 1, 0): 5213 case IP_VERSION(9, 3, 0): 5214 if (!enable) 5215 amdgpu_gfx_off_ctrl_immediate(adev, false); 5216 5217 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5218 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5219 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5220 } else { 5221 
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5222 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5223 } 5224 5225 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5226 gfx_v9_0_enable_cp_power_gating(adev, true); 5227 else 5228 gfx_v9_0_enable_cp_power_gating(adev, false); 5229 5230 /* update gfx cgpg state */ 5231 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5232 5233 /* update mgcg state */ 5234 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5235 5236 if (enable) 5237 amdgpu_gfx_off_ctrl_immediate(adev, true); 5238 break; 5239 case IP_VERSION(9, 2, 1): 5240 amdgpu_gfx_off_ctrl_immediate(adev, enable); 5241 break; 5242 default: 5243 break; 5244 } 5245 5246 return 0; 5247 } 5248 5249 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5250 enum amd_clockgating_state state) 5251 { 5252 struct amdgpu_device *adev = ip_block->adev; 5253 5254 if (amdgpu_sriov_vf(adev)) 5255 return 0; 5256 5257 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5258 case IP_VERSION(9, 0, 1): 5259 case IP_VERSION(9, 2, 1): 5260 case IP_VERSION(9, 4, 0): 5261 case IP_VERSION(9, 2, 2): 5262 case IP_VERSION(9, 1, 0): 5263 case IP_VERSION(9, 4, 1): 5264 case IP_VERSION(9, 3, 0): 5265 case IP_VERSION(9, 4, 2): 5266 gfx_v9_0_update_gfx_clock_gating(adev, 5267 state == AMD_CG_STATE_GATE); 5268 break; 5269 default: 5270 break; 5271 } 5272 return 0; 5273 } 5274 5275 static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5276 { 5277 struct amdgpu_device *adev = ip_block->adev; 5278 int data; 5279 5280 if (amdgpu_sriov_vf(adev)) 5281 *flags = 0; 5282 5283 /* AMD_CG_SUPPORT_GFX_MGCG */ 5284 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5285 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5286 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5287 5288 /* AMD_CG_SUPPORT_GFX_CGCG */ 5289 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5290 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5291 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5292 5293 /* AMD_CG_SUPPORT_GFX_CGLS */ 5294 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5295 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5296 5297 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5298 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5299 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5300 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5301 5302 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5303 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5304 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5305 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5306 5307 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5308 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5309 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5310 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5311 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5312 5313 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5314 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5315 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5316 } 5317 } 5318 5319 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5320 { 5321 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5322 } 5323 5324 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5325 { 5326 struct amdgpu_device *adev = ring->adev; 5327 u64 wptr; 5328 5329 /* XXX check if swapping is necessary on BE */ 5330 if (ring->use_doorbell) { 5331 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5332 } else { 5333 wptr = RREG32_SOC15(GC, 0, 
mmCP_RB0_WPTR); 5334 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5335 } 5336 5337 return wptr; 5338 } 5339 5340 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5341 { 5342 struct amdgpu_device *adev = ring->adev; 5343 5344 if (ring->use_doorbell) { 5345 /* XXX check if swapping is necessary on BE */ 5346 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5347 WDOORBELL64(ring->doorbell_index, ring->wptr); 5348 } else { 5349 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5350 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5351 } 5352 } 5353 5354 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5355 { 5356 struct amdgpu_device *adev = ring->adev; 5357 u32 ref_and_mask, reg_mem_engine; 5358 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5359 5360 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5361 switch (ring->me) { 5362 case 1: 5363 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5364 break; 5365 case 2: 5366 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5367 break; 5368 default: 5369 return; 5370 } 5371 reg_mem_engine = 0; 5372 } else { 5373 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5374 reg_mem_engine = 1; /* pfp */ 5375 } 5376 5377 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5378 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5379 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5380 ref_and_mask, ref_and_mask, 0x20); 5381 } 5382 5383 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5384 struct amdgpu_job *job, 5385 struct amdgpu_ib *ib, 5386 uint32_t flags) 5387 { 5388 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5389 u32 header, control = 0; 5390 5391 if (ib->flags & AMDGPU_IB_FLAG_CE) 5392 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5393 else 5394 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5395 5396 control |= ib->length_dw | (vmid << 24); 5397 5398 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5399 control |= INDIRECT_BUFFER_PRE_ENB(1); 5400 5401 if (flags & AMDGPU_IB_PREEMPTED) 5402 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5403 5404 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5405 gfx_v9_0_ring_emit_de_meta(ring, 5406 (!amdgpu_sriov_vf(ring->adev) && 5407 flags & AMDGPU_IB_PREEMPTED) ? 
5408 true : false, 5409 job->gds_size > 0 && job->gds_base != 0); 5410 } 5411 5412 amdgpu_ring_write(ring, header); 5413 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5414 amdgpu_ring_write(ring, 5415 #ifdef __BIG_ENDIAN 5416 (2 << 0) | 5417 #endif 5418 lower_32_bits(ib->gpu_addr)); 5419 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5420 amdgpu_ring_ib_on_emit_cntl(ring); 5421 amdgpu_ring_write(ring, control); 5422 } 5423 5424 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5425 unsigned offset) 5426 { 5427 u32 control = ring->ring[offset]; 5428 5429 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5430 ring->ring[offset] = control; 5431 } 5432 5433 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5434 unsigned offset) 5435 { 5436 struct amdgpu_device *adev = ring->adev; 5437 void *ce_payload_cpu_addr; 5438 uint64_t payload_offset, payload_size; 5439 5440 payload_size = sizeof(struct v9_ce_ib_state); 5441 5442 if (ring->is_mes_queue) { 5443 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5444 gfx[0].gfx_meta_data) + 5445 offsetof(struct v9_gfx_meta_data, ce_payload); 5446 ce_payload_cpu_addr = 5447 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5448 } else { 5449 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5450 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5451 } 5452 5453 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5454 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5455 } else { 5456 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5457 (ring->buf_mask + 1 - offset) << 2); 5458 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5459 memcpy((void *)&ring->ring[0], 5460 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5461 payload_size); 5462 } 5463 } 5464 5465 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5466 unsigned offset) 5467 { 5468 struct amdgpu_device *adev = ring->adev; 5469 void *de_payload_cpu_addr; 5470 uint64_t payload_offset, payload_size; 5471 5472 payload_size = sizeof(struct v9_de_ib_state); 5473 5474 if (ring->is_mes_queue) { 5475 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5476 gfx[0].gfx_meta_data) + 5477 offsetof(struct v9_gfx_meta_data, de_payload); 5478 de_payload_cpu_addr = 5479 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5480 } else { 5481 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5482 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5483 } 5484 5485 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5486 IB_COMPLETION_STATUS_PREEMPTED; 5487 5488 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5489 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5490 } else { 5491 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5492 (ring->buf_mask + 1 - offset) << 2); 5493 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5494 memcpy((void *)&ring->ring[0], 5495 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5496 payload_size); 5497 } 5498 } 5499 5500 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5501 struct amdgpu_job *job, 5502 struct amdgpu_ib *ib, 5503 uint32_t flags) 5504 { 5505 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5506 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5507 5508 /* Currently, there is a high possibility to get wave ID mismatch 5509 * between ME and GDS, leading to a hw deadlock, because ME generates 5510 * 
different wave IDs than the GDS expects. This situation happens 5511 * randomly when at least 5 compute pipes use GDS ordered append. 5512 * The wave IDs generated by ME are also wrong after suspend/resume. 5513 * Those are probably bugs somewhere else in the kernel driver. 5514 * 5515 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5516 * GDS to 0 for this ring (me/pipe). 5517 */ 5518 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5519 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5520 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5521 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5522 } 5523 5524 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5525 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5526 amdgpu_ring_write(ring, 5527 #ifdef __BIG_ENDIAN 5528 (2 << 0) | 5529 #endif 5530 lower_32_bits(ib->gpu_addr)); 5531 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5532 amdgpu_ring_write(ring, control); 5533 } 5534 5535 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5536 u64 seq, unsigned flags) 5537 { 5538 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5539 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5540 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5541 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5542 uint32_t dw2 = 0; 5543 5544 /* RELEASE_MEM - flush caches, send int */ 5545 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5546 5547 if (writeback) { 5548 dw2 = EOP_TC_NC_ACTION_EN; 5549 } else { 5550 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5551 EOP_TC_MD_ACTION_EN; 5552 } 5553 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5554 EVENT_INDEX(5); 5555 if (exec) 5556 dw2 |= EOP_EXEC; 5557 5558 amdgpu_ring_write(ring, dw2); 5559 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5560 5561 /* 5562 * the address should be Qword aligned if 64bit write, Dword 5563 * aligned if only send 32bit data low (discard data high) 5564 */ 5565 if (write64bit) 5566 BUG_ON(addr & 0x7); 5567 else 5568 BUG_ON(addr & 0x3); 5569 amdgpu_ring_write(ring, lower_32_bits(addr)); 5570 amdgpu_ring_write(ring, upper_32_bits(addr)); 5571 amdgpu_ring_write(ring, lower_32_bits(seq)); 5572 amdgpu_ring_write(ring, upper_32_bits(seq)); 5573 amdgpu_ring_write(ring, 0); 5574 } 5575 5576 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5577 { 5578 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5579 uint32_t seq = ring->fence_drv.sync_seq; 5580 uint64_t addr = ring->fence_drv.gpu_addr; 5581 5582 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5583 lower_32_bits(addr), upper_32_bits(addr), 5584 seq, 0xffffffff, 4); 5585 } 5586 5587 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5588 unsigned vmid, uint64_t pd_addr) 5589 { 5590 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5591 5592 /* compute doesn't have PFP */ 5593 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5594 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5595 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5596 amdgpu_ring_write(ring, 0x0); 5597 } 5598 } 5599 5600 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5601 { 5602 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5603 } 5604 5605 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5606 { 5607 u64 wptr; 5608 5609 /* XXX check if swapping is necessary on BE */ 5610 if (ring->use_doorbell) 5611 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5612 else 5613 BUG(); 5614 return wptr; 5615 } 5616 5617 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5618 { 5619 struct amdgpu_device *adev = ring->adev; 5620 5621 /* XXX check if swapping is necessary on BE */ 5622 if (ring->use_doorbell) { 5623 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5624 WDOORBELL64(ring->doorbell_index, ring->wptr); 5625 } else{ 5626 BUG(); /* only DOORBELL method supported on gfx9 now */ 5627 } 5628 } 5629 5630 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5631 u64 seq, unsigned int flags) 5632 { 5633 struct amdgpu_device *adev = ring->adev; 5634 5635 /* we only allocate 32bit for each seq wb address */ 5636 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5637 5638 /* write fence seq to the "addr" */ 5639 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5640 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5641 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5642 amdgpu_ring_write(ring, lower_32_bits(addr)); 5643 amdgpu_ring_write(ring, upper_32_bits(addr)); 5644 amdgpu_ring_write(ring, lower_32_bits(seq)); 5645 5646 if (flags & AMDGPU_FENCE_FLAG_INT) { 5647 /* set register to trigger INT */ 5648 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5649 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5650 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5651 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5652 amdgpu_ring_write(ring, 0); 5653 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5654 } 5655 } 5656 5657 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5658 { 5659 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5660 amdgpu_ring_write(ring, 0); 5661 } 5662 5663 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5664 { 5665 struct 
amdgpu_device *adev = ring->adev; 5666 struct v9_ce_ib_state ce_payload = {0}; 5667 uint64_t offset, ce_payload_gpu_addr; 5668 void *ce_payload_cpu_addr; 5669 int cnt; 5670 5671 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5672 5673 if (ring->is_mes_queue) { 5674 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5675 gfx[0].gfx_meta_data) + 5676 offsetof(struct v9_gfx_meta_data, ce_payload); 5677 ce_payload_gpu_addr = 5678 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5679 ce_payload_cpu_addr = 5680 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5681 } else { 5682 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5683 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5684 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5685 } 5686 5687 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5688 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5689 WRITE_DATA_DST_SEL(8) | 5690 WR_CONFIRM) | 5691 WRITE_DATA_CACHE_POLICY(0)); 5692 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5693 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5694 5695 amdgpu_ring_ib_on_emit_ce(ring); 5696 5697 if (resume) 5698 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5699 sizeof(ce_payload) >> 2); 5700 else 5701 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5702 sizeof(ce_payload) >> 2); 5703 } 5704 5705 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5706 { 5707 int i, r = 0; 5708 struct amdgpu_device *adev = ring->adev; 5709 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5710 struct amdgpu_ring *kiq_ring = &kiq->ring; 5711 unsigned long flags; 5712 5713 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5714 return -EINVAL; 5715 5716 spin_lock_irqsave(&kiq->ring_lock, flags); 5717 5718 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5719 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5720 return -ENOMEM; 5721 } 5722 5723 /* assert preemption condition */ 5724 amdgpu_ring_set_preempt_cond_exec(ring, false); 5725 5726 ring->trail_seq += 1; 5727 amdgpu_ring_alloc(ring, 13); 5728 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5729 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5730 5731 /* assert IB preemption, emit the trailing fence */ 5732 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5733 ring->trail_fence_gpu_addr, 5734 ring->trail_seq); 5735 5736 amdgpu_ring_commit(kiq_ring); 5737 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5738 5739 /* poll the trailing fence */ 5740 for (i = 0; i < adev->usec_timeout; i++) { 5741 if (ring->trail_seq == 5742 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5743 break; 5744 udelay(1); 5745 } 5746 5747 if (i >= adev->usec_timeout) { 5748 r = -EINVAL; 5749 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5750 } 5751 5752 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5753 amdgpu_ring_emit_wreg(ring, 5754 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5755 0x0); 5756 amdgpu_ring_commit(ring); 5757 5758 /* deassert preemption condition */ 5759 amdgpu_ring_set_preempt_cond_exec(ring, true); 5760 return r; 5761 } 5762 5763 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5764 { 5765 struct amdgpu_device *adev = ring->adev; 5766 struct v9_de_ib_state de_payload = {0}; 5767 uint64_t offset, gds_addr, de_payload_gpu_addr; 5768 void *de_payload_cpu_addr; 5769 int cnt; 5770 5771 if (ring->is_mes_queue) { 5772 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5773 gfx[0].gfx_meta_data) + 5774 
offsetof(struct v9_gfx_meta_data, de_payload); 5775 de_payload_gpu_addr = 5776 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5777 de_payload_cpu_addr = 5778 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5779 5780 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5781 gfx[0].gds_backup) + 5782 offsetof(struct v9_gfx_meta_data, de_payload); 5783 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5784 } else { 5785 offset = offsetof(struct v9_gfx_meta_data, de_payload); 5786 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5787 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5788 5789 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5790 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5791 PAGE_SIZE); 5792 } 5793 5794 if (usegds) { 5795 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5796 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5797 } 5798 5799 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5800 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5801 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5802 WRITE_DATA_DST_SEL(8) | 5803 WR_CONFIRM) | 5804 WRITE_DATA_CACHE_POLICY(0)); 5805 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5806 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5807 5808 amdgpu_ring_ib_on_emit_de(ring); 5809 if (resume) 5810 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5811 sizeof(de_payload) >> 2); 5812 else 5813 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5814 sizeof(de_payload) >> 2); 5815 } 5816 5817 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5818 bool secure) 5819 { 5820 uint32_t v = secure ? FRAME_TMZ : 0; 5821 5822 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5823 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5824 } 5825 5826 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5827 { 5828 uint32_t dw2 = 0; 5829 5830 gfx_v9_0_ring_emit_ce_meta(ring, 5831 (!amdgpu_sriov_vf(ring->adev) && 5832 flags & AMDGPU_IB_PREEMPTED) ? true : false); 5833 5834 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */ 5835 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5836 /* set load_global_config & load_global_uconfig */ 5837 dw2 |= 0x8001; 5838 /* set load_cs_sh_regs */ 5839 dw2 |= 0x01000000; 5840 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5841 dw2 |= 0x10002; 5842 5843 /* set load_ce_ram if a preamble is present */ 5844 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5845 dw2 |= 0x10000000; 5846 } else { 5847 /* still load_ce_ram if this is the first time a preamble is presented, 5848 * even though no context switch actually happens.
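 * (The 0x10000000 below is the same load_ce_ram bit of the CONTEXT_CONTROL
 * dw2 that the context-switch path above sets.)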
5849 */ 5850 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5851 dw2 |= 0x10000000; 5852 } 5853 5854 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5855 amdgpu_ring_write(ring, dw2); 5856 amdgpu_ring_write(ring, 0); 5857 } 5858 5859 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5860 uint64_t addr) 5861 { 5862 unsigned ret; 5863 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5864 amdgpu_ring_write(ring, lower_32_bits(addr)); 5865 amdgpu_ring_write(ring, upper_32_bits(addr)); 5866 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5867 amdgpu_ring_write(ring, 0); 5868 ret = ring->wptr & ring->buf_mask; 5869 /* patch dummy value later */ 5870 amdgpu_ring_write(ring, 0); 5871 return ret; 5872 } 5873 5874 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5875 uint32_t reg_val_offs) 5876 { 5877 struct amdgpu_device *adev = ring->adev; 5878 5879 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5880 amdgpu_ring_write(ring, 0 | /* src: register*/ 5881 (5 << 8) | /* dst: memory */ 5882 (1 << 20)); /* write confirm */ 5883 amdgpu_ring_write(ring, reg); 5884 amdgpu_ring_write(ring, 0); 5885 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5886 reg_val_offs * 4)); 5887 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5888 reg_val_offs * 4)); 5889 } 5890 5891 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5892 uint32_t val) 5893 { 5894 uint32_t cmd = 0; 5895 5896 switch (ring->funcs->type) { 5897 case AMDGPU_RING_TYPE_GFX: 5898 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5899 break; 5900 case AMDGPU_RING_TYPE_KIQ: 5901 cmd = (1 << 16); /* no inc addr */ 5902 break; 5903 default: 5904 cmd = WR_CONFIRM; 5905 break; 5906 } 5907 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5908 amdgpu_ring_write(ring, cmd); 5909 amdgpu_ring_write(ring, reg); 5910 amdgpu_ring_write(ring, 0); 5911 amdgpu_ring_write(ring, val); 5912 } 5913 5914 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5915 uint32_t val, uint32_t mask) 5916 { 5917 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5918 } 5919 5920 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5921 uint32_t reg0, uint32_t reg1, 5922 uint32_t ref, uint32_t mask) 5923 { 5924 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5925 struct amdgpu_device *adev = ring->adev; 5926 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5927 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5928 5929 if (fw_version_ok) 5930 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5931 ref, mask, 0x20); 5932 else 5933 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5934 ref, mask); 5935 } 5936 5937 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5938 { 5939 struct amdgpu_device *adev = ring->adev; 5940 uint32_t value = 0; 5941 5942 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5943 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5944 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5945 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5946 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5947 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5948 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5949 } 5950 5951 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5952 enum amdgpu_interrupt_state state) 5953 { 5954 switch (state) { 5955 case AMDGPU_IRQ_STATE_DISABLE: 5956 case AMDGPU_IRQ_STATE_ENABLE: 5957 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5958 TIME_STAMP_INT_ENABLE, 5959 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5960 break; 5961 default: 5962 break; 5963 } 5964 } 5965 5966 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5967 int me, int pipe, 5968 enum amdgpu_interrupt_state state) 5969 { 5970 u32 mec_int_cntl, mec_int_cntl_reg; 5971 5972 /* 5973 * amdgpu controls only the first MEC. That's why this function only 5974 * handles the setting of interrupts for this specific MEC. All other 5975 * pipes' interrupts are set by amdkfd. 5976 */ 5977 5978 if (me == 1) { 5979 switch (pipe) { 5980 case 0: 5981 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5982 break; 5983 case 1: 5984 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5985 break; 5986 case 2: 5987 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5988 break; 5989 case 3: 5990 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5991 break; 5992 default: 5993 DRM_DEBUG("invalid pipe %d\n", pipe); 5994 return; 5995 } 5996 } else { 5997 DRM_DEBUG("invalid me %d\n", me); 5998 return; 5999 } 6000 6001 switch (state) { 6002 case AMDGPU_IRQ_STATE_DISABLE: 6003 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 6004 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6005 TIME_STAMP_INT_ENABLE, 0); 6006 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6007 break; 6008 case AMDGPU_IRQ_STATE_ENABLE: 6009 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6010 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6011 TIME_STAMP_INT_ENABLE, 1); 6012 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6013 break; 6014 default: 6015 break; 6016 } 6017 } 6018 6019 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, 6020 int me, int pipe) 6021 { 6022 /* 6023 * amdgpu controls only the first MEC. That's why this function only 6024 * handles the setting of interrupts for this specific MEC. All other 6025 * pipes' interrupts are set by amdkfd. 
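 * For that reason this helper returns the CP_ME1_PIPEn_INT_CNTL register
 * offset only for me == 1, pipes 0-3, and returns 0 otherwise so that
 * callers can skip the register write.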
6026 */ 6027 if (me != 1) 6028 return 0; 6029 6030 switch (pipe) { 6031 case 0: 6032 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 6033 case 1: 6034 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 6035 case 2: 6036 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 6037 case 3: 6038 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 6039 default: 6040 return 0; 6041 } 6042 } 6043 6044 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6045 struct amdgpu_irq_src *source, 6046 unsigned type, 6047 enum amdgpu_interrupt_state state) 6048 { 6049 u32 cp_int_cntl_reg, cp_int_cntl; 6050 int i, j; 6051 6052 switch (state) { 6053 case AMDGPU_IRQ_STATE_DISABLE: 6054 case AMDGPU_IRQ_STATE_ENABLE: 6055 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6056 PRIV_REG_INT_ENABLE, 6057 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6058 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6059 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6060 /* MECs start at 1 */ 6061 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6062 6063 if (cp_int_cntl_reg) { 6064 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6065 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6066 PRIV_REG_INT_ENABLE, 6067 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6068 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6069 } 6070 } 6071 } 6072 break; 6073 default: 6074 break; 6075 } 6076 6077 return 0; 6078 } 6079 6080 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6081 struct amdgpu_irq_src *source, 6082 unsigned type, 6083 enum amdgpu_interrupt_state state) 6084 { 6085 u32 cp_int_cntl_reg, cp_int_cntl; 6086 int i, j; 6087 6088 switch (state) { 6089 case AMDGPU_IRQ_STATE_DISABLE: 6090 case AMDGPU_IRQ_STATE_ENABLE: 6091 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6092 OPCODE_ERROR_INT_ENABLE, 6093 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6094 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6095 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6096 /* MECs start at 1 */ 6097 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6098 6099 if (cp_int_cntl_reg) { 6100 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6101 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6102 OPCODE_ERROR_INT_ENABLE, 6103 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6104 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6105 } 6106 } 6107 } 6108 break; 6109 default: 6110 break; 6111 } 6112 6113 return 0; 6114 } 6115 6116 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6117 struct amdgpu_irq_src *source, 6118 unsigned type, 6119 enum amdgpu_interrupt_state state) 6120 { 6121 switch (state) { 6122 case AMDGPU_IRQ_STATE_DISABLE: 6123 case AMDGPU_IRQ_STATE_ENABLE: 6124 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6125 PRIV_INSTR_INT_ENABLE, 6126 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6127 break; 6128 default: 6129 break; 6130 } 6131 6132 return 0; 6133 } 6134 6135 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 6136 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6137 CP_ECC_ERROR_INT_ENABLE, 1) 6138 6139 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 6140 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6141 CP_ECC_ERROR_INT_ENABLE, 0) 6142 6143 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 6144 struct amdgpu_irq_src *source, 6145 unsigned type, 6146 enum amdgpu_interrupt_state state) 6147 { 6148 switch (state) { 6149 case AMDGPU_IRQ_STATE_DISABLE: 6150 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6151 CP_ECC_ERROR_INT_ENABLE, 0); 6152 DISABLE_ECC_ON_ME_PIPE(1, 0); 6153 DISABLE_ECC_ON_ME_PIPE(1, 1); 6154 DISABLE_ECC_ON_ME_PIPE(1, 2); 6155 DISABLE_ECC_ON_ME_PIPE(1, 3); 6156 break; 6157 6158 case AMDGPU_IRQ_STATE_ENABLE: 6159 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6160 CP_ECC_ERROR_INT_ENABLE, 1); 6161 ENABLE_ECC_ON_ME_PIPE(1, 0); 6162 ENABLE_ECC_ON_ME_PIPE(1, 1); 6163 ENABLE_ECC_ON_ME_PIPE(1, 2); 6164 ENABLE_ECC_ON_ME_PIPE(1, 3); 6165 break; 6166 default: 6167 break; 6168 } 6169 6170 return 0; 6171 } 6172 6173 6174 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6175 struct amdgpu_irq_src *src, 6176 unsigned type, 6177 enum amdgpu_interrupt_state state) 6178 { 6179 switch (type) { 6180 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6181 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6182 break; 6183 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6184 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6185 break; 6186 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6187 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6188 break; 6189 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6190 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6191 break; 6192 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6193 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6194 break; 6195 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6196 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6197 break; 6198 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6199 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6200 break; 6201 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6202 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6203 break; 6204 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6205 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6206 break; 6207 default: 6208 break; 6209 } 6210 return 0; 6211 } 6212 6213 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6214 struct amdgpu_irq_src *source, 6215 struct amdgpu_iv_entry *entry) 6216 { 6217 int i; 6218 u8 me_id, pipe_id, queue_id; 6219 struct amdgpu_ring *ring; 6220 6221 DRM_DEBUG("IH: CP EOP\n"); 6222 me_id = (entry->ring_id & 0x0c) >> 2; 6223 pipe_id = (entry->ring_id & 0x03) >> 0; 6224 queue_id = (entry->ring_id & 0x70) >> 4; 6225 6226 switch (me_id) { 6227 case 0: 6228 if (adev->gfx.num_gfx_rings) { 6229 if (!adev->gfx.mcbp) { 6230 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6231 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6232 /* Fence signals are handled on the software rings*/ 6233 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6234 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6235 } 6236 } 6237 break; 6238 case 1: 6239 case 2: 6240 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6241 ring = &adev->gfx.compute_ring[i]; 6242 /* Per-queue interrupt is supported for MEC starting 
from VI. 6243 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6244 */ 6245 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6246 amdgpu_fence_process(ring); 6247 } 6248 break; 6249 } 6250 return 0; 6251 } 6252 6253 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6254 struct amdgpu_iv_entry *entry) 6255 { 6256 u8 me_id, pipe_id, queue_id; 6257 struct amdgpu_ring *ring; 6258 int i; 6259 6260 me_id = (entry->ring_id & 0x0c) >> 2; 6261 pipe_id = (entry->ring_id & 0x03) >> 0; 6262 queue_id = (entry->ring_id & 0x70) >> 4; 6263 6264 switch (me_id) { 6265 case 0: 6266 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6267 break; 6268 case 1: 6269 case 2: 6270 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6271 ring = &adev->gfx.compute_ring[i]; 6272 if (ring->me == me_id && ring->pipe == pipe_id && 6273 ring->queue == queue_id) 6274 drm_sched_fault(&ring->sched); 6275 } 6276 break; 6277 } 6278 } 6279 6280 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6281 struct amdgpu_irq_src *source, 6282 struct amdgpu_iv_entry *entry) 6283 { 6284 DRM_ERROR("Illegal register access in command stream\n"); 6285 gfx_v9_0_fault(adev, entry); 6286 return 0; 6287 } 6288 6289 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, 6290 struct amdgpu_irq_src *source, 6291 struct amdgpu_iv_entry *entry) 6292 { 6293 DRM_ERROR("Illegal opcode in command stream\n"); 6294 gfx_v9_0_fault(adev, entry); 6295 return 0; 6296 } 6297 6298 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6299 struct amdgpu_irq_src *source, 6300 struct amdgpu_iv_entry *entry) 6301 { 6302 DRM_ERROR("Illegal instruction in command stream\n"); 6303 gfx_v9_0_fault(adev, entry); 6304 return 0; 6305 } 6306 6307 6308 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6309 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6310 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6311 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6312 }, 6313 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6314 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6315 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6316 }, 6317 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6318 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6319 0, 0 6320 }, 6321 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6322 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6323 0, 0 6324 }, 6325 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6326 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6327 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6328 }, 6329 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6330 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6331 0, 0 6332 }, 6333 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6334 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6335 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6336 }, 6337 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6338 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6339 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6340 }, 6341 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6342 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6343 0, 0 6344 }, 6345 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6346 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6347 0, 0 6348 }, 6349 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6350 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6351 0, 0 6352 }, 6353 { "GDS_MEM", 
SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6354 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6355 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6356 }, 6357 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6358 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6359 0, 0 6360 }, 6361 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6362 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6363 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6364 }, 6365 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6366 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6367 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6368 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6369 }, 6370 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6371 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6372 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6373 0, 0 6374 }, 6375 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6376 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6377 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6378 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6379 }, 6380 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6381 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6382 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6383 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6384 }, 6385 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6386 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6387 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6388 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6389 }, 6390 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6391 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6392 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6393 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6394 }, 6395 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6396 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6397 0, 0 6398 }, 6399 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6400 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6401 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6402 }, 6403 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6404 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6405 0, 0 6406 }, 6407 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6408 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6409 0, 0 6410 }, 6411 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6412 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6413 0, 0 6414 }, 6415 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6416 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6417 0, 0 6418 }, 6419 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6420 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6421 0, 0 6422 }, 6423 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6424 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6425 0, 0 6426 }, 6427 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6428 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6429 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6430 }, 6431 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6432 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6433 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6434 }, 6435 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6436 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6437 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6438 }, 6439 { "TCC_LOW_RATE_TAG", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6440 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6441 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6442 }, 6443 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6444 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6445 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6446 }, 6447 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6448 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6449 0, 0 6450 }, 6451 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6452 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6453 0, 0 6454 }, 6455 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6456 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6457 0, 0 6458 }, 6459 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6460 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6461 0, 0 6462 }, 6463 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6464 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6465 0, 0 6466 }, 6467 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6468 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6469 0, 0 6470 }, 6471 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6472 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6473 0, 0 6474 }, 6475 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6476 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6477 0, 0 6478 }, 6479 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6480 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6481 0, 0 6482 }, 6483 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6484 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6485 0, 0 6486 }, 6487 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6488 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6489 0, 0 6490 }, 6491 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6492 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6493 0, 0 6494 }, 6495 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6496 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6497 0, 0 6498 }, 6499 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6500 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6501 0, 0 6502 }, 6503 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6504 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6505 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6506 }, 6507 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6508 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6509 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6510 }, 6511 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6512 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6513 0, 0 6514 }, 6515 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6516 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6517 0, 0 6518 }, 6519 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6520 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6521 0, 0 6522 }, 6523 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6524 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6525 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6526 }, 6527 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6528 
SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6529 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6530 }, 6531 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6532 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6533 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6534 }, 6535 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6536 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6537 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6538 }, 6539 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6540 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6541 0, 0 6542 }, 6543 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6544 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6545 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6546 }, 6547 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6548 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6549 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6550 }, 6551 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6552 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6553 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6554 }, 6555 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6556 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6557 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6558 }, 6559 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6560 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6561 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6562 }, 6563 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6564 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6565 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6566 }, 6567 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6568 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6569 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6570 }, 6571 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6572 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6573 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6574 }, 6575 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6576 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6577 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6578 }, 6579 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6580 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6581 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6582 }, 6583 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6584 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6585 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6586 }, 6587 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6588 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6589 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6590 }, 6591 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6592 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6593 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6594 }, 6595 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6596 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6597 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6598 }, 6599 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6600 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6601 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6602 }, 6603 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6604 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6605 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6606 }, 6607 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6608 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6609 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6610 }, 6611 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6612 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6613 0, 0 6614 }, 6615 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6616 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6617 0, 0 6618 }, 6619 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6620 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6621 0, 0 6622 }, 6623 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6624 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6625 0, 0 6626 }, 6627 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6628 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6629 0, 0 6630 }, 6631 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6632 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6633 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6634 }, 6635 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6636 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6637 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6638 }, 6639 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6640 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6641 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6642 }, 6643 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6644 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6645 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6646 }, 6647 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6648 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6649 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6650 }, 6651 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6652 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6653 0, 0 6654 }, 6655 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6656 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6657 0, 0 6658 }, 6659 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6660 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6661 0, 0 6662 }, 6663 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6664 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6665 0, 0 6666 }, 6667 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6668 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6669 0, 0 6670 }, 6671 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6672 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6673 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6674 }, 6675 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6676 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6677 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6678 }, 6679 { "EA_DRAMWR_DATAMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6680 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6681 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6682 }, 6683 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6684 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6685 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6686 }, 6687 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6688 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6689 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6690 }, 6691 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6692 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6693 0, 0 6694 }, 6695 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6696 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6697 0, 0 6698 }, 6699 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6700 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6701 0, 0 6702 }, 6703 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6704 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6705 0, 0 6706 }, 6707 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6708 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6709 0, 0 6710 }, 6711 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6712 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6713 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6714 }, 6715 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6716 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6717 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6718 }, 6719 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6720 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6721 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6722 }, 6723 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6724 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6725 0, 0 6726 }, 6727 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6728 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6729 0, 0 6730 }, 6731 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6732 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6733 0, 0 6734 }, 6735 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6736 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6737 0, 0 6738 }, 6739 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6740 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6741 0, 0 6742 }, 6743 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6744 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6745 0, 0 6746 } 6747 }; 6748 6749 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6750 void *inject_if, uint32_t instance_mask) 6751 { 6752 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6753 int ret; 6754 struct ta_ras_trigger_error_input block_info = { 0 }; 6755 6756 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6757 return -EINVAL; 6758 6759 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6760 return -EINVAL; 6761 6762 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6763 return -EPERM; 6764 6765 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6766 info->head.type)) { 6767 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6768 ras_gfx_subblocks[info->head.sub_block_index].name, 6769 info->head.type); 6770 return -EPERM; 6771 } 6772 6773 if 
(!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6774 info->head.type)) { 6775 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6776 ras_gfx_subblocks[info->head.sub_block_index].name, 6777 info->head.type); 6778 return -EPERM; 6779 } 6780 6781 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6782 block_info.sub_block_index = 6783 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6784 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6785 block_info.address = info->address; 6786 block_info.value = info->value; 6787 6788 mutex_lock(&adev->grbm_idx_mutex); 6789 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); 6790 mutex_unlock(&adev->grbm_idx_mutex); 6791 6792 return ret; 6793 } 6794 6795 static const char * const vml2_mems[] = { 6796 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6797 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6798 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6799 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6800 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6801 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6802 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6803 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6804 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6805 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6806 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6807 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6808 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6809 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6810 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6811 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6812 }; 6813 6814 static const char * const vml2_walker_mems[] = { 6815 "UTC_VML2_CACHE_PDE0_MEM0", 6816 "UTC_VML2_CACHE_PDE0_MEM1", 6817 "UTC_VML2_CACHE_PDE1_MEM0", 6818 "UTC_VML2_CACHE_PDE1_MEM1", 6819 "UTC_VML2_CACHE_PDE2_MEM0", 6820 "UTC_VML2_CACHE_PDE2_MEM1", 6821 "UTC_VML2_RDIF_LOG_FIFO", 6822 }; 6823 6824 static const char * const atc_l2_cache_2m_mems[] = { 6825 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6826 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6827 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6828 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6829 }; 6830 6831 static const char *atc_l2_cache_4k_mems[] = { 6832 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6833 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6834 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6835 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6836 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6837 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6838 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6839 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6840 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6841 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6842 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6843 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6844 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6845 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6846 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6847 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6848 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6849 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6850 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6851 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6852 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6853 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6854 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6855 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6856 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6857 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6858 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6859 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6860 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6861 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6862 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6863 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6864 }; 6865 6866 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6867 struct 
ras_err_data *err_data) 6868 { 6869 uint32_t i, data; 6870 uint32_t sec_count, ded_count; 6871 6872 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6873 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6874 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6875 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6876 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6877 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6878 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6879 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6880 6881 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6882 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6883 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6884 6885 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6886 if (sec_count) { 6887 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6888 "SEC %d\n", i, vml2_mems[i], sec_count); 6889 err_data->ce_count += sec_count; 6890 } 6891 6892 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6893 if (ded_count) { 6894 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6895 "DED %d\n", i, vml2_mems[i], ded_count); 6896 err_data->ue_count += ded_count; 6897 } 6898 } 6899 6900 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6901 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6902 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6903 6904 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6905 SEC_COUNT); 6906 if (sec_count) { 6907 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6908 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6909 err_data->ce_count += sec_count; 6910 } 6911 6912 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6913 DED_COUNT); 6914 if (ded_count) { 6915 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6916 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6917 err_data->ue_count += ded_count; 6918 } 6919 } 6920 6921 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6922 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6923 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6924 6925 sec_count = (data & 0x00006000L) >> 0xd; 6926 if (sec_count) { 6927 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6928 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6929 sec_count); 6930 err_data->ce_count += sec_count; 6931 } 6932 } 6933 6934 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6935 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6936 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6937 6938 sec_count = (data & 0x00006000L) >> 0xd; 6939 if (sec_count) { 6940 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6941 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6942 sec_count); 6943 err_data->ce_count += sec_count; 6944 } 6945 6946 ded_count = (data & 0x00018000L) >> 0xf; 6947 if (ded_count) { 6948 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6949 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6950 ded_count); 6951 err_data->ue_count += ded_count; 6952 } 6953 } 6954 6955 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6956 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6957 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6958 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6959 6960 return 0; 6961 } 6962 6963 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6964 const struct soc15_reg_entry *reg, 6965 uint32_t se_id, uint32_t inst_id, uint32_t value, 6966 uint32_t *sec_count, uint32_t *ded_count) 6967 { 6968 uint32_t i; 6969 uint32_t sec_cnt, ded_cnt; 6970 6971 for (i 
= 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6972 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6973 gfx_v9_0_ras_fields[i].seg != reg->seg || 6974 gfx_v9_0_ras_fields[i].inst != reg->inst) 6975 continue; 6976 6977 sec_cnt = (value & 6978 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6979 gfx_v9_0_ras_fields[i].sec_count_shift; 6980 if (sec_cnt) { 6981 dev_info(adev->dev, "GFX SubBlock %s, " 6982 "Instance[%d][%d], SEC %d\n", 6983 gfx_v9_0_ras_fields[i].name, 6984 se_id, inst_id, 6985 sec_cnt); 6986 *sec_count += sec_cnt; 6987 } 6988 6989 ded_cnt = (value & 6990 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6991 gfx_v9_0_ras_fields[i].ded_count_shift; 6992 if (ded_cnt) { 6993 dev_info(adev->dev, "GFX SubBlock %s, " 6994 "Instance[%d][%d], DED %d\n", 6995 gfx_v9_0_ras_fields[i].name, 6996 se_id, inst_id, 6997 ded_cnt); 6998 *ded_count += ded_cnt; 6999 } 7000 } 7001 7002 return 0; 7003 } 7004 7005 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 7006 { 7007 int i, j, k; 7008 7009 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7010 return; 7011 7012 /* read back registers to clear the counters */ 7013 mutex_lock(&adev->grbm_idx_mutex); 7014 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7015 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7016 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7017 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 7018 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7019 } 7020 } 7021 } 7022 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 7023 mutex_unlock(&adev->grbm_idx_mutex); 7024 7025 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7026 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 7027 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7028 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 7029 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7030 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 7031 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7032 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 7033 7034 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 7035 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 7036 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 7037 } 7038 7039 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 7040 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 7041 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 7042 } 7043 7044 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 7045 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 7046 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 7047 } 7048 7049 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 7050 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 7051 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 7052 } 7053 7054 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7055 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7056 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7057 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7058 } 7059 7060 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 7061 void *ras_error_status) 7062 { 7063 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 7064 uint32_t sec_count = 0, ded_count = 0; 7065 uint32_t i, j, k; 7066 uint32_t reg_value; 7067 7068 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7069 return; 7070 7071 err_data->ue_count = 0; 7072 err_data->ce_count = 0; 7073 7074 mutex_lock(&adev->grbm_idx_mutex); 7075 7076 for (i = 0; i < 
ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7077 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7078 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7079 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); 7080 reg_value = 7081 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7082 if (reg_value) 7083 gfx_v9_0_ras_error_count(adev, 7084 &gfx_v9_0_edc_counter_regs[i], 7085 j, k, reg_value, 7086 &sec_count, &ded_count); 7087 } 7088 } 7089 } 7090 7091 err_data->ce_count += sec_count; 7092 err_data->ue_count += ded_count; 7093 7094 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7095 mutex_unlock(&adev->grbm_idx_mutex); 7096 7097 gfx_v9_0_query_utc_edc_status(adev, err_data); 7098 } 7099 7100 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 7101 { 7102 const unsigned int cp_coher_cntl = 7103 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 7104 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 7105 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 7106 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 7107 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 7108 7109 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 7110 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 7111 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 7112 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 7113 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 7114 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 7115 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 7116 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 7117 } 7118 7119 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 7120 uint32_t pipe, bool enable) 7121 { 7122 struct amdgpu_device *adev = ring->adev; 7123 uint32_t val; 7124 uint32_t wcl_cs_reg; 7125 7126 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */ 7127 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 7128 7129 switch (pipe) { 7130 case 0: 7131 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 7132 break; 7133 case 1: 7134 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 7135 break; 7136 case 2: 7137 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 7138 break; 7139 case 3: 7140 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 7141 break; 7142 default: 7143 DRM_DEBUG("invalid pipe %d\n", pipe); 7144 return; 7145 } 7146 7147 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 7148 7149 } 7150 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 7151 { 7152 struct amdgpu_device *adev = ring->adev; 7153 uint32_t val; 7154 int i; 7155 7156 7157 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register to limit the 7158 * number of gfx waves. Setting only 5 bits (0x1f) makes sure gfx gets only 7159 * around 25% of the gpu resources. 7160 */ 7161 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 7162 amdgpu_ring_emit_wreg(ring, 7163 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 7164 val); 7165 7166 /* Restrict waves for normal/low priority compute queues as well 7167 * to get the best QoS for high priority compute jobs. 7168 * 7169 * amdgpu controls only the 1st ME (0-3 CS pipes).
7170 */ 7171 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 7172 if (i != ring->pipe) 7173 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 7174 7175 } 7176 } 7177 7178 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 7179 { 7180 /* Header itself is a NOP packet */ 7181 if (num_nop == 1) { 7182 amdgpu_ring_write(ring, ring->funcs->nop); 7183 return; 7184 } 7185 7186 /* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */ 7187 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 7188 7189 /* Header is at index 0, followed by num_nop - 1 NOP packets */ 7190 amdgpu_ring_insert_nop(ring, num_nop - 1); 7191 } 7192 7193 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 7194 { 7195 struct amdgpu_device *adev = ring->adev; 7196 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7197 struct amdgpu_ring *kiq_ring = &kiq->ring; 7198 unsigned long flags; 7199 u32 tmp; 7200 int r; 7201 7202 if (amdgpu_sriov_vf(adev)) 7203 return -EINVAL; 7204 7205 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7206 return -EINVAL; 7207 7208 spin_lock_irqsave(&kiq->ring_lock, flags); 7209 7210 if (amdgpu_ring_alloc(kiq_ring, 5)) { 7211 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7212 return -ENOMEM; 7213 } 7214 7215 tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); 7216 gfx_v9_0_ring_emit_wreg(kiq_ring, 7217 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); 7218 amdgpu_ring_commit(kiq_ring); 7219 7220 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7221 7222 r = amdgpu_ring_test_ring(kiq_ring); 7223 if (r) 7224 return r; 7225 7226 if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) 7227 return -ENOMEM; 7228 gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 7229 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); 7230 gfx_v9_0_ring_emit_reg_wait(ring, 7231 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); 7232 gfx_v9_0_ring_emit_wreg(ring, 7233 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); 7234 7235 return amdgpu_ring_test_ring(ring); 7236 } 7237 7238 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, 7239 unsigned int vmid) 7240 { 7241 struct amdgpu_device *adev = ring->adev; 7242 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7243 struct amdgpu_ring *kiq_ring = &kiq->ring; 7244 unsigned long flags; 7245 int i, r; 7246 7247 if (amdgpu_sriov_vf(adev)) 7248 return -EINVAL; 7249 7250 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7251 return -EINVAL; 7252 7253 spin_lock_irqsave(&kiq->ring_lock, flags); 7254 7255 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 7256 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7257 return -ENOMEM; 7258 } 7259 7260 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 7261 0, 0); 7262 amdgpu_ring_commit(kiq_ring); 7263 7264 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7265 7266 r = amdgpu_ring_test_ring(kiq_ring); 7267 if (r) 7268 return r; 7269 7270 /* make sure dequeue is complete */ 7271 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 7272 mutex_lock(&adev->srbm_mutex); 7273 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 7274 for (i = 0; i < adev->usec_timeout; i++) { 7275 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 7276 break; 7277 udelay(1); 7278 } 7279 if (i >= adev->usec_timeout) 7280 r = -ETIMEDOUT; 7281 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7282 mutex_unlock(&adev->srbm_mutex); 7283 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 7284 if (r) { 7285 dev_err(adev->dev, "failed to wait for hqd deactivation\n"); 7286 return r; 7287 } 7288 7289 r =
gfx_v9_0_kcq_init_queue(ring, true); 7290 if (r) { 7291 dev_err(adev->dev, "fail to init kcq\n"); 7292 return r; 7293 } 7294 spin_lock_irqsave(&kiq->ring_lock, flags); 7295 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 7296 if (r) { 7297 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7298 return -ENOMEM; 7299 } 7300 kiq->pmf->kiq_map_queues(kiq_ring, ring); 7301 amdgpu_ring_commit(kiq_ring); 7302 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7303 r = amdgpu_ring_test_ring(kiq_ring); 7304 if (r) { 7305 DRM_ERROR("fail to remap queue\n"); 7306 return r; 7307 } 7308 return amdgpu_ring_test_ring(ring); 7309 } 7310 7311 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7312 { 7313 struct amdgpu_device *adev = ip_block->adev; 7314 uint32_t i, j, k, reg, index = 0; 7315 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7316 7317 if (!adev->gfx.ip_dump_core) 7318 return; 7319 7320 for (i = 0; i < reg_count; i++) 7321 drm_printf(p, "%-50s \t 0x%08x\n", 7322 gc_reg_list_9[i].reg_name, 7323 adev->gfx.ip_dump_core[i]); 7324 7325 /* print compute queue registers for all instances */ 7326 if (!adev->gfx.ip_dump_compute_queues) 7327 return; 7328 7329 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7330 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7331 adev->gfx.mec.num_mec, 7332 adev->gfx.mec.num_pipe_per_mec, 7333 adev->gfx.mec.num_queue_per_pipe); 7334 7335 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7336 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7337 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7338 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7339 for (reg = 0; reg < reg_count; reg++) { 7340 drm_printf(p, "%-50s \t 0x%08x\n", 7341 gc_cp_reg_list_9[reg].reg_name, 7342 adev->gfx.ip_dump_compute_queues[index + reg]); 7343 } 7344 index += reg_count; 7345 } 7346 } 7347 } 7348 7349 } 7350 7351 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) 7352 { 7353 struct amdgpu_device *adev = ip_block->adev; 7354 uint32_t i, j, k, reg, index = 0; 7355 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7356 7357 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) 7358 return; 7359 7360 amdgpu_gfx_off_ctrl(adev, false); 7361 for (i = 0; i < reg_count; i++) 7362 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); 7363 amdgpu_gfx_off_ctrl(adev, true); 7364 7365 /* dump compute queue registers for all instances */ 7366 if (!adev->gfx.ip_dump_compute_queues) 7367 return; 7368 7369 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7370 amdgpu_gfx_off_ctrl(adev, false); 7371 mutex_lock(&adev->srbm_mutex); 7372 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7373 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7374 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7375 /* ME0 is for GFX so start from 1 for CP */ 7376 soc15_grbm_select(adev, 1 + i, j, k, 0, 0); 7377 7378 for (reg = 0; reg < reg_count; reg++) { 7379 adev->gfx.ip_dump_compute_queues[index + reg] = 7380 RREG32(SOC15_REG_ENTRY_OFFSET( 7381 gc_cp_reg_list_9[reg])); 7382 } 7383 index += reg_count; 7384 } 7385 } 7386 } 7387 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7388 mutex_unlock(&adev->srbm_mutex); 7389 amdgpu_gfx_off_ctrl(adev, true); 7390 7391 } 7392 7393 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7394 { 7395 /* Emit the cleaner shader */ 7396 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7397 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7398 } 7399 
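/*
 * The two helpers below are wired up as the .begin_use/.end_use hooks of
 * gfx_v9_0_ring_funcs_compute, so they bracket every compute submission:
 * enforce-isolation accounting always runs, and on GC 9.1.0 parts
 * (Raven/Picasso APUs) gfx powergating is additionally ungated for the
 * duration of the submission and re-gated afterwards.
 */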
7400 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) 7401 { 7402 struct amdgpu_device *adev = ring->adev; 7403 struct amdgpu_ip_block *gfx_block = 7404 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7405 7406 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7407 7408 /* Raven and PCO APUs seem to have stability issues 7409 * with compute and gfxoff and gfx pg. Disable gfx pg during 7410 * submission and allow again afterwards. 7411 */ 7412 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7413 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); 7414 } 7415 7416 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) 7417 { 7418 struct amdgpu_device *adev = ring->adev; 7419 struct amdgpu_ip_block *gfx_block = 7420 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7421 7422 /* Raven and PCO APUs seem to have stability issues 7423 * with compute and gfxoff and gfx pg. Disable gfx pg during 7424 * submission and allow again afterwards. 7425 */ 7426 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7427 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); 7428 7429 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7430 } 7431 7432 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7433 .name = "gfx_v9_0", 7434 .early_init = gfx_v9_0_early_init, 7435 .late_init = gfx_v9_0_late_init, 7436 .sw_init = gfx_v9_0_sw_init, 7437 .sw_fini = gfx_v9_0_sw_fini, 7438 .hw_init = gfx_v9_0_hw_init, 7439 .hw_fini = gfx_v9_0_hw_fini, 7440 .suspend = gfx_v9_0_suspend, 7441 .resume = gfx_v9_0_resume, 7442 .is_idle = gfx_v9_0_is_idle, 7443 .wait_for_idle = gfx_v9_0_wait_for_idle, 7444 .soft_reset = gfx_v9_0_soft_reset, 7445 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7446 .set_powergating_state = gfx_v9_0_set_powergating_state, 7447 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7448 .dump_ip_state = gfx_v9_ip_dump, 7449 .print_ip_state = gfx_v9_ip_print, 7450 }; 7451 7452 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7453 .type = AMDGPU_RING_TYPE_GFX, 7454 .align_mask = 0xff, 7455 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7456 .support_64bit_ptrs = true, 7457 .secure_submission_supported = true, 7458 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7459 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7460 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7461 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7462 5 + /* COND_EXEC */ 7463 7 + /* PIPELINE_SYNC */ 7464 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7465 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7466 2 + /* VM_FLUSH */ 7467 8 + /* FENCE for VM_FLUSH */ 7468 20 + /* GDS switch */ 7469 4 + /* double SWITCH_BUFFER, 7470 the first COND_EXEC jump to the place just 7471 prior to this double SWITCH_BUFFER */ 7472 5 + /* COND_EXEC */ 7473 7 + /* HDP_flush */ 7474 4 + /* VGT_flush */ 7475 14 + /* CE_META */ 7476 31 + /* DE_META */ 7477 3 + /* CNTX_CTRL */ 7478 5 + /* HDP_INVL */ 7479 8 + 8 + /* FENCE x2 */ 7480 2 + /* SWITCH_BUFFER */ 7481 7 + /* gfx_v9_0_emit_mem_sync */ 7482 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7483 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7484 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7485 .emit_fence = gfx_v9_0_ring_emit_fence, 7486 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7487 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7488 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7489 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7490 .test_ring = gfx_v9_0_ring_test_ring, 7491 
.insert_nop = gfx_v9_ring_insert_nop, 7492 .pad_ib = amdgpu_ring_generic_pad_ib, 7493 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7494 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7495 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7496 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7497 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7498 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7499 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7500 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7501 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7502 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7503 .reset = gfx_v9_0_reset_kgq, 7504 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7505 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7506 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7507 }; 7508 7509 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7510 .type = AMDGPU_RING_TYPE_GFX, 7511 .align_mask = 0xff, 7512 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7513 .support_64bit_ptrs = true, 7514 .secure_submission_supported = true, 7515 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7516 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7517 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7518 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7519 5 + /* COND_EXEC */ 7520 7 + /* PIPELINE_SYNC */ 7521 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7522 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7523 2 + /* VM_FLUSH */ 7524 8 + /* FENCE for VM_FLUSH */ 7525 20 + /* GDS switch */ 7526 4 + /* double SWITCH_BUFFER, 7527 * the first COND_EXEC jump to the place just 7528 * prior to this double SWITCH_BUFFER 7529 */ 7530 5 + /* COND_EXEC */ 7531 7 + /* HDP_flush */ 7532 4 + /* VGT_flush */ 7533 14 + /* CE_META */ 7534 31 + /* DE_META */ 7535 3 + /* CNTX_CTRL */ 7536 5 + /* HDP_INVL */ 7537 8 + 8 + /* FENCE x2 */ 7538 2 + /* SWITCH_BUFFER */ 7539 7 + /* gfx_v9_0_emit_mem_sync */ 7540 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7541 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7542 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7543 .emit_fence = gfx_v9_0_ring_emit_fence, 7544 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7545 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7546 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7547 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7548 .test_ring = gfx_v9_0_ring_test_ring, 7549 .test_ib = gfx_v9_0_ring_test_ib, 7550 .insert_nop = gfx_v9_ring_insert_nop, 7551 .pad_ib = amdgpu_ring_generic_pad_ib, 7552 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7553 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7554 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7555 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7556 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7557 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7558 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7559 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7560 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7561 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7562 .patch_de = gfx_v9_0_ring_patch_de_meta, 7563 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7564 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7565 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7566 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7567 }; 7568 7569 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7570 .type = AMDGPU_RING_TYPE_COMPUTE, 7571 .align_mask = 0xff, 7572 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7573 .support_64bit_ptrs = true, 7574 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 
7575 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7576 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7577 .emit_frame_size = 7578 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7579 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7580 5 + /* hdp invalidate */ 7581 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7582 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7583 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7584 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7585 7 + /* gfx_v9_0_emit_mem_sync */ 7586 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7587 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7588 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7589 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7590 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7591 .emit_fence = gfx_v9_0_ring_emit_fence, 7592 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7593 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7594 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7595 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7596 .test_ring = gfx_v9_0_ring_test_ring, 7597 .test_ib = gfx_v9_0_ring_test_ib, 7598 .insert_nop = gfx_v9_ring_insert_nop, 7599 .pad_ib = amdgpu_ring_generic_pad_ib, 7600 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7601 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7602 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7603 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7604 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7605 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7606 .reset = gfx_v9_0_reset_kcq, 7607 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7608 .begin_use = gfx_v9_0_ring_begin_use_compute, 7609 .end_use = gfx_v9_0_ring_end_use_compute, 7610 }; 7611 7612 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7613 .type = AMDGPU_RING_TYPE_KIQ, 7614 .align_mask = 0xff, 7615 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7616 .support_64bit_ptrs = true, 7617 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7618 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7619 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7620 .emit_frame_size = 7621 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7622 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7623 5 + /* hdp invalidate */ 7624 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7625 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7626 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7627 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7628 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7629 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7630 .test_ring = gfx_v9_0_ring_test_ring, 7631 .insert_nop = amdgpu_ring_insert_nop, 7632 .pad_ib = amdgpu_ring_generic_pad_ib, 7633 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7634 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7635 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7636 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7637 }; 7638 7639 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7640 { 7641 int i; 7642 7643 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7644 7645 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7646 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7647 7648 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 7649 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 7650 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 7651 } 7652 7653 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7654 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7655 } 7656 7657 static const struct 
amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7658 .set = gfx_v9_0_set_eop_interrupt_state, 7659 .process = gfx_v9_0_eop_irq, 7660 }; 7661 7662 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7663 .set = gfx_v9_0_set_priv_reg_fault_state, 7664 .process = gfx_v9_0_priv_reg_irq, 7665 }; 7666 7667 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { 7668 .set = gfx_v9_0_set_bad_op_fault_state, 7669 .process = gfx_v9_0_bad_op_irq, 7670 }; 7671 7672 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7673 .set = gfx_v9_0_set_priv_inst_fault_state, 7674 .process = gfx_v9_0_priv_inst_irq, 7675 }; 7676 7677 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7678 .set = gfx_v9_0_set_cp_ecc_error_state, 7679 .process = amdgpu_gfx_cp_ecc_error_irq, 7680 }; 7681 7682 7683 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 7684 { 7685 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7686 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 7687 7688 adev->gfx.priv_reg_irq.num_types = 1; 7689 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 7690 7691 adev->gfx.bad_op_irq.num_types = 1; 7692 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; 7693 7694 adev->gfx.priv_inst_irq.num_types = 1; 7695 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7696 7697 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */ 7698 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7699 } 7700 7701 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7702 { 7703 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7704 case IP_VERSION(9, 0, 1): 7705 case IP_VERSION(9, 2, 1): 7706 case IP_VERSION(9, 4, 0): 7707 case IP_VERSION(9, 2, 2): 7708 case IP_VERSION(9, 1, 0): 7709 case IP_VERSION(9, 4, 1): 7710 case IP_VERSION(9, 3, 0): 7711 case IP_VERSION(9, 4, 2): 7712 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7713 break; 7714 default: 7715 break; 7716 } 7717 } 7718 7719 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7720 { 7721 /* init asic gds info */ 7722 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7723 case IP_VERSION(9, 0, 1): 7724 case IP_VERSION(9, 2, 1): 7725 case IP_VERSION(9, 4, 0): 7726 adev->gds.gds_size = 0x10000; 7727 break; 7728 case IP_VERSION(9, 2, 2): 7729 case IP_VERSION(9, 1, 0): 7730 case IP_VERSION(9, 4, 1): 7731 adev->gds.gds_size = 0x1000; 7732 break; 7733 case IP_VERSION(9, 4, 2): 7734 /* aldebaran removed all the GDS internal memory, 7735 * only GWS opcodes are supported in the kernel, like barrier, 7736 * semaphore, etc. */ 7737 adev->gds.gds_size = 0; 7738 break; 7739 default: 7740 adev->gds.gds_size = 0x10000; 7741 break; 7742 } 7743 7744 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7745 case IP_VERSION(9, 0, 1): 7746 case IP_VERSION(9, 4, 0): 7747 adev->gds.gds_compute_max_wave_id = 0x7ff; 7748 break; 7749 case IP_VERSION(9, 2, 1): 7750 adev->gds.gds_compute_max_wave_id = 0x27f; 7751 break; 7752 case IP_VERSION(9, 2, 2): 7753 case IP_VERSION(9, 1, 0): 7754 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 7755 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7756 else 7757 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7758 break; 7759 case IP_VERSION(9, 4, 1): 7760 adev->gds.gds_compute_max_wave_id = 0xfff; 7761 break; 7762 case IP_VERSION(9, 4, 2): 7763 /* deprecated for Aldebaran, no usage at all */ 7764 adev->gds.gds_compute_max_wave_id = 0; 7765 break; 7766 default: 7767 /* this really depends on the chip */
7768 adev->gds.gds_compute_max_wave_id = 0x7ff; 7769 break; 7770 } 7771 7772 adev->gds.gws_size = 64; 7773 adev->gds.oa_size = 16; 7774 } 7775 7776 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7777 u32 bitmap) 7778 { 7779 u32 data; 7780 7781 if (!bitmap) 7782 return; 7783 7784 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7785 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7786 7787 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7788 } 7789 7790 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7791 { 7792 u32 data, mask; 7793 7794 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7795 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7796 7797 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7798 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7799 7800 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7801 7802 return (~data) & mask; 7803 } 7804 7805 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7806 struct amdgpu_cu_info *cu_info) 7807 { 7808 int i, j, k, counter, active_cu_number = 0; 7809 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7810 unsigned disable_masks[4 * 4]; 7811 7812 if (!adev || !cu_info) 7813 return -EINVAL; 7814 7815 /* 7816 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7817 */ 7818 if (adev->gfx.config.max_shader_engines * 7819 adev->gfx.config.max_sh_per_se > 16) 7820 return -EINVAL; 7821 7822 amdgpu_gfx_parse_disable_cu(disable_masks, 7823 adev->gfx.config.max_shader_engines, 7824 adev->gfx.config.max_sh_per_se); 7825 7826 mutex_lock(&adev->grbm_idx_mutex); 7827 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7828 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7829 mask = 1; 7830 ao_bitmap = 0; 7831 counter = 0; 7832 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 7833 gfx_v9_0_set_user_cu_inactive_bitmap( 7834 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7835 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7836 7837 /* 7838 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7839 * 4x4 size array, and it's usually suitable for Vega 7840 * ASICs which has 4*2 SE/SH layout. 7841 * But for Arcturus, SE/SH layout is changed to 8*1. 7842 * To mostly reduce the impact, we make it compatible 7843 * with current bitmap array as below: 7844 * SE4,SH0 --> bitmap[0][1] 7845 * SE5,SH0 --> bitmap[1][1] 7846 * SE6,SH0 --> bitmap[2][1] 7847 * SE7,SH0 --> bitmap[3][1] 7848 */ 7849 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; 7850 7851 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7852 if (bitmap & mask) { 7853 if (counter < adev->gfx.config.max_cu_per_sh) 7854 ao_bitmap |= mask; 7855 counter ++; 7856 } 7857 mask <<= 1; 7858 } 7859 active_cu_number += counter; 7860 if (i < 2 && j < 2) 7861 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7862 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7863 } 7864 } 7865 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7866 mutex_unlock(&adev->grbm_idx_mutex); 7867 7868 cu_info->number = active_cu_number; 7869 cu_info->ao_cu_mask = ao_cu_mask; 7870 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7871 7872 return 0; 7873 } 7874 7875 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7876 { 7877 .type = AMD_IP_BLOCK_TYPE_GFX, 7878 .major = 9, 7879 .minor = 0, 7880 .rev = 0, 7881 .funcs = &gfx_v9_0_ip_funcs, 7882 }; 7883
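/*
 * gfx_v9_0_ip_block above is the handle other code uses to hook this GFX9
 * implementation into a device. A minimal sketch of how an IP block version
 * like this is typically registered during ASIC setup (the exact call site
 * lives in the SoC-level code, e.g. soc15.c, and may differ):
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 */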