1 /* 2 * Copyright 2016 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/kernel.h> 26 #include <linux/firmware.h> 27 #include <linux/module.h> 28 #include <linux/pci.h> 29 30 #include "amdgpu.h" 31 #include "amdgpu_gfx.h" 32 #include "soc15.h" 33 #include "soc15d.h" 34 #include "amdgpu_atomfirmware.h" 35 #include "amdgpu_pm.h" 36 37 #include "gc/gc_9_0_offset.h" 38 #include "gc/gc_9_0_sh_mask.h" 39 40 #include "vega10_enum.h" 41 42 #include "soc15_common.h" 43 #include "clearstate_gfx9.h" 44 #include "v9_structs.h" 45 46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" 47 48 #include "amdgpu_ras.h" 49 50 #include "amdgpu_ring_mux.h" 51 #include "gfx_v9_4.h" 52 #include "gfx_v9_0.h" 53 #include "gfx_v9_4_2.h" 54 55 #include "asic_reg/pwr/pwr_10_0_offset.h" 56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h" 57 #include "asic_reg/gc/gc_9_0_default.h" 58 59 #define GFX9_NUM_GFX_RINGS 1 60 #define GFX9_NUM_SW_GFX_RINGS 2 61 #define GFX9_MEC_HPD_SIZE 4096 62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L 64 65 #define mmGCEA_PROBE_MAP 0x070c 66 #define mmGCEA_PROBE_MAP_BASE_IDX 0 67 68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); 69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); 70 MODULE_FIRMWARE("amdgpu/vega10_me.bin"); 71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); 72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); 73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); 74 75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin"); 76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin"); 77 MODULE_FIRMWARE("amdgpu/vega12_me.bin"); 78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin"); 79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin"); 80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin"); 81 82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin"); 83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin"); 84 MODULE_FIRMWARE("amdgpu/vega20_me.bin"); 85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin"); 86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin"); 87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin"); 88 89 MODULE_FIRMWARE("amdgpu/raven_ce.bin"); 90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); 91 MODULE_FIRMWARE("amdgpu/raven_me.bin"); 92 MODULE_FIRMWARE("amdgpu/raven_mec.bin"); 93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); 94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); 95 96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); 98 MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 99 
MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 103 104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 111 112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 114 115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); 122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); 123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); 124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); 125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); 126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); 127 128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); 129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); 131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin"); 132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); 133 134 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 136 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 138 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 140 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 142 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 144 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 146 147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025 148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1 149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 151 152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { 153 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), 154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), 155 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), 156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), 157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), 158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), 159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), 160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), 161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), 162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), 163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), 164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), 165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), 166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), 167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), 168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), 169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), 170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), 171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), 172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), 173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), 174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), 175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), 176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), 177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), 178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), 179 
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), 180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), 181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), 182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), 183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), 184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), 185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), 186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), 187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), 188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), 189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), 190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), 191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), 192 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), 193 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), 194 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), 195 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), 196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), 197 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), 198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL), 199 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), 200 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), 201 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), 202 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS), 203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS), 204 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS), 205 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS), 206 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), 207 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), 208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS), 209 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), 210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), 211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), 212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), 213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), 214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), 215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), 216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), 217 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), 218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), 219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), 220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), 221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), 222 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), 223 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), 224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), 225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), 226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), 227 /* cp header registers */ 228 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), 229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), 230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), 231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), 232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), 233 /* SE status registers */ 234 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), 235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), 236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), 237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) 238 }; 239 240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { 241 /* compute queue registers */ 242 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), 243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE), 244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), 245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), 246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), 247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), 248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), 249 SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_HQD_PQ_BASE_HI), 250 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), 251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), 255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), 256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), 257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), 258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), 259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), 261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), 262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), 263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), 264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), 265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), 266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), 267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), 268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), 269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), 270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), 271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), 272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), 273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), 274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), 275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), 276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), 277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), 278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), 279 }; 280 281 enum ta_ras_gfx_subblock { 282 /*CPC*/ 283 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 284 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 285 TA_RAS_BLOCK__GFX_CPC_UCODE, 286 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 287 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 288 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 289 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 290 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 291 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 292 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 293 /* CPF*/ 294 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 295 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 297 TA_RAS_BLOCK__GFX_CPF_TAG, 298 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 299 /* CPG*/ 300 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 301 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 302 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 303 TA_RAS_BLOCK__GFX_CPG_TAG, 304 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 305 /* GDS*/ 306 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 307 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 308 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 309 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 311 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 312 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 313 /* SPI*/ 314 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 315 /* SQ*/ 316 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 317 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 318 TA_RAS_BLOCK__GFX_SQ_LDS_D, 319 TA_RAS_BLOCK__GFX_SQ_LDS_I, 320 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 321 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 322 /* SQC (3 ranges)*/ 323 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 324 /* SQC range 0*/ 325 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 326 
TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 327 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 328 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 330 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 332 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 334 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 335 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 336 /* SQC range 1*/ 337 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 338 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 339 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 340 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 343 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 348 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 349 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 350 /* SQC range 2*/ 351 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 352 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 353 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 354 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 357 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 362 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 363 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 364 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 365 /* TA*/ 366 TA_RAS_BLOCK__GFX_TA_INDEX_START, 367 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 368 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 369 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 370 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 371 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 372 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 373 /* TCA*/ 374 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 375 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 376 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 377 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 378 /* TCC (5 sub-ranges)*/ 379 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 380 /* TCC range 0*/ 381 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 382 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 386 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 388 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 389 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 390 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 391 /* TCC range 1*/ 392 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 393 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 394 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 395 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 396 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 397 /* TCC range 2*/ 398 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 399 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 400 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 401 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 402 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 403 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 404 
TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 405 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 406 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 407 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 408 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 409 /* TCC range 3*/ 410 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 411 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 413 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 414 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 415 /* TCC range 4*/ 416 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 417 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 418 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 419 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 420 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 421 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 422 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 423 /* TCI*/ 424 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 425 /* TCP*/ 426 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 427 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 428 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 429 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 430 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 431 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 432 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 434 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 435 /* TD*/ 436 TA_RAS_BLOCK__GFX_TD_INDEX_START, 437 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 439 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 440 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 441 /* EA (3 sub-ranges)*/ 442 TA_RAS_BLOCK__GFX_EA_INDEX_START, 443 /* EA range 0*/ 444 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 445 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 446 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 447 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 448 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 449 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 450 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 451 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 452 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 453 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 454 /* EA range 1*/ 455 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 456 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 457 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 458 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 459 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 460 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 461 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 462 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 463 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 464 /* EA range 2*/ 465 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 466 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 467 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 468 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 469 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 470 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 471 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 472 /* UTC VM L2 bank*/ 473 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 474 /* UTC VM walker*/ 475 TA_RAS_BLOCK__UTC_VML2_WALKER, 476 /* UTC ATC L2 2MB cache*/ 477 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 478 /* UTC ATC L2 4KB cache*/ 479 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 480 TA_RAS_BLOCK__GFX_MAX 481 }; 482 483 struct ras_gfx_subblock { 484 unsigned char *name; 485 int ta_subblock; 486 int hw_supported_error_type; 487 int sw_supported_error_type; 488 }; 489 490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 491 [AMDGPU_RAS_BLOCK__##subblock] = { \ 
492 #subblock, \ 493 TA_RAS_BLOCK__##subblock, \ 494 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 495 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 496 } 497 498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 499 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 501 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 510 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 513 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 516 0), 517 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 518 0), 519 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 520 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 521 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 525 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 527 0, 0), 528 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 529 0), 530 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 531 0, 0), 532 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 533 0), 534 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 535 0, 0), 536 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 537 0), 538 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 539 1), 540 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 541 0, 0, 0), 542 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 543 0), 544 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 545 0), 546 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 547 0), 548 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 549 0), 550 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 551 0), 552 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 553 0, 0), 554 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 555 0), 556 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 557 0), 558 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 559 0, 0, 0), 560 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 561 0), 562 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 563 0), 564 
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 565 0), 566 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 567 0), 568 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 569 0), 570 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 571 0, 0), 572 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 573 0), 574 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 579 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 581 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 583 1), 584 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 585 1), 586 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 587 1), 588 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 589 0), 590 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 591 0), 592 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 604 0), 605 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 607 0), 608 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 609 0, 0), 610 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 611 0), 612 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 613 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 620 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 623 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 
0), 626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 642 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 644 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 646 }; 647 648 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 649 { 650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 670 }; 671 672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 673 { 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 692 }; 693 694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 695 { 696 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 707 }; 708 709 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 710 { 711 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 729 SOC15_REG_GOLDEN_VALUE(GC, 
0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 730 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 735 }; 736 737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 738 { 739 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 746 }; 747 748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 749 { 750 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 769 }; 770 771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 772 { 773 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 783 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 784 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 785 }; 786 787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 788 { 789 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 792 }; 793 794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 795 { 796 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 812 }; 813 814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 815 { 816 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 829 }; 830 831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 832 { 833 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 839 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), 840 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), 841 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), 842 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), 843 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000) 844 }; 845 846 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = { 847 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)}, 848 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)}, 849 }; 850 851 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = 852 { 853 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 854 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 855 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 856 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 857 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 858 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 859 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 860 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0, 861 }; 862 863 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = 864 { 865 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0, 866 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0, 867 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0, 868 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0, 869 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0, 870 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0, 871 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0, 872 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, 873 }; 874 875 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 876 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 877 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 878 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 879 880 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); 881 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); 882 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); 883 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); 884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 885 struct amdgpu_cu_info *cu_info); 886 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); 887 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); 888 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); 889 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 890 void *ras_error_status); 891 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 892 void *inject_if, uint32_t instance_mask); 893 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); 894 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 895 unsigned int vmid); 896 897 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, 898 uint64_t queue_mask) 899 { 900 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 901 amdgpu_ring_write(kiq_ring, 902 PACKET3_SET_RESOURCES_VMID_MASK(0) | 903 /* vmid_mask:0* queue_type:0 (KIQ) */ 904 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); 905 amdgpu_ring_write(kiq_ring, 906 lower_32_bits(queue_mask)); /* queue mask lo */ 907 amdgpu_ring_write(kiq_ring, 908 upper_32_bits(queue_mask)); /* queue mask hi */ 909 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 910 amdgpu_ring_write(kiq_ring, 0); /* gws mask 
hi */ 911 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 912 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 913 } 914 915 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring, 916 struct amdgpu_ring *ring) 917 { 918 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 919 uint64_t wptr_addr = ring->wptr_gpu_addr; 920 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 921 922 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 923 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 924 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 925 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 926 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 927 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 928 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 929 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) | 930 /*queue_type: normal compute queue */ 931 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | 932 /* alloc format: all_on_one_pipe */ 933 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | 934 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 935 /* num_queues: must be 1 */ 936 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 937 amdgpu_ring_write(kiq_ring, 938 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 939 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 940 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 941 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 942 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 943 } 944 945 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 946 struct amdgpu_ring *ring, 947 enum amdgpu_unmap_queues_action action, 948 u64 gpu_addr, u64 seq) 949 { 950 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 951 952 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 953 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 954 PACKET3_UNMAP_QUEUES_ACTION(action) | 955 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 956 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 957 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 958 amdgpu_ring_write(kiq_ring, 959 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 960 961 if (action == PREEMPT_QUEUES_NO_UNMAP) { 962 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask)); 963 amdgpu_ring_write(kiq_ring, 0); 964 amdgpu_ring_write(kiq_ring, 0); 965 966 } else { 967 amdgpu_ring_write(kiq_ring, 0); 968 amdgpu_ring_write(kiq_ring, 0); 969 amdgpu_ring_write(kiq_ring, 0); 970 } 971 } 972 973 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring, 974 struct amdgpu_ring *ring, 975 u64 addr, 976 u64 seq) 977 { 978 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 979 980 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 981 amdgpu_ring_write(kiq_ring, 982 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 983 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 984 PACKET3_QUERY_STATUS_COMMAND(2)); 985 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 986 amdgpu_ring_write(kiq_ring, 987 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 988 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 989 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 990 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 991 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 992 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 993 } 994 995 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 996 uint16_t pasid, uint32_t flush_type, 997 bool all_hub) 998 { 999 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 1000 amdgpu_ring_write(kiq_ring, 1001 PACKET3_INVALIDATE_TLBS_DST_SEL(1) | 1002 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 1003 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 1004 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 1005 } 1006 1007 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { 1008 .kiq_set_resources = gfx_v9_0_kiq_set_resources, 1009 .kiq_map_queues = gfx_v9_0_kiq_map_queues, 1010 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues, 1011 .kiq_query_status = gfx_v9_0_kiq_query_status, 1012 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs, 1013 .set_resources_size = 8, 1014 .map_queues_size = 7, 1015 .unmap_queues_size = 6, 1016 .query_status_size = 7, 1017 .invalidate_tlbs_size = 2, 1018 }; 1019 1020 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 1021 { 1022 adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs; 1023 } 1024 1025 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) 1026 { 1027 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1028 case IP_VERSION(9, 0, 1): 1029 soc15_program_register_sequence(adev, 1030 golden_settings_gc_9_0, 1031 ARRAY_SIZE(golden_settings_gc_9_0)); 1032 soc15_program_register_sequence(adev, 1033 golden_settings_gc_9_0_vg10, 1034 ARRAY_SIZE(golden_settings_gc_9_0_vg10)); 1035 break; 1036 case IP_VERSION(9, 2, 1): 1037 soc15_program_register_sequence(adev, 1038 golden_settings_gc_9_2_1, 1039 ARRAY_SIZE(golden_settings_gc_9_2_1)); 1040 soc15_program_register_sequence(adev, 1041 golden_settings_gc_9_2_1_vg12, 1042 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12)); 1043 break; 1044 case IP_VERSION(9, 4, 0): 1045 soc15_program_register_sequence(adev, 1046 golden_settings_gc_9_0, 1047 ARRAY_SIZE(golden_settings_gc_9_0)); 1048 soc15_program_register_sequence(adev, 1049 golden_settings_gc_9_0_vg20, 1050 ARRAY_SIZE(golden_settings_gc_9_0_vg20)); 1051 break; 1052 case IP_VERSION(9, 4, 1): 1053 soc15_program_register_sequence(adev, 1054 golden_settings_gc_9_4_1_arct, 1055 ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); 1056 break; 1057 case IP_VERSION(9, 2, 2): 1058 case IP_VERSION(9, 1, 0): 1059 soc15_program_register_sequence(adev, golden_settings_gc_9_1, 1060 ARRAY_SIZE(golden_settings_gc_9_1)); 1061 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 1062 soc15_program_register_sequence(adev, 1063 golden_settings_gc_9_1_rv2, 1064 ARRAY_SIZE(golden_settings_gc_9_1_rv2)); 1065 else 1066 soc15_program_register_sequence(adev, 1067 golden_settings_gc_9_1_rv1, 1068 ARRAY_SIZE(golden_settings_gc_9_1_rv1)); 1069 break; 1070 case IP_VERSION(9, 3, 0): 1071 soc15_program_register_sequence(adev, 1072 golden_settings_gc_9_1_rn, 1073 ARRAY_SIZE(golden_settings_gc_9_1_rn)); 1074 return; /* for renoir, don't need 
common goldensetting */ 1075 case IP_VERSION(9, 4, 2): 1076 gfx_v9_4_2_init_golden_registers(adev, 1077 adev->smuio.funcs->get_die_id(adev)); 1078 break; 1079 default: 1080 break; 1081 } 1082 1083 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1084 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))) 1085 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, 1086 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); 1087 } 1088 1089 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 1090 bool wc, uint32_t reg, uint32_t val) 1091 { 1092 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 1093 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 1094 WRITE_DATA_DST_SEL(0) | 1095 (wc ? WR_CONFIRM : 0)); 1096 amdgpu_ring_write(ring, reg); 1097 amdgpu_ring_write(ring, 0); 1098 amdgpu_ring_write(ring, val); 1099 } 1100 1101 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 1102 int mem_space, int opt, uint32_t addr0, 1103 uint32_t addr1, uint32_t ref, uint32_t mask, 1104 uint32_t inv) 1105 { 1106 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 1107 amdgpu_ring_write(ring, 1108 /* memory (1) or register (0) */ 1109 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 1110 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 1111 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 1112 WAIT_REG_MEM_ENGINE(eng_sel))); 1113 1114 if (mem_space) 1115 BUG_ON(addr0 & 0x3); /* Dword align */ 1116 amdgpu_ring_write(ring, addr0); 1117 amdgpu_ring_write(ring, addr1); 1118 amdgpu_ring_write(ring, ref); 1119 amdgpu_ring_write(ring, mask); 1120 amdgpu_ring_write(ring, inv); /* poll interval */ 1121 } 1122 1123 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) 1124 { 1125 struct amdgpu_device *adev = ring->adev; 1126 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1127 uint32_t tmp = 0; 1128 unsigned i; 1129 int r; 1130 1131 WREG32(scratch, 0xCAFEDEAD); 1132 r = amdgpu_ring_alloc(ring, 3); 1133 if (r) 1134 return r; 1135 1136 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 1137 amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); 1138 amdgpu_ring_write(ring, 0xDEADBEEF); 1139 amdgpu_ring_commit(ring); 1140 1141 for (i = 0; i < adev->usec_timeout; i++) { 1142 tmp = RREG32(scratch); 1143 if (tmp == 0xDEADBEEF) 1144 break; 1145 udelay(1); 1146 } 1147 1148 if (i >= adev->usec_timeout) 1149 r = -ETIMEDOUT; 1150 return r; 1151 } 1152 1153 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 1154 { 1155 struct amdgpu_device *adev = ring->adev; 1156 struct amdgpu_ib ib; 1157 struct dma_fence *f = NULL; 1158 1159 unsigned index; 1160 uint64_t gpu_addr; 1161 uint32_t tmp; 1162 long r; 1163 1164 r = amdgpu_device_wb_get(adev, &index); 1165 if (r) 1166 return r; 1167 1168 gpu_addr = adev->wb.gpu_addr + (index * 4); 1169 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 1170 memset(&ib, 0, sizeof(ib)); 1171 1172 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 1173 if (r) 1174 goto err1; 1175 1176 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 1177 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 1178 ib.ptr[2] = lower_32_bits(gpu_addr); 1179 ib.ptr[3] = upper_32_bits(gpu_addr); 1180 ib.ptr[4] = 0xDEADBEEF; 1181 ib.length_dw = 5; 1182 1183 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1184 if (r) 1185 goto err2; 1186 1187 r = dma_fence_wait_timeout(f, false, timeout); 1188 if (r == 0) { 1189 r = -ETIMEDOUT; 1190 goto err2; 1191 } else if (r < 0) { 1192 goto err2; 1193 } 
1194 1195 tmp = adev->wb.wb[index]; 1196 if (tmp == 0xDEADBEEF) 1197 r = 0; 1198 else 1199 r = -EINVAL; 1200 1201 err2: 1202 amdgpu_ib_free(adev, &ib, NULL); 1203 dma_fence_put(f); 1204 err1: 1205 amdgpu_device_wb_free(adev, index); 1206 return r; 1207 } 1208 1209 1210 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev) 1211 { 1212 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1213 amdgpu_ucode_release(&adev->gfx.me_fw); 1214 amdgpu_ucode_release(&adev->gfx.ce_fw); 1215 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1216 amdgpu_ucode_release(&adev->gfx.mec_fw); 1217 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1218 1219 kfree(adev->gfx.rlc.register_list_format); 1220 } 1221 1222 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) 1223 { 1224 adev->gfx.me_fw_write_wait = false; 1225 adev->gfx.mec_fw_write_wait = false; 1226 1227 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1228 ((adev->gfx.mec_fw_version < 0x000001a5) || 1229 (adev->gfx.mec_feature_version < 46) || 1230 (adev->gfx.pfp_fw_version < 0x000000b7) || 1231 (adev->gfx.pfp_feature_version < 46))) 1232 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1233 1234 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1235 case IP_VERSION(9, 0, 1): 1236 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1237 (adev->gfx.me_feature_version >= 42) && 1238 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1239 (adev->gfx.pfp_feature_version >= 42)) 1240 adev->gfx.me_fw_write_wait = true; 1241 1242 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1243 (adev->gfx.mec_feature_version >= 42)) 1244 adev->gfx.mec_fw_write_wait = true; 1245 break; 1246 case IP_VERSION(9, 2, 1): 1247 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1248 (adev->gfx.me_feature_version >= 44) && 1249 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1250 (adev->gfx.pfp_feature_version >= 44)) 1251 adev->gfx.me_fw_write_wait = true; 1252 1253 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1254 (adev->gfx.mec_feature_version >= 44)) 1255 adev->gfx.mec_fw_write_wait = true; 1256 break; 1257 case IP_VERSION(9, 4, 0): 1258 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1259 (adev->gfx.me_feature_version >= 44) && 1260 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1261 (adev->gfx.pfp_feature_version >= 44)) 1262 adev->gfx.me_fw_write_wait = true; 1263 1264 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1265 (adev->gfx.mec_feature_version >= 44)) 1266 adev->gfx.mec_fw_write_wait = true; 1267 break; 1268 case IP_VERSION(9, 1, 0): 1269 case IP_VERSION(9, 2, 2): 1270 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1271 (adev->gfx.me_feature_version >= 42) && 1272 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1273 (adev->gfx.pfp_feature_version >= 42)) 1274 adev->gfx.me_fw_write_wait = true; 1275 1276 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1277 (adev->gfx.mec_feature_version >= 42)) 1278 adev->gfx.mec_fw_write_wait = true; 1279 break; 1280 default: 1281 adev->gfx.me_fw_write_wait = true; 1282 adev->gfx.mec_fw_write_wait = true; 1283 break; 1284 } 1285 } 1286 1287 struct amdgpu_gfxoff_quirk { 1288 u16 chip_vendor; 1289 u16 chip_device; 1290 u16 subsys_vendor; 1291 u16 subsys_device; 1292 u8 revision; 1293 }; 1294 1295 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1296 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1297 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1298 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1299 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1300 /* GFXOFF is unstable on C6 parts with a 
VBIOS 113-RAVEN-114 */ 1301 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1302 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1303 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1304 { 0, 0, 0, 0, 0 }, 1305 }; 1306 1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1308 { 1309 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1310 1311 while (p && p->chip_device != 0) { 1312 if (pdev->vendor == p->chip_vendor && 1313 pdev->device == p->chip_device && 1314 pdev->subsystem_vendor == p->subsys_vendor && 1315 pdev->subsystem_device == p->subsys_device && 1316 pdev->revision == p->revision) { 1317 return true; 1318 } 1319 ++p; 1320 } 1321 return false; 1322 } 1323 1324 static bool is_raven_kicker(struct amdgpu_device *adev) 1325 { 1326 if (adev->pm.fw_version >= 0x41e2b) 1327 return true; 1328 else 1329 return false; 1330 } 1331 1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1333 { 1334 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1335 (adev->gfx.me_fw_version >= 0x000000a5) && 1336 (adev->gfx.me_feature_version >= 52)) 1337 return true; 1338 else 1339 return false; 1340 } 1341 1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1343 { 1344 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1345 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1346 1347 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1348 case IP_VERSION(9, 0, 1): 1349 case IP_VERSION(9, 2, 1): 1350 case IP_VERSION(9, 4, 0): 1351 break; 1352 case IP_VERSION(9, 2, 2): 1353 case IP_VERSION(9, 1, 0): 1354 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1355 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1356 ((!is_raven_kicker(adev) && 1357 adev->gfx.rlc_fw_version < 531) || 1358 (adev->gfx.rlc_feature_version < 1) || 1359 !adev->gfx.rlc.is_rlc_v2_1)) 1360 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1361 1362 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1363 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1364 AMD_PG_SUPPORT_CP | 1365 AMD_PG_SUPPORT_RLC_SMU_HS; 1366 break; 1367 case IP_VERSION(9, 3, 0): 1368 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1369 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1370 AMD_PG_SUPPORT_CP | 1371 AMD_PG_SUPPORT_RLC_SMU_HS; 1372 break; 1373 default: 1374 break; 1375 } 1376 } 1377 1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1379 char *chip_name) 1380 { 1381 int err; 1382 1383 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 1384 "amdgpu/%s_pfp.bin", chip_name); 1385 if (err) 1386 goto out; 1387 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1388 1389 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 1390 "amdgpu/%s_me.bin", chip_name); 1391 if (err) 1392 goto out; 1393 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1394 1395 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 1396 "amdgpu/%s_ce.bin", chip_name); 1397 if (err) 1398 goto out; 1399 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1400 1401 out: 1402 if (err) { 1403 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1404 amdgpu_ucode_release(&adev->gfx.me_fw); 1405 amdgpu_ucode_release(&adev->gfx.ce_fw); 1406 } 1407 return err; 1408 } 1409 1410 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1411 char *chip_name) 1412 { 1413 int err; 1414 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1415 uint16_t version_major; 1416 uint16_t version_minor; 1417 uint32_t smu_version; 1418 1419 /* 1420 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1421 * instead of 
picasso_rlc.bin. 1422 * Judgment method: 1423 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1424 * or revision >= 0xD8 && revision <= 0xDF 1425 * otherwise is PCO FP5 1426 */ 1427 if (!strcmp(chip_name, "picasso") && 1428 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1429 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1430 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1431 "amdgpu/%s_rlc_am4.bin", chip_name); 1432 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1433 (smu_version >= 0x41e2b)) 1434 /** 1435 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1436 */ 1437 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1438 "amdgpu/%s_kicker_rlc.bin", chip_name); 1439 else 1440 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1441 "amdgpu/%s_rlc.bin", chip_name); 1442 if (err) 1443 goto out; 1444 1445 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1446 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1447 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1448 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1449 out: 1450 if (err) 1451 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1452 1453 return err; 1454 } 1455 1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1457 { 1458 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1459 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1460 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1461 return false; 1462 1463 return true; 1464 } 1465 1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1467 char *chip_name) 1468 { 1469 int err; 1470 1471 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1472 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1473 "amdgpu/%s_sjt_mec.bin", chip_name); 1474 else 1475 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1476 "amdgpu/%s_mec.bin", chip_name); 1477 if (err) 1478 goto out; 1479 1480 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1481 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1482 1483 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1484 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1485 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1486 "amdgpu/%s_sjt_mec2.bin", chip_name); 1487 else 1488 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1489 "amdgpu/%s_mec2.bin", chip_name); 1490 if (!err) { 1491 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1492 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1493 } else { 1494 err = 0; 1495 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1496 } 1497 } else { 1498 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1499 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1500 } 1501 1502 gfx_v9_0_check_if_need_gfxoff(adev); 1503 gfx_v9_0_check_fw_write_wait(adev); 1504 1505 out: 1506 if (err) 1507 amdgpu_ucode_release(&adev->gfx.mec_fw); 1508 return err; 1509 } 1510 1511 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1512 { 1513 char ucode_prefix[30]; 1514 int r; 1515 1516 DRM_DEBUG("\n"); 1517 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 1518 1519 /* No CPG in Arcturus */ 1520 if (adev->gfx.num_gfx_rings) { 1521 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1522 if (r) 1523 
return r; 1524 } 1525 1526 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1527 if (r) 1528 return r; 1529 1530 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1531 if (r) 1532 return r; 1533 1534 return r; 1535 } 1536 1537 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1538 { 1539 u32 count = 0; 1540 const struct cs_section_def *sect = NULL; 1541 const struct cs_extent_def *ext = NULL; 1542 1543 /* begin clear state */ 1544 count += 2; 1545 /* context control state */ 1546 count += 3; 1547 1548 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1549 for (ext = sect->section; ext->extent != NULL; ++ext) { 1550 if (sect->id == SECT_CONTEXT) 1551 count += 2 + ext->reg_count; 1552 else 1553 return 0; 1554 } 1555 } 1556 1557 /* end clear state */ 1558 count += 2; 1559 /* clear state */ 1560 count += 2; 1561 1562 return count; 1563 } 1564 1565 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1566 volatile u32 *buffer) 1567 { 1568 u32 count = 0, i; 1569 const struct cs_section_def *sect = NULL; 1570 const struct cs_extent_def *ext = NULL; 1571 1572 if (adev->gfx.rlc.cs_data == NULL) 1573 return; 1574 if (buffer == NULL) 1575 return; 1576 1577 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1578 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1579 1580 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1581 buffer[count++] = cpu_to_le32(0x80000000); 1582 buffer[count++] = cpu_to_le32(0x80000000); 1583 1584 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1585 for (ext = sect->section; ext->extent != NULL; ++ext) { 1586 if (sect->id == SECT_CONTEXT) { 1587 buffer[count++] = 1588 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1589 buffer[count++] = cpu_to_le32(ext->reg_index - 1590 PACKET3_SET_CONTEXT_REG_START); 1591 for (i = 0; i < ext->reg_count; i++) 1592 buffer[count++] = cpu_to_le32(ext->extent[i]); 1593 } else { 1594 return; 1595 } 1596 } 1597 } 1598 1599 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1600 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1601 1602 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1603 buffer[count++] = cpu_to_le32(0); 1604 } 1605 1606 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1607 { 1608 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1609 uint32_t pg_always_on_cu_num = 2; 1610 uint32_t always_on_cu_num; 1611 uint32_t i, j, k; 1612 uint32_t mask, cu_bitmap, counter; 1613 1614 if (adev->flags & AMD_IS_APU) 1615 always_on_cu_num = 4; 1616 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1617 always_on_cu_num = 8; 1618 else 1619 always_on_cu_num = 12; 1620 1621 mutex_lock(&adev->grbm_idx_mutex); 1622 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1623 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1624 mask = 1; 1625 cu_bitmap = 0; 1626 counter = 0; 1627 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1628 1629 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1630 if (cu_info->bitmap[0][i][j] & mask) { 1631 if (counter == pg_always_on_cu_num) 1632 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1633 if (counter < always_on_cu_num) 1634 cu_bitmap |= mask; 1635 else 1636 break; 1637 counter++; 1638 } 1639 mask <<= 1; 1640 } 1641 1642 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1643 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1644 } 1645 } 1646 
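/*
 * The per-SE/SH walk above leaves GRBM_GFX_INDEX pointing at the last
 * shader array that was selected, so switch back to broadcast mode
 * (all-0xffffffff selectors) before dropping grbm_idx_mutex; subsequent
 * register writes are meant to reach every SE/SH again.
 */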
amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1647 mutex_unlock(&adev->grbm_idx_mutex); 1648 } 1649 1650 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1651 { 1652 uint32_t data; 1653 1654 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1655 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1656 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1657 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1658 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1659 1660 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1661 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1662 1663 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1664 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1665 1666 mutex_lock(&adev->grbm_idx_mutex); 1667 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1668 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1669 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1670 1671 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1672 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1673 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1674 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1675 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1676 1677 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1678 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1679 data &= 0x0000FFFF; 1680 data |= 0x00C00000; 1681 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1682 1683 /* 1684 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1685 * programmed in gfx_v9_0_init_always_on_cu_mask() 1686 */ 1687 1688 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1689 * but used for RLC_LB_CNTL configuration */ 1690 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1691 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1692 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1693 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1694 mutex_unlock(&adev->grbm_idx_mutex); 1695 1696 gfx_v9_0_init_always_on_cu_mask(adev); 1697 } 1698 1699 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1700 { 1701 uint32_t data; 1702 1703 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1704 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1705 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1706 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1707 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1708 1709 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1710 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1711 1712 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1713 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1714 1715 mutex_lock(&adev->grbm_idx_mutex); 1716 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1717 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1718 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1719 1720 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1721 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1722 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1723 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1724 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1725 1726 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1727 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1728 data &= 0x0000FFFF; 1729 data |= 0x00C00000; 1730 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, 
data); 1731 1732 /* 1733 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1734 * programmed in gfx_v9_0_init_always_on_cu_mask() 1735 */ 1736 1737 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1738 * but used for RLC_LB_CNTL configuration */ 1739 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1740 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1741 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1742 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1743 mutex_unlock(&adev->grbm_idx_mutex); 1744 1745 gfx_v9_0_init_always_on_cu_mask(adev); 1746 } 1747 1748 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1749 { 1750 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1751 } 1752 1753 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1754 { 1755 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1756 return 5; 1757 else 1758 return 4; 1759 } 1760 1761 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 1762 { 1763 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 1764 1765 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 1766 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1767 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); 1768 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); 1769 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); 1770 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); 1771 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); 1772 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); 1773 adev->gfx.rlc.rlcg_reg_access_supported = true; 1774 } 1775 1776 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1777 { 1778 const struct cs_section_def *cs_data; 1779 int r; 1780 1781 adev->gfx.rlc.cs_data = gfx9_cs_data; 1782 1783 cs_data = adev->gfx.rlc.cs_data; 1784 1785 if (cs_data) { 1786 /* init clear state block */ 1787 r = amdgpu_gfx_rlc_init_csb(adev); 1788 if (r) 1789 return r; 1790 } 1791 1792 if (adev->flags & AMD_IS_APU) { 1793 /* TODO: double check the cp_table_size for RV */ 1794 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1795 r = amdgpu_gfx_rlc_init_cpt(adev); 1796 if (r) 1797 return r; 1798 } 1799 1800 return 0; 1801 } 1802 1803 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1804 { 1805 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1806 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1807 } 1808 1809 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1810 { 1811 int r; 1812 u32 *hpd; 1813 const __le32 *fw_data; 1814 unsigned fw_size; 1815 u32 *fw; 1816 size_t mec_hpd_size; 1817 1818 const struct gfx_firmware_header_v1_0 *mec_hdr; 1819 1820 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1821 1822 /* take ownership of the relevant compute queues */ 1823 amdgpu_gfx_compute_queue_acquire(adev); 1824 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1825 if (mec_hpd_size) { 1826 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1827 AMDGPU_GEM_DOMAIN_VRAM | 1828 AMDGPU_GEM_DOMAIN_GTT, 1829 &adev->gfx.mec.hpd_eop_obj, 1830 &adev->gfx.mec.hpd_eop_gpu_addr, 1831 (void **)&hpd); 1832 if (r) { 1833 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1834 gfx_v9_0_mec_fini(adev); 1835 return r; 1836 } 1837 1838 memset(hpd, 0, mec_hpd_size); 1839 1840 
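/*
 * Each enabled compute ring owns one GFX9_MEC_HPD_SIZE (4 KiB) slice of
 * this zeroed buffer: gfx_v9_0_compute_ring_init() later sets
 * ring->eop_gpu_addr = hpd_eop_gpu_addr + ring_id * GFX9_MEC_HPD_SIZE.
 * The BO can be unmapped and unreserved right away since the CPU does not
 * need to touch it again.
 */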
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1841 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1842 } 1843 1844 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1845 1846 fw_data = (const __le32 *) 1847 (adev->gfx.mec_fw->data + 1848 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1849 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 1850 1851 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1852 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1853 &adev->gfx.mec.mec_fw_obj, 1854 &adev->gfx.mec.mec_fw_gpu_addr, 1855 (void **)&fw); 1856 if (r) { 1857 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1858 gfx_v9_0_mec_fini(adev); 1859 return r; 1860 } 1861 1862 memcpy(fw, fw_data, fw_size); 1863 1864 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1865 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1866 1867 return 0; 1868 } 1869 1870 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1871 { 1872 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1873 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1874 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1875 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1876 (SQ_IND_INDEX__FORCE_READ_MASK)); 1877 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1878 } 1879 1880 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1881 uint32_t wave, uint32_t thread, 1882 uint32_t regno, uint32_t num, uint32_t *out) 1883 { 1884 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1885 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1886 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1887 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1888 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1889 (SQ_IND_INDEX__FORCE_READ_MASK) | 1890 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1891 while (num--) 1892 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1893 } 1894 1895 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1896 { 1897 /* type 1 wave data */ 1898 dst[(*no_fields)++] = 1; 1899 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1900 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1901 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1902 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1903 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1904 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1905 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1906 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1907 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1908 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1909 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1910 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1911 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1912 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1913 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1914 } 1915 1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1917 uint32_t wave, uint32_t start, 1918 uint32_t size, uint32_t *dst) 1919 { 1920 wave_read_regs( 1921 adev, simd, wave, 0, 1922 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1923 } 1924 
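/*
 * Wave-state readback sketch: wave_read_regs() above programs SQ_IND_INDEX
 * with the target SIMD/wave/thread plus FORCE_READ and AUTO_INCR, then
 * streams the requested number of dwords out of SQ_IND_DATA.  The SGPR
 * helper reads from start + SQIND_WAVE_SGPRS_OFFSET with thread 0, while
 * the VGPR helper below reads per-thread starting at
 * SQIND_WAVE_VGPRS_OFFSET.  These are raw indexed register reads, typically
 * reached from the debugfs wave/GPR dump path.
 */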
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1926 uint32_t wave, uint32_t thread, 1927 uint32_t start, uint32_t size, 1928 uint32_t *dst) 1929 { 1930 wave_read_regs( 1931 adev, simd, wave, thread, 1932 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1933 } 1934 1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1936 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1937 { 1938 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1939 } 1940 1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1942 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1943 .select_se_sh = &gfx_v9_0_select_se_sh, 1944 .read_wave_data = &gfx_v9_0_read_wave_data, 1945 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1946 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1947 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1948 }; 1949 1950 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 1951 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1952 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 1953 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 1954 }; 1955 1956 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 1957 .ras_block = { 1958 .hw_ops = &gfx_v9_0_ras_ops, 1959 }, 1960 }; 1961 1962 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1963 { 1964 u32 gb_addr_config; 1965 int err; 1966 1967 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1968 case IP_VERSION(9, 0, 1): 1969 adev->gfx.config.max_hw_contexts = 8; 1970 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1971 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1972 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1973 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1974 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1975 break; 1976 case IP_VERSION(9, 2, 1): 1977 adev->gfx.config.max_hw_contexts = 8; 1978 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1979 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1980 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1981 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1982 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1983 DRM_INFO("fix gfx.config for vega12\n"); 1984 break; 1985 case IP_VERSION(9, 4, 0): 1986 adev->gfx.ras = &gfx_v9_0_ras; 1987 adev->gfx.config.max_hw_contexts = 8; 1988 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1989 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1990 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1991 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1992 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1993 gb_addr_config &= ~0xf3e777ff; 1994 gb_addr_config |= 0x22014042; 1995 /* check vbios table if gpu info is not available */ 1996 err = amdgpu_atomfirmware_get_gfx_info(adev); 1997 if (err) 1998 return err; 1999 break; 2000 case IP_VERSION(9, 2, 2): 2001 case IP_VERSION(9, 1, 0): 2002 adev->gfx.config.max_hw_contexts = 8; 2003 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2004 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2005 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2006 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2007 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2008 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2009 else 2010 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2011 break; 2012 case IP_VERSION(9, 4, 1): 2013 adev->gfx.ras = &gfx_v9_4_ras; 2014 adev->gfx.config.max_hw_contexts = 8; 2015 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2016 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 
2017 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2018 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2019 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2020 gb_addr_config &= ~0xf3e777ff; 2021 gb_addr_config |= 0x22014042; 2022 break; 2023 case IP_VERSION(9, 3, 0): 2024 adev->gfx.config.max_hw_contexts = 8; 2025 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2026 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2027 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2028 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2029 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2030 gb_addr_config &= ~0xf3e777ff; 2031 gb_addr_config |= 0x22010042; 2032 break; 2033 case IP_VERSION(9, 4, 2): 2034 adev->gfx.ras = &gfx_v9_4_2_ras; 2035 adev->gfx.config.max_hw_contexts = 8; 2036 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2037 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2038 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2039 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2040 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2041 gb_addr_config &= ~0xf3e777ff; 2042 gb_addr_config |= 0x22014042; 2043 /* check vbios table if gpu info is not available */ 2044 err = amdgpu_atomfirmware_get_gfx_info(adev); 2045 if (err) 2046 return err; 2047 break; 2048 default: 2049 BUG(); 2050 break; 2051 } 2052 2053 adev->gfx.config.gb_addr_config = gb_addr_config; 2054 2055 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2056 REG_GET_FIELD( 2057 adev->gfx.config.gb_addr_config, 2058 GB_ADDR_CONFIG, 2059 NUM_PIPES); 2060 2061 adev->gfx.config.max_tile_pipes = 2062 adev->gfx.config.gb_addr_config_fields.num_pipes; 2063 2064 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2065 REG_GET_FIELD( 2066 adev->gfx.config.gb_addr_config, 2067 GB_ADDR_CONFIG, 2068 NUM_BANKS); 2069 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2070 REG_GET_FIELD( 2071 adev->gfx.config.gb_addr_config, 2072 GB_ADDR_CONFIG, 2073 MAX_COMPRESSED_FRAGS); 2074 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2075 REG_GET_FIELD( 2076 adev->gfx.config.gb_addr_config, 2077 GB_ADDR_CONFIG, 2078 NUM_RB_PER_SE); 2079 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2080 REG_GET_FIELD( 2081 adev->gfx.config.gb_addr_config, 2082 GB_ADDR_CONFIG, 2083 NUM_SHADER_ENGINES); 2084 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2085 REG_GET_FIELD( 2086 adev->gfx.config.gb_addr_config, 2087 GB_ADDR_CONFIG, 2088 PIPE_INTERLEAVE_SIZE)); 2089 2090 return 0; 2091 } 2092 2093 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2094 int mec, int pipe, int queue) 2095 { 2096 unsigned irq_type; 2097 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2098 unsigned int hw_prio; 2099 2100 ring = &adev->gfx.compute_ring[ring_id]; 2101 2102 /* mec0 is me1 */ 2103 ring->me = mec + 1; 2104 ring->pipe = pipe; 2105 ring->queue = queue; 2106 2107 ring->ring_obj = NULL; 2108 ring->use_doorbell = true; 2109 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2110 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2111 + (ring_id * GFX9_MEC_HPD_SIZE); 2112 ring->vm_hub = AMDGPU_GFXHUB(0); 2113 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2114 2115 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2116 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2117 + ring->pipe; 2118 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2119 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2120 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2121 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2122 hw_prio, NULL); 2123 } 2124 2125 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2126 { 2127 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2128 uint32_t *ptr; 2129 uint32_t inst; 2130 2131 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2132 if (ptr == NULL) { 2133 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2134 adev->gfx.ip_dump_core = NULL; 2135 } else { 2136 adev->gfx.ip_dump_core = ptr; 2137 } 2138 2139 /* Allocate memory for compute queue registers for all the instances */ 2140 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2141 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2142 adev->gfx.mec.num_queue_per_pipe; 2143 2144 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2145 if (ptr == NULL) { 2146 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2147 adev->gfx.ip_dump_compute_queues = NULL; 2148 } else { 2149 adev->gfx.ip_dump_compute_queues = ptr; 2150 } 2151 } 2152 2153 static int gfx_v9_0_sw_init(void *handle) 2154 { 2155 int i, j, k, r, ring_id; 2156 int xcc_id = 0; 2157 struct amdgpu_ring *ring; 2158 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2159 unsigned int hw_prio; 2160 2161 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2162 case IP_VERSION(9, 0, 1): 2163 case IP_VERSION(9, 2, 1): 2164 case IP_VERSION(9, 4, 0): 2165 case IP_VERSION(9, 2, 2): 2166 case IP_VERSION(9, 1, 0): 2167 case IP_VERSION(9, 4, 1): 2168 case IP_VERSION(9, 3, 0): 2169 case IP_VERSION(9, 4, 2): 2170 adev->gfx.mec.num_mec = 2; 2171 break; 2172 default: 2173 adev->gfx.mec.num_mec = 1; 2174 break; 2175 } 2176 2177 adev->gfx.mec.num_pipe_per_mec = 4; 2178 adev->gfx.mec.num_queue_per_pipe = 8; 2179 2180 /* EOP Event */ 2181 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2182 if (r) 2183 return r; 2184 2185 /* Privileged reg */ 2186 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2187 &adev->gfx.priv_reg_irq); 2188 if (r) 2189 return r; 2190 2191 /* Privileged inst */ 2192 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2193 &adev->gfx.priv_inst_irq); 2194 if (r) 2195 return r; 2196 2197 /* ECC error */ 2198 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2199 &adev->gfx.cp_ecc_error_irq); 2200 if (r) 2201 return r; 2202 2203 /* FUE error */ 2204 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2205 &adev->gfx.cp_ecc_error_irq); 2206 if (r) 2207 return r; 2208 2209 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2210 2211 if (adev->gfx.rlc.funcs) { 2212 if (adev->gfx.rlc.funcs->init) { 2213 r = adev->gfx.rlc.funcs->init(adev); 2214 if (r) { 2215 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2216 return r; 2217 } 2218 } 2219 } 2220 2221 r = gfx_v9_0_mec_init(adev); 2222 if (r) { 2223 DRM_ERROR("Failed to init MEC BOs!\n"); 2224 return r; 2225 } 2226 2227 /* set up the gfx ring */ 2228 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2229 ring = &adev->gfx.gfx_ring[i]; 2230 ring->ring_obj = NULL; 2231 if (!i) 2232 sprintf(ring->name, "gfx"); 2233 else 2234 sprintf(ring->name, "gfx_%d", i); 2235 ring->use_doorbell = true; 2236 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 
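/*
 * Note (interpretation): the soc15 doorbell assignments are counted in
 * 64-bit doorbell slots while ring->doorbell_index is consumed as a 32-bit
 * dword offset, hence the << 1 here.  The compute queues set up later in
 * this function use the same convention via gfx_v9_0_compute_ring_init(),
 * i.e. (adev->doorbell_index.mec_ring0 + ring_id) << 1.
 */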
2237 2238 /* disable scheduler on the real ring */ 2239 ring->no_scheduler = adev->gfx.mcbp; 2240 ring->vm_hub = AMDGPU_GFXHUB(0); 2241 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2242 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2243 AMDGPU_RING_PRIO_DEFAULT, NULL); 2244 if (r) 2245 return r; 2246 } 2247 2248 /* set up the software rings */ 2249 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2250 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2251 ring = &adev->gfx.sw_gfx_ring[i]; 2252 ring->ring_obj = NULL; 2253 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2254 ring->use_doorbell = true; 2255 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2256 ring->is_sw_ring = true; 2257 hw_prio = amdgpu_sw_ring_priority(i); 2258 ring->vm_hub = AMDGPU_GFXHUB(0); 2259 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2260 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2261 NULL); 2262 if (r) 2263 return r; 2264 ring->wptr = 0; 2265 } 2266 2267 /* init the muxer and add software rings */ 2268 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2269 GFX9_NUM_SW_GFX_RINGS); 2270 if (r) { 2271 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2272 return r; 2273 } 2274 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2275 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2276 &adev->gfx.sw_gfx_ring[i]); 2277 if (r) { 2278 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2279 return r; 2280 } 2281 } 2282 } 2283 2284 /* set up the compute queues - allocate horizontally across pipes */ 2285 ring_id = 0; 2286 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2287 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2288 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2289 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2290 k, j)) 2291 continue; 2292 2293 r = gfx_v9_0_compute_ring_init(adev, 2294 ring_id, 2295 i, k, j); 2296 if (r) 2297 return r; 2298 2299 ring_id++; 2300 } 2301 } 2302 } 2303 2304 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2305 if (r) { 2306 DRM_ERROR("Failed to init KIQ BOs!\n"); 2307 return r; 2308 } 2309 2310 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2311 if (r) 2312 return r; 2313 2314 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2315 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2316 if (r) 2317 return r; 2318 2319 adev->gfx.ce_ram_size = 0x8000; 2320 2321 r = gfx_v9_0_gpu_early_init(adev); 2322 if (r) 2323 return r; 2324 2325 if (amdgpu_gfx_ras_sw_init(adev)) { 2326 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2327 return -EINVAL; 2328 } 2329 2330 gfx_v9_0_alloc_ip_dump(adev); 2331 2332 return 0; 2333 } 2334 2335 2336 static int gfx_v9_0_sw_fini(void *handle) 2337 { 2338 int i; 2339 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2340 2341 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2342 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2343 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2344 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2345 } 2346 2347 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2348 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2349 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2350 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2351 2352 amdgpu_gfx_mqd_sw_fini(adev, 0); 2353 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2354 amdgpu_gfx_kiq_fini(adev, 0); 2355 2356 gfx_v9_0_mec_fini(adev); 2357 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2358 &adev->gfx.rlc.clear_state_gpu_addr, 2359 (void **)&adev->gfx.rlc.cs_ptr); 2360 if 
(adev->flags & AMD_IS_APU) { 2361 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2362 &adev->gfx.rlc.cp_table_gpu_addr, 2363 (void **)&adev->gfx.rlc.cp_table_ptr); 2364 } 2365 gfx_v9_0_free_microcode(adev); 2366 2367 kfree(adev->gfx.ip_dump_core); 2368 kfree(adev->gfx.ip_dump_compute_queues); 2369 2370 return 0; 2371 } 2372 2373 2374 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2375 { 2376 /* TODO */ 2377 } 2378 2379 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2380 u32 instance, int xcc_id) 2381 { 2382 u32 data; 2383 2384 if (instance == 0xffffffff) 2385 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2386 else 2387 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2388 2389 if (se_num == 0xffffffff) 2390 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2391 else 2392 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2393 2394 if (sh_num == 0xffffffff) 2395 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2396 else 2397 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2398 2399 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2400 } 2401 2402 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2403 { 2404 u32 data, mask; 2405 2406 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2407 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2408 2409 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2410 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2411 2412 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2413 adev->gfx.config.max_sh_per_se); 2414 2415 return (~data) & mask; 2416 } 2417 2418 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2419 { 2420 int i, j; 2421 u32 data; 2422 u32 active_rbs = 0; 2423 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2424 adev->gfx.config.max_sh_per_se; 2425 2426 mutex_lock(&adev->grbm_idx_mutex); 2427 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2428 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2429 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2430 data = gfx_v9_0_get_rb_active_bitmap(adev); 2431 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2432 rb_bitmap_width_per_sh); 2433 } 2434 } 2435 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2436 mutex_unlock(&adev->grbm_idx_mutex); 2437 2438 adev->gfx.config.backend_enable_mask = active_rbs; 2439 adev->gfx.config.num_rbs = hweight32(active_rbs); 2440 } 2441 2442 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2443 uint32_t first_vmid, 2444 uint32_t last_vmid) 2445 { 2446 uint32_t data; 2447 uint32_t trap_config_vmid_mask = 0; 2448 int i; 2449 2450 /* Calculate trap config vmid mask */ 2451 for (i = first_vmid; i < last_vmid; i++) 2452 trap_config_vmid_mask |= (1 << i); 2453 2454 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2455 VMID_SEL, trap_config_vmid_mask); 2456 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 2457 TRAP_EN, 1); 2458 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2459 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2460 2461 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2462 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2463 } 2464 2465 #define DEFAULT_SH_MEM_BASES (0x6000) 2466 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2467 { 2468 int i; 2469 
uint32_t sh_mem_config; 2470 uint32_t sh_mem_bases; 2471 2472 /* 2473 * Configure apertures: 2474 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2475 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2476 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2477 */ 2478 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2479 2480 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2481 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2482 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2483 2484 mutex_lock(&adev->srbm_mutex); 2485 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2486 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2487 /* CP and shaders */ 2488 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2489 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2490 } 2491 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2492 mutex_unlock(&adev->srbm_mutex); 2493 2494 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2495 access. These should be enabled by FW for target VMIDs. */ 2496 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2497 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2498 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2499 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2500 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2501 } 2502 } 2503 2504 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2505 { 2506 int vmid; 2507 2508 /* 2509 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2510 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2511 * the driver can enable them for graphics. VMID0 should maintain 2512 * access so that HWS firmware can save/restore entries. 2513 */ 2514 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2515 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2516 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2517 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2518 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2519 } 2520 } 2521 2522 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2523 { 2524 uint32_t tmp; 2525 2526 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2527 case IP_VERSION(9, 4, 1): 2528 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2529 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2530 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2531 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2532 break; 2533 default: 2534 break; 2535 } 2536 } 2537 2538 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2539 { 2540 u32 tmp; 2541 int i; 2542 2543 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2544 2545 gfx_v9_0_tiling_mode_table_init(adev); 2546 2547 if (adev->gfx.num_gfx_rings) 2548 gfx_v9_0_setup_rb(adev); 2549 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2550 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2551 2552 /* XXX SH_MEM regs */ 2553 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2554 mutex_lock(&adev->srbm_mutex); 2555 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2556 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2557 /* CP and shaders */ 2558 if (i == 0) { 2559 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2560 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2561 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2562 !!adev->gmc.noretry); 2563 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2564 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2565 } else { 2566 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, 
ALIGNMENT_MODE, 2567 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2568 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2569 !!adev->gmc.noretry); 2570 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2571 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2572 (adev->gmc.private_aperture_start >> 48)); 2573 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2574 (adev->gmc.shared_aperture_start >> 48)); 2575 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2576 } 2577 } 2578 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2579 2580 mutex_unlock(&adev->srbm_mutex); 2581 2582 gfx_v9_0_init_compute_vmid(adev); 2583 gfx_v9_0_init_gds_vmid(adev); 2584 gfx_v9_0_init_sq_config(adev); 2585 } 2586 2587 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2588 { 2589 u32 i, j, k; 2590 u32 mask; 2591 2592 mutex_lock(&adev->grbm_idx_mutex); 2593 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2594 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2595 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2596 for (k = 0; k < adev->usec_timeout; k++) { 2597 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2598 break; 2599 udelay(1); 2600 } 2601 if (k == adev->usec_timeout) { 2602 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2603 0xffffffff, 0xffffffff, 0); 2604 mutex_unlock(&adev->grbm_idx_mutex); 2605 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2606 i, j); 2607 return; 2608 } 2609 } 2610 } 2611 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2612 mutex_unlock(&adev->grbm_idx_mutex); 2613 2614 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2615 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2616 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2617 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2618 for (k = 0; k < adev->usec_timeout; k++) { 2619 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2620 break; 2621 udelay(1); 2622 } 2623 } 2624 2625 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2626 bool enable) 2627 { 2628 u32 tmp; 2629 2630 /* These interrupts should be enabled to drive DS clock */ 2631 2632 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2633 2634 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2635 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2636 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2637 if(adev->gfx.num_gfx_rings) 2638 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 2639 2640 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2641 } 2642 2643 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2644 { 2645 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2646 /* csib */ 2647 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2648 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2649 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2650 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2651 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2652 adev->gfx.rlc.clear_state_size); 2653 } 2654 2655 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2656 int indirect_offset, 2657 int list_size, 2658 int *unique_indirect_regs, 2659 int unique_indirect_reg_count, 2660 int *indirect_start_offsets, 2661 int *indirect_start_offsets_count, 2662 int max_start_offsets_count) 2663 { 2664 int idx; 2665 2666 for (; indirect_offset < list_size; indirect_offset++) { 2667 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2668 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2669 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2670 2671 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2672 indirect_offset += 2; 2673 2674 /* look for the matching indice */ 2675 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2676 if (unique_indirect_regs[idx] == 2677 register_list_format[indirect_offset] || 2678 !unique_indirect_regs[idx]) 2679 break; 2680 } 2681 2682 BUG_ON(idx >= unique_indirect_reg_count); 2683 2684 if (!unique_indirect_regs[idx]) 2685 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2686 2687 indirect_offset++; 2688 } 2689 } 2690 } 2691 2692 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2693 { 2694 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2695 int unique_indirect_reg_count = 0; 2696 2697 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2698 int indirect_start_offsets_count = 0; 2699 2700 int list_size = 0; 2701 int i = 0, j = 0; 2702 u32 tmp = 0; 2703 2704 u32 *register_list_format = 2705 kmemdup(adev->gfx.rlc.register_list_format, 2706 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2707 if (!register_list_format) 2708 return -ENOMEM; 2709 2710 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2711 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2712 gfx_v9_1_parse_ind_reg_list(register_list_format, 2713 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2714 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2715 unique_indirect_regs, 2716 unique_indirect_reg_count, 2717 indirect_start_offsets, 2718 &indirect_start_offsets_count, 2719 ARRAY_SIZE(indirect_start_offsets)); 2720 2721 /* enable auto inc in case it is disabled */ 2722 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2723 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2724 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2725 2726 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2727 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2728 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2729 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2730 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2731 adev->gfx.rlc.register_restore[i]); 2732 2733 /* load indirect register */ 2734 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2735 adev->gfx.rlc.reg_list_format_start); 2736 2737 
/* direct register portion */ 2738 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2739 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2740 register_list_format[i]); 2741 2742 /* indirect register portion */ 2743 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2744 if (register_list_format[i] == 0xFFFFFFFF) { 2745 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2746 continue; 2747 } 2748 2749 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2750 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2751 2752 for (j = 0; j < unique_indirect_reg_count; j++) { 2753 if (register_list_format[i] == unique_indirect_regs[j]) { 2754 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2755 break; 2756 } 2757 } 2758 2759 BUG_ON(j >= unique_indirect_reg_count); 2760 2761 i++; 2762 } 2763 2764 /* set save/restore list size */ 2765 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2766 list_size = list_size >> 1; 2767 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2768 adev->gfx.rlc.reg_restore_list_size); 2769 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2770 2771 /* write the starting offsets to RLC scratch ram */ 2772 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2773 adev->gfx.rlc.starting_offsets_start); 2774 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2775 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2776 indirect_start_offsets[i]); 2777 2778 /* load unique indirect regs*/ 2779 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2780 if (unique_indirect_regs[i] != 0) { 2781 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2782 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2783 unique_indirect_regs[i] & 0x3FFFF); 2784 2785 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2786 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2787 unique_indirect_regs[i] >> 20); 2788 } 2789 } 2790 2791 kfree(register_list_format); 2792 return 0; 2793 } 2794 2795 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2796 { 2797 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2798 } 2799 2800 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2801 bool enable) 2802 { 2803 uint32_t data = 0; 2804 uint32_t default_data = 0; 2805 2806 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2807 if (enable) { 2808 /* enable GFXIP control over CGPG */ 2809 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2810 if(default_data != data) 2811 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2812 2813 /* update status */ 2814 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2815 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2816 if(default_data != data) 2817 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2818 } else { 2819 /* restore GFXIP control over GCPG */ 2820 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2821 if(default_data != data) 2822 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2823 } 2824 } 2825 2826 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2827 { 2828 uint32_t data = 0; 2829 2830 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2831 AMD_PG_SUPPORT_GFX_SMG | 2832 AMD_PG_SUPPORT_GFX_DMG)) { 2833 /* init IDLE_POLL_COUNT = 60 */ 2834 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2835 data &= 
~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2836 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2837 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2838 2839 /* init RLC PG Delay */ 2840 data = 0; 2841 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2842 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2843 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2844 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2845 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2846 2847 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2848 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2849 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2850 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2851 2852 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2853 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2854 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2855 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2856 2857 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2858 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2859 2860 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2861 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2862 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2863 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) 2864 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2865 } 2866 } 2867 2868 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2869 bool enable) 2870 { 2871 uint32_t data = 0; 2872 uint32_t default_data = 0; 2873 2874 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2875 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2876 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2877 enable ? 1 : 0); 2878 if (default_data != data) 2879 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2880 } 2881 2882 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2883 bool enable) 2884 { 2885 uint32_t data = 0; 2886 uint32_t default_data = 0; 2887 2888 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2889 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2890 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2891 enable ? 1 : 0); 2892 if(default_data != data) 2893 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2894 } 2895 2896 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2897 bool enable) 2898 { 2899 uint32_t data = 0; 2900 uint32_t default_data = 0; 2901 2902 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2903 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2904 CP_PG_DISABLE, 2905 enable ? 0 : 1); 2906 if(default_data != data) 2907 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2908 } 2909 2910 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2911 bool enable) 2912 { 2913 uint32_t data, default_data; 2914 2915 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2916 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2917 GFX_POWER_GATING_ENABLE, 2918 enable ? 
1 : 0); 2919 if(default_data != data) 2920 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2921 } 2922 2923 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2924 bool enable) 2925 { 2926 uint32_t data, default_data; 2927 2928 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2929 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2930 GFX_PIPELINE_PG_ENABLE, 2931 enable ? 1 : 0); 2932 if(default_data != data) 2933 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2934 2935 if (!enable) 2936 /* read any GFX register to wake up GFX */ 2937 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2938 } 2939 2940 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2941 bool enable) 2942 { 2943 uint32_t data, default_data; 2944 2945 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2946 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2947 STATIC_PER_CU_PG_ENABLE, 2948 enable ? 1 : 0); 2949 if(default_data != data) 2950 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2951 } 2952 2953 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2954 bool enable) 2955 { 2956 uint32_t data, default_data; 2957 2958 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2959 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2960 DYN_PER_CU_PG_ENABLE, 2961 enable ? 1 : 0); 2962 if(default_data != data) 2963 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2964 } 2965 2966 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2967 { 2968 gfx_v9_0_init_csb(adev); 2969 2970 /* 2971 * Rlc save restore list is workable since v2_1. 2972 * And it's needed by gfxoff feature. 2973 */ 2974 if (adev->gfx.rlc.is_rlc_v2_1) { 2975 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 2976 IP_VERSION(9, 2, 1) || 2977 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 2978 gfx_v9_1_init_rlc_save_restore_list(adev); 2979 gfx_v9_0_enable_save_restore_machine(adev); 2980 } 2981 2982 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2983 AMD_PG_SUPPORT_GFX_SMG | 2984 AMD_PG_SUPPORT_GFX_DMG | 2985 AMD_PG_SUPPORT_CP | 2986 AMD_PG_SUPPORT_GDS | 2987 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2988 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 2989 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2990 gfx_v9_0_init_gfx_power_gating(adev); 2991 } 2992 } 2993 2994 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2995 { 2996 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2997 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2998 gfx_v9_0_wait_for_rlc_serdes(adev); 2999 } 3000 3001 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3002 { 3003 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3004 udelay(50); 3005 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3006 udelay(50); 3007 } 3008 3009 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3010 { 3011 #ifdef AMDGPU_RLC_DEBUG_RETRY 3012 u32 rlc_ucode_ver; 3013 #endif 3014 3015 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3016 udelay(50); 3017 3018 /* carrizo do enable cp interrupt after cp inited */ 3019 if (!(adev->flags & AMD_IS_APU)) { 3020 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3021 udelay(50); 3022 } 3023 3024 #ifdef AMDGPU_RLC_DEBUG_RETRY 3025 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3026 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3027 if(rlc_ucode_ver == 0x108) { 3028 DRM_INFO("Using rlc debug ucode. 
mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3029 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3030 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3031 * default is 0x9C4 to create a 100us interval */ 3032 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3033 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3034 * to disable the page fault retry interrupts, default is 3035 * 0x100 (256) */ 3036 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3037 } 3038 #endif 3039 } 3040 3041 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3042 { 3043 const struct rlc_firmware_header_v2_0 *hdr; 3044 const __le32 *fw_data; 3045 unsigned i, fw_size; 3046 3047 if (!adev->gfx.rlc_fw) 3048 return -EINVAL; 3049 3050 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3051 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3052 3053 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3054 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3055 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3056 3057 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3058 RLCG_UCODE_LOADING_START_ADDRESS); 3059 for (i = 0; i < fw_size; i++) 3060 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3061 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3062 3063 return 0; 3064 } 3065 3066 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3067 { 3068 int r; 3069 3070 if (amdgpu_sriov_vf(adev)) { 3071 gfx_v9_0_init_csb(adev); 3072 return 0; 3073 } 3074 3075 adev->gfx.rlc.funcs->stop(adev); 3076 3077 /* disable CG */ 3078 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3079 3080 gfx_v9_0_init_pg(adev); 3081 3082 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3083 /* legacy rlc firmware loading */ 3084 r = gfx_v9_0_rlc_load_microcode(adev); 3085 if (r) 3086 return r; 3087 } 3088 3089 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3090 case IP_VERSION(9, 2, 2): 3091 case IP_VERSION(9, 1, 0): 3092 gfx_v9_0_init_lbpw(adev); 3093 if (amdgpu_lbpw == 0) 3094 gfx_v9_0_enable_lbpw(adev, false); 3095 else 3096 gfx_v9_0_enable_lbpw(adev, true); 3097 break; 3098 case IP_VERSION(9, 4, 0): 3099 gfx_v9_4_init_lbpw(adev); 3100 if (amdgpu_lbpw > 0) 3101 gfx_v9_0_enable_lbpw(adev, true); 3102 else 3103 gfx_v9_0_enable_lbpw(adev, false); 3104 break; 3105 default: 3106 break; 3107 } 3108 3109 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3110 3111 adev->gfx.rlc.funcs->start(adev); 3112 3113 return 0; 3114 } 3115 3116 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3117 { 3118 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3119 3120 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3121 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3122 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); 3123 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3124 udelay(50); 3125 } 3126 3127 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3128 { 3129 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3130 const struct gfx_firmware_header_v1_0 *ce_hdr; 3131 const struct gfx_firmware_header_v1_0 *me_hdr; 3132 const __le32 *fw_data; 3133 unsigned i, fw_size; 3134 3135 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3136 return -EINVAL; 3137 3138 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3139 adev->gfx.pfp_fw->data; 3140 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3141 adev->gfx.ce_fw->data; 3142 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3143 adev->gfx.me_fw->data; 3144 3145 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3146 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3147 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3148 3149 gfx_v9_0_cp_gfx_enable(adev, false); 3150 3151 /* PFP */ 3152 fw_data = (const __le32 *) 3153 (adev->gfx.pfp_fw->data + 3154 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3155 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3156 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3157 for (i = 0; i < fw_size; i++) 3158 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3159 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3160 3161 /* CE */ 3162 fw_data = (const __le32 *) 3163 (adev->gfx.ce_fw->data + 3164 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3165 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3166 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3167 for (i = 0; i < fw_size; i++) 3168 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3169 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3170 3171 /* ME */ 3172 fw_data = (const __le32 *) 3173 (adev->gfx.me_fw->data + 3174 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3175 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3176 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3177 for (i = 0; i < fw_size; i++) 3178 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3179 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3180 3181 return 0; 3182 } 3183 3184 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3185 { 3186 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3187 const struct cs_section_def *sect = NULL; 3188 const struct cs_extent_def *ext = NULL; 3189 int r, i, tmp; 3190 3191 /* init the CP */ 3192 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3193 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3194 3195 gfx_v9_0_cp_gfx_enable(adev, true); 3196 3197 /* Now only limit the quirk on the APU gfx9 series and already 3198 * confirmed that the APU gfx10/gfx11 needn't such update. 
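 * The check below skips the clear-state buffer (CSB) resubmission when an APU
 * resumes from S3 without suspend_complete being set.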
3199 */ 3200 if (adev->flags & AMD_IS_APU && 3201 adev->in_s3 && !adev->suspend_complete) { 3202 DRM_INFO(" Will skip the CSB packet resubmit\n"); 3203 return 0; 3204 } 3205 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3206 if (r) { 3207 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3208 return r; 3209 } 3210 3211 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3212 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3213 3214 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3215 amdgpu_ring_write(ring, 0x80000000); 3216 amdgpu_ring_write(ring, 0x80000000); 3217 3218 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3219 for (ext = sect->section; ext->extent != NULL; ++ext) { 3220 if (sect->id == SECT_CONTEXT) { 3221 amdgpu_ring_write(ring, 3222 PACKET3(PACKET3_SET_CONTEXT_REG, 3223 ext->reg_count)); 3224 amdgpu_ring_write(ring, 3225 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3226 for (i = 0; i < ext->reg_count; i++) 3227 amdgpu_ring_write(ring, ext->extent[i]); 3228 } 3229 } 3230 } 3231 3232 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3233 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3234 3235 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3236 amdgpu_ring_write(ring, 0); 3237 3238 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3239 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3240 amdgpu_ring_write(ring, 0x8000); 3241 amdgpu_ring_write(ring, 0x8000); 3242 3243 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3244 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3245 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3246 amdgpu_ring_write(ring, tmp); 3247 amdgpu_ring_write(ring, 0); 3248 3249 amdgpu_ring_commit(ring); 3250 3251 return 0; 3252 } 3253 3254 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3255 { 3256 struct amdgpu_ring *ring; 3257 u32 tmp; 3258 u32 rb_bufsz; 3259 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3260 3261 /* Set the write pointer delay */ 3262 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3263 3264 /* set the RB to use vmid 0 */ 3265 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3266 3267 /* Set ring buffer size */ 3268 ring = &adev->gfx.gfx_ring[0]; 3269 rb_bufsz = order_base_2(ring->ring_size / 8); 3270 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3271 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3272 #ifdef __BIG_ENDIAN 3273 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3274 #endif 3275 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3276 3277 /* Initialize the ring buffer's write pointers */ 3278 ring->wptr = 0; 3279 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3280 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3281 3282 /* set the wb address wether it's enabled or not */ 3283 rptr_addr = ring->rptr_gpu_addr; 3284 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3285 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3286 3287 wptr_gpu_addr = ring->wptr_gpu_addr; 3288 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3289 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3290 3291 mdelay(1); 3292 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3293 3294 rb_addr = ring->gpu_addr >> 8; 3295 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3296 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, 
upper_32_bits(rb_addr)); 3297 3298 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3299 if (ring->use_doorbell) { 3300 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3301 DOORBELL_OFFSET, ring->doorbell_index); 3302 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3303 DOORBELL_EN, 1); 3304 } else { 3305 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3306 } 3307 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3308 3309 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3310 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3311 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3312 3313 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3314 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3315 3316 3317 /* start the ring */ 3318 gfx_v9_0_cp_gfx_start(adev); 3319 3320 return 0; 3321 } 3322 3323 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3324 { 3325 if (enable) { 3326 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3327 } else { 3328 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3329 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3330 adev->gfx.kiq[0].ring.sched.ready = false; 3331 } 3332 udelay(50); 3333 } 3334 3335 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3336 { 3337 const struct gfx_firmware_header_v1_0 *mec_hdr; 3338 const __le32 *fw_data; 3339 unsigned i; 3340 u32 tmp; 3341 3342 if (!adev->gfx.mec_fw) 3343 return -EINVAL; 3344 3345 gfx_v9_0_cp_compute_enable(adev, false); 3346 3347 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3348 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3349 3350 fw_data = (const __le32 *) 3351 (adev->gfx.mec_fw->data + 3352 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3353 tmp = 0; 3354 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3355 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3356 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3357 3358 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3359 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3360 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3361 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3362 3363 /* MEC1 */ 3364 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3365 mec_hdr->jt_offset); 3366 for (i = 0; i < mec_hdr->jt_size; i++) 3367 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3368 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3369 3370 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3371 adev->gfx.mec_fw_version); 3372 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
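 * Only the MEC1 jump table is programmed above.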
*/ 3373 3374 return 0; 3375 } 3376 3377 /* KIQ functions */ 3378 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3379 { 3380 uint32_t tmp; 3381 struct amdgpu_device *adev = ring->adev; 3382 3383 /* tell RLC which is KIQ queue */ 3384 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3385 tmp &= 0xffffff00; 3386 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3387 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3388 tmp |= 0x80; 3389 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3390 } 3391 3392 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3393 { 3394 struct amdgpu_device *adev = ring->adev; 3395 3396 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3397 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3398 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3399 mqd->cp_hqd_queue_priority = 3400 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3401 } 3402 } 3403 } 3404 3405 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3406 { 3407 struct amdgpu_device *adev = ring->adev; 3408 struct v9_mqd *mqd = ring->mqd_ptr; 3409 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3410 uint32_t tmp; 3411 3412 mqd->header = 0xC0310800; 3413 mqd->compute_pipelinestat_enable = 0x00000001; 3414 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3415 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3416 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3417 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3418 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3419 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3420 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3421 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3422 mqd->compute_misc_reserved = 0x00000003; 3423 3424 mqd->dynamic_cu_mask_addr_lo = 3425 lower_32_bits(ring->mqd_gpu_addr 3426 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3427 mqd->dynamic_cu_mask_addr_hi = 3428 upper_32_bits(ring->mqd_gpu_addr 3429 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3430 3431 eop_base_addr = ring->eop_gpu_addr >> 8; 3432 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3433 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3434 3435 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3436 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3437 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3438 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3439 3440 mqd->cp_hqd_eop_control = tmp; 3441 3442 /* enable doorbell? 
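 * The value is only staged in mqd->cp_hqd_pq_doorbell_control here; the
 * register itself is written later from gfx_v9_0_kiq_init_register().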
*/ 3443 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3444 3445 if (ring->use_doorbell) { 3446 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3447 DOORBELL_OFFSET, ring->doorbell_index); 3448 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3449 DOORBELL_EN, 1); 3450 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3451 DOORBELL_SOURCE, 0); 3452 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3453 DOORBELL_HIT, 0); 3454 } else { 3455 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3456 DOORBELL_EN, 0); 3457 } 3458 3459 mqd->cp_hqd_pq_doorbell_control = tmp; 3460 3461 /* disable the queue if it's active */ 3462 ring->wptr = 0; 3463 mqd->cp_hqd_dequeue_request = 0; 3464 mqd->cp_hqd_pq_rptr = 0; 3465 mqd->cp_hqd_pq_wptr_lo = 0; 3466 mqd->cp_hqd_pq_wptr_hi = 0; 3467 3468 /* set the pointer to the MQD */ 3469 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3470 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3471 3472 /* set MQD vmid to 0 */ 3473 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3474 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3475 mqd->cp_mqd_control = tmp; 3476 3477 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3478 hqd_gpu_addr = ring->gpu_addr >> 8; 3479 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3480 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3481 3482 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3483 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3484 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3485 (order_base_2(ring->ring_size / 4) - 1)); 3486 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3487 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3488 #ifdef __BIG_ENDIAN 3489 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3490 #endif 3491 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3492 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3494 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3495 mqd->cp_hqd_pq_control = tmp; 3496 3497 /* set the wb address whether it's enabled or not */ 3498 wb_gpu_addr = ring->rptr_gpu_addr; 3499 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3500 mqd->cp_hqd_pq_rptr_report_addr_hi = 3501 upper_32_bits(wb_gpu_addr) & 0xffff; 3502 3503 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3504 wb_gpu_addr = ring->wptr_gpu_addr; 3505 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3506 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3507 3508 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3509 ring->wptr = 0; 3510 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3511 3512 /* set the vmid for the queue */ 3513 mqd->cp_hqd_vmid = 0; 3514 3515 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3516 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3517 mqd->cp_hqd_persistent_state = tmp; 3518 3519 /* set MIN_IB_AVAIL_SIZE */ 3520 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3521 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3522 mqd->cp_hqd_ib_control = tmp; 3523 3524 /* set static priority for a queue/ring */ 3525 gfx_v9_0_mqd_set_priority(ring, mqd); 3526 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3527 3528 /* map_queues packet doesn't need activate the queue, 3529 * so only kiq need set this field. 
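 * (the compute queues are activated later through amdgpu_gfx_enable_kcq())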
3530 */ 3531 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3532 mqd->cp_hqd_active = 1; 3533 3534 return 0; 3535 } 3536 3537 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3538 { 3539 struct amdgpu_device *adev = ring->adev; 3540 struct v9_mqd *mqd = ring->mqd_ptr; 3541 int j; 3542 3543 /* disable wptr polling */ 3544 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3545 3546 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3547 mqd->cp_hqd_eop_base_addr_lo); 3548 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3549 mqd->cp_hqd_eop_base_addr_hi); 3550 3551 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3552 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3553 mqd->cp_hqd_eop_control); 3554 3555 /* enable doorbell? */ 3556 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3557 mqd->cp_hqd_pq_doorbell_control); 3558 3559 /* disable the queue if it's active */ 3560 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3561 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3562 for (j = 0; j < adev->usec_timeout; j++) { 3563 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3564 break; 3565 udelay(1); 3566 } 3567 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3568 mqd->cp_hqd_dequeue_request); 3569 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3570 mqd->cp_hqd_pq_rptr); 3571 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3572 mqd->cp_hqd_pq_wptr_lo); 3573 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3574 mqd->cp_hqd_pq_wptr_hi); 3575 } 3576 3577 /* set the pointer to the MQD */ 3578 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3579 mqd->cp_mqd_base_addr_lo); 3580 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3581 mqd->cp_mqd_base_addr_hi); 3582 3583 /* set MQD vmid to 0 */ 3584 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3585 mqd->cp_mqd_control); 3586 3587 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3588 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3589 mqd->cp_hqd_pq_base_lo); 3590 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3591 mqd->cp_hqd_pq_base_hi); 3592 3593 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3594 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3595 mqd->cp_hqd_pq_control); 3596 3597 /* set the wb address whether it's enabled or not */ 3598 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3599 mqd->cp_hqd_pq_rptr_report_addr_lo); 3600 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3601 mqd->cp_hqd_pq_rptr_report_addr_hi); 3602 3603 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3604 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3605 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3606 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3607 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3608 3609 /* enable the doorbell if requested */ 3610 if (ring->use_doorbell) { 3611 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3612 (adev->doorbell_index.kiq * 2) << 2); 3613 /* If GC has entered CGPG, ringing doorbell > first page 3614 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3615 * workaround this issue. And this change has to align with firmware 3616 * update. 
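 * check_if_enlarge_doorbell_range() below decides whether the enlarged
 * range is needed.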
3617 */ 3618 if (check_if_enlarge_doorbell_range(adev)) 3619 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3620 (adev->doorbell.size - 4)); 3621 else 3622 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3623 (adev->doorbell_index.userqueue_end * 2) << 2); 3624 } 3625 3626 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3627 mqd->cp_hqd_pq_doorbell_control); 3628 3629 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3631 mqd->cp_hqd_pq_wptr_lo); 3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3633 mqd->cp_hqd_pq_wptr_hi); 3634 3635 /* set the vmid for the queue */ 3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3637 3638 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3639 mqd->cp_hqd_persistent_state); 3640 3641 /* activate the queue */ 3642 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3643 mqd->cp_hqd_active); 3644 3645 if (ring->use_doorbell) 3646 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3647 3648 return 0; 3649 } 3650 3651 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3652 { 3653 struct amdgpu_device *adev = ring->adev; 3654 int j; 3655 3656 /* disable the queue if it's active */ 3657 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3658 3659 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3660 3661 for (j = 0; j < adev->usec_timeout; j++) { 3662 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3663 break; 3664 udelay(1); 3665 } 3666 3667 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3668 DRM_DEBUG("KIQ dequeue request failed.\n"); 3669 3670 /* Manual disable if dequeue request times out */ 3671 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3672 } 3673 3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3675 0); 3676 } 3677 3678 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3679 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3686 3687 return 0; 3688 } 3689 3690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3691 { 3692 struct amdgpu_device *adev = ring->adev; 3693 struct v9_mqd *mqd = ring->mqd_ptr; 3694 struct v9_mqd *tmp_mqd; 3695 3696 gfx_v9_0_kiq_setting(ring); 3697 3698 /* The GPU could be in a bad state during probe if the driver triggered a reset 3699 * right after loading the SMU; in that case the MQD has not been initialized and 3700 * the driver needs to re-initialize it.
3701 * Check mqd->cp_hqd_pq_control, since that value should not be 0 if the MQD was set up before. 3702 */ 3703 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 3704 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) { 3705 /* for the GPU_RESET case, reset the MQD to a clean state */ 3706 if (adev->gfx.kiq[0].mqd_backup) 3707 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 3708 3709 /* reset ring buffer */ 3710 ring->wptr = 0; 3711 amdgpu_ring_clear_ring(ring); 3712 3713 mutex_lock(&adev->srbm_mutex); 3714 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3715 gfx_v9_0_kiq_init_register(ring); 3716 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3717 mutex_unlock(&adev->srbm_mutex); 3718 } else { 3719 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3720 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3721 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3722 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 3723 amdgpu_ring_clear_ring(ring); 3724 mutex_lock(&adev->srbm_mutex); 3725 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3726 gfx_v9_0_mqd_init(ring); 3727 gfx_v9_0_kiq_init_register(ring); 3728 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3729 mutex_unlock(&adev->srbm_mutex); 3730 3731 if (adev->gfx.kiq[0].mqd_backup) 3732 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 3733 } 3734 3735 return 0; 3736 } 3737 3738 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) 3739 { 3740 struct amdgpu_device *adev = ring->adev; 3741 struct v9_mqd *mqd = ring->mqd_ptr; 3742 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3743 struct v9_mqd *tmp_mqd; 3744 3745 /* Same as the KIQ init above: the driver needs to re-init the MQD if 3746 * mqd->cp_hqd_pq_control shows it has not been initialized before. 3747 */ 3748 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3749 3750 if (!tmp_mqd->cp_hqd_pq_control || 3751 (!amdgpu_in_reset(adev) && !adev->in_suspend)) { 3752 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3753 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3754 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3755 mutex_lock(&adev->srbm_mutex); 3756 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3757 gfx_v9_0_mqd_init(ring); 3758 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3759 mutex_unlock(&adev->srbm_mutex); 3760 3761 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3762 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3763 } else { 3764 /* restore MQD to a clean status */ 3765 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3766 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3767 /* reset ring buffer */ 3768 ring->wptr = 0; 3769 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 3770 amdgpu_ring_clear_ring(ring); 3771 } 3772 3773 return 0; 3774 } 3775 3776 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3777 { 3778 struct amdgpu_ring *ring; 3779 int r; 3780 3781 ring = &adev->gfx.kiq[0].ring; 3782 3783 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3784 if (unlikely(r != 0)) 3785 return r; 3786 3787 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3788 if (unlikely(r != 0)) { 3789 amdgpu_bo_unreserve(ring->mqd_obj); 3790 return r; 3791 } 3792 3793 gfx_v9_0_kiq_init_queue(ring); 3794 amdgpu_bo_kunmap(ring->mqd_obj); 3795 ring->mqd_ptr = NULL; 3796 amdgpu_bo_unreserve(ring->mqd_obj); 3797 return 0; 3798 } 3799 3800 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3801 { 3802 struct amdgpu_ring *ring = NULL; 3803 int r = 0, i; 3804 3805 gfx_v9_0_cp_compute_enable(adev, true); 3806 3807 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3808 ring = &adev->gfx.compute_ring[i]; 3809 3810 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3811 if (unlikely(r != 0)) 3812 goto done; 3813 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3814 if (!r) { 3815 r = gfx_v9_0_kcq_init_queue(ring); 3816 amdgpu_bo_kunmap(ring->mqd_obj); 3817 ring->mqd_ptr = NULL; 3818 } 3819 amdgpu_bo_unreserve(ring->mqd_obj); 3820 if (r) 3821 goto done; 3822 } 3823 3824 r = amdgpu_gfx_enable_kcq(adev, 0); 3825 done: 3826 return r; 3827 } 3828 3829 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3830 { 3831 int r, i; 3832 struct amdgpu_ring *ring; 3833 3834 if (!(adev->flags & AMD_IS_APU)) 3835 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3836 3837 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3838 if (adev->gfx.num_gfx_rings) { 3839 /* legacy firmware loading */ 3840 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3841 if (r) 3842 return r; 3843 } 3844 3845 r = gfx_v9_0_cp_compute_load_microcode(adev); 3846 if (r) 3847 return r; 3848 } 3849 3850 r = gfx_v9_0_kiq_resume(adev); 3851 if (r) 3852 return r; 3853 3854 if (adev->gfx.num_gfx_rings) { 3855 r = gfx_v9_0_cp_gfx_resume(adev); 3856 if (r) 3857 return r; 3858 } 3859 3860 r = gfx_v9_0_kcq_resume(adev); 3861 if (r) 3862 return r; 3863 3864 if (adev->gfx.num_gfx_rings) { 3865 ring = &adev->gfx.gfx_ring[0]; 3866 r = amdgpu_ring_test_helper(ring); 3867 if (r) 3868 return r; 3869 } 3870 3871 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3872 ring = &adev->gfx.compute_ring[i]; 3873 amdgpu_ring_test_helper(ring); 3874 } 3875 3876 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3877 3878 return 0; 3879 } 3880 3881 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3882 { 3883 u32 tmp; 3884 3885 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 3886 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 3887 return; 3888 3889 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3890 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3891 adev->df.hash_status.hash_64k); 3892 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3893 adev->df.hash_status.hash_2m); 3894 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3895 adev->df.hash_status.hash_1g); 3896 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3897 } 3898 3899 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3900 { 3901 if (adev->gfx.num_gfx_rings) 3902 gfx_v9_0_cp_gfx_enable(adev, enable); 3903 gfx_v9_0_cp_compute_enable(adev, enable); 3904 } 3905 3906 static int gfx_v9_0_hw_init(void *handle) 3907 { 3908 int r; 3909 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3910 3911 if (!amdgpu_sriov_vf(adev)) 3912 gfx_v9_0_init_golden_registers(adev); 3913 3914 gfx_v9_0_constants_init(adev); 3915 3916 gfx_v9_0_init_tcp_config(adev); 3917 3918 r = adev->gfx.rlc.funcs->resume(adev); 3919 if (r) 3920 return r; 3921 3922 r = gfx_v9_0_cp_resume(adev); 3923 if (r) 3924 return r; 3925 3926 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 3927 gfx_v9_4_2_set_power_brake_sequence(adev); 3928 3929 return r; 3930 } 3931 3932 static int gfx_v9_0_hw_fini(void *handle) 3933 { 3934 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3935 3936 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3937 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3938 
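/* likewise drop the references on the privileged register/instruction fault interrupts */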
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3939 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3940 3941 /* DF freeze and kcq disable will fail */ 3942 if (!amdgpu_ras_intr_triggered()) 3943 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3944 amdgpu_gfx_disable_kcq(adev, 0); 3945 3946 if (amdgpu_sriov_vf(adev)) { 3947 gfx_v9_0_cp_gfx_enable(adev, false); 3948 /* must disable polling for SRIOV when hw finished, otherwise 3949 * CPC engine may still keep fetching WB address which is already 3950 * invalid after sw finished and trigger DMAR reading error in 3951 * hypervisor side. 3952 */ 3953 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3954 return 0; 3955 } 3956 3957 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3958 * otherwise KIQ is hanging when binding back 3959 */ 3960 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 3961 mutex_lock(&adev->srbm_mutex); 3962 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 3963 adev->gfx.kiq[0].ring.pipe, 3964 adev->gfx.kiq[0].ring.queue, 0, 0); 3965 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 3966 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3967 mutex_unlock(&adev->srbm_mutex); 3968 } 3969 3970 gfx_v9_0_cp_enable(adev, false); 3971 3972 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 3973 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 3974 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 3975 dev_dbg(adev->dev, "Skipping RLC halt\n"); 3976 return 0; 3977 } 3978 3979 adev->gfx.rlc.funcs->stop(adev); 3980 return 0; 3981 } 3982 3983 static int gfx_v9_0_suspend(void *handle) 3984 { 3985 return gfx_v9_0_hw_fini(handle); 3986 } 3987 3988 static int gfx_v9_0_resume(void *handle) 3989 { 3990 return gfx_v9_0_hw_init(handle); 3991 } 3992 3993 static bool gfx_v9_0_is_idle(void *handle) 3994 { 3995 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3996 3997 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3998 GRBM_STATUS, GUI_ACTIVE)) 3999 return false; 4000 else 4001 return true; 4002 } 4003 4004 static int gfx_v9_0_wait_for_idle(void *handle) 4005 { 4006 unsigned i; 4007 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4008 4009 for (i = 0; i < adev->usec_timeout; i++) { 4010 if (gfx_v9_0_is_idle(handle)) 4011 return 0; 4012 udelay(1); 4013 } 4014 return -ETIMEDOUT; 4015 } 4016 4017 static int gfx_v9_0_soft_reset(void *handle) 4018 { 4019 u32 grbm_soft_reset = 0; 4020 u32 tmp; 4021 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4022 4023 /* GRBM_STATUS */ 4024 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4025 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4026 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4027 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4028 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4029 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4030 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4031 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4032 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4033 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4034 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4035 } 4036 4037 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4038 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4039 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4040 } 4041 4042 /* GRBM_STATUS2 */ 4043 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4044 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4045 
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4046 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4047 4048 4049 if (grbm_soft_reset) { 4050 /* stop the rlc */ 4051 adev->gfx.rlc.funcs->stop(adev); 4052 4053 if (adev->gfx.num_gfx_rings) 4054 /* Disable GFX parsing/prefetching */ 4055 gfx_v9_0_cp_gfx_enable(adev, false); 4056 4057 /* Disable MEC parsing/prefetching */ 4058 gfx_v9_0_cp_compute_enable(adev, false); 4059 4060 if (grbm_soft_reset) { 4061 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4062 tmp |= grbm_soft_reset; 4063 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4064 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4065 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4066 4067 udelay(50); 4068 4069 tmp &= ~grbm_soft_reset; 4070 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4071 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4072 } 4073 4074 /* Wait a little for things to settle down */ 4075 udelay(50); 4076 } 4077 return 0; 4078 } 4079 4080 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4081 { 4082 signed long r, cnt = 0; 4083 unsigned long flags; 4084 uint32_t seq, reg_val_offs = 0; 4085 uint64_t value = 0; 4086 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 4087 struct amdgpu_ring *ring = &kiq->ring; 4088 4089 BUG_ON(!ring->funcs->emit_rreg); 4090 4091 spin_lock_irqsave(&kiq->ring_lock, flags); 4092 if (amdgpu_device_wb_get(adev, ®_val_offs)) { 4093 pr_err("critical bug! too many kiq readers\n"); 4094 goto failed_unlock; 4095 } 4096 amdgpu_ring_alloc(ring, 32); 4097 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4098 amdgpu_ring_write(ring, 9 | /* src: register*/ 4099 (5 << 8) | /* dst: memory */ 4100 (1 << 16) | /* count sel */ 4101 (1 << 20)); /* write confirm */ 4102 amdgpu_ring_write(ring, 0); 4103 amdgpu_ring_write(ring, 0); 4104 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4105 reg_val_offs * 4)); 4106 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4107 reg_val_offs * 4)); 4108 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4109 if (r) 4110 goto failed_undo; 4111 4112 amdgpu_ring_commit(ring); 4113 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4114 4115 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4116 4117 /* don't wait anymore for gpu reset case because this way may 4118 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4119 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4120 * never return if we keep waiting in virt_kiq_rreg, which cause 4121 * gpu_recover() hang there. 
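 * In the normal case the fence is polled again below, up to MAX_KIQ_REG_TRY times.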
4122 * 4123 * also don't wait anymore for IRQ context 4124 * */ 4125 if (r < 1 && (amdgpu_in_reset(adev))) 4126 goto failed_kiq_read; 4127 4128 might_sleep(); 4129 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4130 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4131 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4132 } 4133 4134 if (cnt > MAX_KIQ_REG_TRY) 4135 goto failed_kiq_read; 4136 4137 mb(); 4138 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4139 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4140 amdgpu_device_wb_free(adev, reg_val_offs); 4141 return value; 4142 4143 failed_undo: 4144 amdgpu_ring_undo(ring); 4145 failed_unlock: 4146 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4147 failed_kiq_read: 4148 if (reg_val_offs) 4149 amdgpu_device_wb_free(adev, reg_val_offs); 4150 pr_err("failed to read gpu clock\n"); 4151 return ~0; 4152 } 4153 4154 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4155 { 4156 uint64_t clock, clock_lo, clock_hi, hi_check; 4157 4158 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4159 case IP_VERSION(9, 3, 0): 4160 preempt_disable(); 4161 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4162 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4163 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4164 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4165 * roughly every 42 seconds. 4166 */ 4167 if (hi_check != clock_hi) { 4168 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4169 clock_hi = hi_check; 4170 } 4171 preempt_enable(); 4172 clock = clock_lo | (clock_hi << 32ULL); 4173 break; 4174 default: 4175 amdgpu_gfx_off_ctrl(adev, false); 4176 mutex_lock(&adev->gfx.gpu_clock_mutex); 4177 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4178 IP_VERSION(9, 0, 1) && 4179 amdgpu_sriov_runtime(adev)) { 4180 clock = gfx_v9_0_kiq_read_clock(adev); 4181 } else { 4182 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4183 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4184 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4185 } 4186 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4187 amdgpu_gfx_off_ctrl(adev, true); 4188 break; 4189 } 4190 return clock; 4191 } 4192 4193 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4194 uint32_t vmid, 4195 uint32_t gds_base, uint32_t gds_size, 4196 uint32_t gws_base, uint32_t gws_size, 4197 uint32_t oa_base, uint32_t oa_size) 4198 { 4199 struct amdgpu_device *adev = ring->adev; 4200 4201 /* GDS Base */ 4202 gfx_v9_0_write_data_to_reg(ring, 0, false, 4203 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4204 gds_base); 4205 4206 /* GDS Size */ 4207 gfx_v9_0_write_data_to_reg(ring, 0, false, 4208 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4209 gds_size); 4210 4211 /* GWS */ 4212 gfx_v9_0_write_data_to_reg(ring, 0, false, 4213 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4214 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4215 4216 /* OA */ 4217 gfx_v9_0_write_data_to_reg(ring, 0, false, 4218 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4219 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4220 } 4221 4222 static const u32 vgpr_init_compute_shader[] = 4223 { 4224 0xb07c0000, 0xbe8000ff, 4225 0x000000f8, 0xbf110800, 4226 0x7e000280, 0x7e020280, 4227 0x7e040280, 0x7e060280, 4228 0x7e080280, 0x7e0a0280, 4229 0x7e0c0280, 0x7e0e0280, 4230 0x80808800, 0xbe803200, 4231 0xbf84fff5, 0xbf9c0000, 4232 0xd28c0001, 
0x0001007f, 4233 0xd28d0001, 0x0002027e, 4234 0x10020288, 0xb8810904, 4235 0xb7814000, 0xd1196a01, 4236 0x00000301, 0xbe800087, 4237 0xbefc00c1, 0xd89c4000, 4238 0x00020201, 0xd89cc080, 4239 0x00040401, 0x320202ff, 4240 0x00000800, 0x80808100, 4241 0xbf84fff8, 0x7e020280, 4242 0xbf810000, 0x00000000, 4243 }; 4244 4245 static const u32 sgpr_init_compute_shader[] = 4246 { 4247 0xb07c0000, 0xbe8000ff, 4248 0x0000005f, 0xbee50080, 4249 0xbe812c65, 0xbe822c65, 4250 0xbe832c65, 0xbe842c65, 4251 0xbe852c65, 0xb77c0005, 4252 0x80808500, 0xbf84fff8, 4253 0xbe800080, 0xbf810000, 4254 }; 4255 4256 static const u32 vgpr_init_compute_shader_arcturus[] = { 4257 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4258 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4259 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4260 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4261 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4262 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4263 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4264 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4265 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4266 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4267 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4268 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4269 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4270 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4271 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4272 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4273 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4274 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4275 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4276 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4277 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4278 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4279 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4280 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4281 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4282 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4283 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4284 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4285 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4286 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4287 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4288 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4289 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4290 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4291 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4292 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4293 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4294 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4295 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4296 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4297 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4298 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4299 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4300 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4301 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4302 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4303 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4304 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4305 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4306 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4307 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4308 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4309 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4310 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4311 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4312 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4313 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4314 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4315 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4316 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4317 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4318 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4319 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4320 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4321 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4322 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4323 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4324 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4325 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4326 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4327 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4328 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4329 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4330 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4331 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4332 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4333 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4334 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4335 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4336 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4337 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4338 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4339 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4340 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4341 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4342 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4343 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4344 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4345 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4346 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4347 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4348 0xbf84fff8, 0xbf810000, 4349 }; 4350 4351 /* When below register arrays changed, please update gpr_reg_size, 4352 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4353 to cover all gfx9 ASICs */ 4354 static const struct soc15_reg_entry vgpr_init_regs[] = { 4355 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4356 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4357 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4358 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4359 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4360 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4361 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4362 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4363 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4364 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4365 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4366 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4367 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4368 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4369 }; 4370 4371 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4372 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4373 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4374 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4375 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4376 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4377 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4378 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4379 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4380 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4381 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4382 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4383 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4384 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4385 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4386 }; 4387 4388 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4389 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4396 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4397 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4398 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4399 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4402 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4403 }; 4404 4405 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4406 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4414 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4415 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4416 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4420 }; 4421 4422 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4423 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4424 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4425 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4426 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4427 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4428 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4429 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4430 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4431 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4432 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4433 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4434 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4435 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4436 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4437 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4438 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4439 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4440 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4441 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4442 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4443 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4444 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4445 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4446 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4447 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4448 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4449 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4450 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4451 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4452 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4453 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4454 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4455 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4456 }; 4457 4458 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4459 { 4460 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4461 int i, r; 4462 4463 /* only support when RAS is enabled */ 4464 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4465 return 0; 4466 4467 r = amdgpu_ring_alloc(ring, 7); 4468 if (r) { 4469 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4470 ring->name, r); 4471 return r; 4472 } 4473 4474 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4475 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4476 4477 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4478 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4479 PACKET3_DMA_DATA_DST_SEL(1) | 4480 PACKET3_DMA_DATA_SRC_SEL(2) | 4481 PACKET3_DMA_DATA_ENGINE(0))); 4482 amdgpu_ring_write(ring, 0); 4483 amdgpu_ring_write(ring, 0); 4484 amdgpu_ring_write(ring, 0); 4485 amdgpu_ring_write(ring, 0); 4486 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4487 adev->gds.gds_size); 4488 4489 amdgpu_ring_commit(ring); 4490 4491 for (i = 0; i < adev->usec_timeout; i++) { 4492 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4493 break; 4494 udelay(1); 4495 } 4496 4497 if (i >= adev->usec_timeout) 4498 r = -ETIMEDOUT; 4499 4500 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4501 4502 return r; 4503 } 4504 4505 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4506 { 4507 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4508 struct amdgpu_ib ib; 4509 struct dma_fence *f = NULL; 4510 int r, i; 4511 unsigned total_size, vgpr_offset, sgpr_offset; 4512 u64 gpu_addr; 4513 4514 int compute_dim_x = adev->gfx.config.max_shader_engines * 4515 adev->gfx.config.max_cu_per_sh * 4516 adev->gfx.config.max_sh_per_se; 4517 int sgpr_work_group_size = 5; 4518 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4519 int vgpr_init_shader_size; 4520 const u32 *vgpr_init_shader_ptr; 4521 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4522 4523 /* only support when RAS is enabled */ 4524 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4525 return 0; 4526 4527 /* bail if the compute ring is not ready */ 4528 if (!ring->sched.ready) 4529 return 0; 4530 4531 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4532 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4533 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4534 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4535 } else { 4536 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4537 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4538 vgpr_init_regs_ptr = vgpr_init_regs; 4539 } 4540 4541 total_size = 4542 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4543 total_size += 4544 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4545 total_size += 4546 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4547 total_size = ALIGN(total_size, 256); 4548 vgpr_offset = total_size; 4549 total_size += ALIGN(vgpr_init_shader_size, 256); 4550 sgpr_offset = total_size; 4551 total_size += sizeof(sgpr_init_compute_shader); 4552 4553 /* allocate an indirect buffer to put the commands in */ 4554 memset(&ib, 0, sizeof(ib)); 4555 r = amdgpu_ib_get(adev, NULL, total_size, 4556 AMDGPU_IB_POOL_DIRECT, &ib); 4557 if (r) { 4558 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4559 return r; 4560 } 4561 4562 /* load the compute 
shaders */ 4563 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4564 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4565 4566 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4567 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4568 4569 /* init the ib length to 0 */ 4570 ib.length_dw = 0; 4571 4572 /* VGPR */ 4573 /* write the register state for the compute dispatch */ 4574 for (i = 0; i < gpr_reg_size; i++) { 4575 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4576 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4577 - PACKET3_SET_SH_REG_START; 4578 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4579 } 4580 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4581 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4582 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4583 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4584 - PACKET3_SET_SH_REG_START; 4585 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4586 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4587 4588 /* write dispatch packet */ 4589 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4590 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4591 ib.ptr[ib.length_dw++] = 1; /* y */ 4592 ib.ptr[ib.length_dw++] = 1; /* z */ 4593 ib.ptr[ib.length_dw++] = 4594 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4595 4596 /* write CS partial flush packet */ 4597 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4598 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4599 4600 /* SGPR1 */ 4601 /* write the register state for the compute dispatch */ 4602 for (i = 0; i < gpr_reg_size; i++) { 4603 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4604 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4605 - PACKET3_SET_SH_REG_START; 4606 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4607 } 4608 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4609 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4610 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4611 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4612 - PACKET3_SET_SH_REG_START; 4613 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4614 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4615 4616 /* write dispatch packet */ 4617 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4618 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4619 ib.ptr[ib.length_dw++] = 1; /* y */ 4620 ib.ptr[ib.length_dw++] = 1; /* z */ 4621 ib.ptr[ib.length_dw++] = 4622 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4623 4624 /* write CS partial flush packet */ 4625 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4626 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4627 4628 /* SGPR2 */ 4629 /* write the register state for the compute dispatch */ 4630 for (i = 0; i < gpr_reg_size; i++) { 4631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4632 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4633 - PACKET3_SET_SH_REG_START; 4634 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4635 } 4636 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4637 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4638 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4639 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 4640 - PACKET3_SET_SH_REG_START; 4641 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4642 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4643 4644 /* write dispatch packet */ 4645 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4646 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4647 ib.ptr[ib.length_dw++] = 1; /* y */ 4648 ib.ptr[ib.length_dw++] = 1; /* z */ 4649 ib.ptr[ib.length_dw++] = 4650 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4651 4652 /* write CS partial flush packet */ 4653 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4654 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4655 4656 /* schedule the ib on the ring */ 4657 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4658 if (r) { 4659 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4660 goto fail; 4661 } 4662 4663 /* wait for the GPU to finish processing the IB */ 4664 r = dma_fence_wait(f, false); 4665 if (r) { 4666 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4667 goto fail; 4668 } 4669 4670 fail: 4671 amdgpu_ib_free(adev, &ib, NULL); 4672 dma_fence_put(f); 4673 4674 return r; 4675 } 4676 4677 static int gfx_v9_0_early_init(void *handle) 4678 { 4679 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4680 4681 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 4682 4683 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 4684 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4685 adev->gfx.num_gfx_rings = 0; 4686 else 4687 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4688 adev->gfx.xcc_mask = 1; 4689 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4690 AMDGPU_MAX_COMPUTE_RINGS); 4691 gfx_v9_0_set_kiq_pm4_funcs(adev); 4692 gfx_v9_0_set_ring_funcs(adev); 4693 gfx_v9_0_set_irq_funcs(adev); 4694 gfx_v9_0_set_gds_init(adev); 4695 gfx_v9_0_set_rlc_funcs(adev); 4696 4697 /* init rlcg reg access ctrl */ 4698 gfx_v9_0_init_rlcg_reg_access_ctrl(adev); 4699 4700 return gfx_v9_0_init_microcode(adev); 4701 } 4702 4703 static int gfx_v9_0_ecc_late_init(void *handle) 4704 { 4705 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4706 int r; 4707 4708 /* 4709 * Temp workaround to fix the issue that CP firmware fails to 4710 * update the read pointer when CPDMA is writing a clearing operation 4711 * to GDS in the suspend/resume sequence on several cards. So just 4712 * limit this operation to the cold boot sequence. 
4713 */ 4714 if ((!adev->in_suspend) && 4715 (adev->gds.gds_size)) { 4716 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4717 if (r) 4718 return r; 4719 } 4720 4721 /* requires IBs so do in late init after IB pool is initialized */ 4722 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4723 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4724 else 4725 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4726 4727 if (r) 4728 return r; 4729 4730 if (adev->gfx.ras && 4731 adev->gfx.ras->enable_watchdog_timer) 4732 adev->gfx.ras->enable_watchdog_timer(adev); 4733 4734 return 0; 4735 } 4736 4737 static int gfx_v9_0_late_init(void *handle) 4738 { 4739 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4740 int r; 4741 4742 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4743 if (r) 4744 return r; 4745 4746 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4747 if (r) 4748 return r; 4749 4750 r = gfx_v9_0_ecc_late_init(handle); 4751 if (r) 4752 return r; 4753 4754 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4755 gfx_v9_4_2_debug_trap_config_init(adev, 4756 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4757 else 4758 gfx_v9_0_debug_trap_config_init(adev, 4759 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4760 4761 return 0; 4762 } 4763 4764 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4765 { 4766 uint32_t rlc_setting; 4767 4768 /* if RLC is not enabled, do nothing */ 4769 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4770 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4771 return false; 4772 4773 return true; 4774 } 4775 4776 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4777 { 4778 uint32_t data; 4779 unsigned i; 4780 4781 data = RLC_SAFE_MODE__CMD_MASK; 4782 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4783 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4784 4785 /* wait for RLC_SAFE_MODE */ 4786 for (i = 0; i < adev->usec_timeout; i++) { 4787 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4788 break; 4789 udelay(1); 4790 } 4791 } 4792 4793 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4794 { 4795 uint32_t data; 4796 4797 data = RLC_SAFE_MODE__CMD_MASK; 4798 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4799 } 4800 4801 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4802 bool enable) 4803 { 4804 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4805 4806 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4807 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4808 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4809 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4810 } else { 4811 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4812 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4813 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4814 } 4815 4816 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4817 } 4818 4819 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4820 bool enable) 4821 { 4822 /* TODO: double check if we need to perform under safe mode */ 4823 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4824 4825 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4826 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4827 else 4828 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4829 4830 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4831 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4832 else 4833 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4834 4835 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4836 } 4837 4838 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4839 bool enable) 4840 { 4841 uint32_t data, def; 4842 4843 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4844 4845 /* It is disabled by HW by default */ 4846 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4847 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4848 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4849 4850 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4851 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4852 4853 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4854 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4855 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4856 4857 /* only for Vega10 & Raven1 */ 4858 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4859 4860 if (def != data) 4861 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4862 4863 /* MGLS is a global flag to control all MGLS in GFX */ 4864 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4865 /* 2 - RLC memory Light sleep */ 4866 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4867 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4868 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4869 if (def != data) 4870 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4871 } 4872 /* 3 - CP memory Light sleep */ 4873 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4874 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4875 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4876 if (def != data) 4877 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4878 } 4879 } 4880 } else { 4881 /* 1 - MGCG_OVERRIDE */ 4882 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4883 4884 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4885 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4886 4887 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4888 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4889 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4890 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4891 4892 if (def != data) 4893 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4894 4895 /* 2 - disable MGLS in RLC */ 4896 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4897 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4898 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4899 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4900 } 4901 4902 /* 3 - disable MGLS in CP */ 4903 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4904 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4905 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4906 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4907 } 4908 } 4909 4910 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4911 } 4912 4913 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4914 bool enable) 4915 { 4916 uint32_t data, def; 4917 4918 if (!adev->gfx.num_gfx_rings) 4919 return; 4920 4921 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4922 4923 /* Enable 3D CGCG/CGLS */ 4924 if (enable) { 4925 /* write cmd to clear cgcg/cgls ov */ 4926 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4927 /* unset CGCG override */ 4928 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4929 /* update CGCG and CGLS override bits */ 4930 if (def != data) 4931 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4932 4933 /* enable 3Dcgcg FSM(0x0000363f) 
*/ 4934 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4935 4936 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 4937 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4938 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4939 else 4940 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 4941 4942 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4943 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4944 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4945 if (def != data) 4946 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4947 4948 /* set IDLE_POLL_COUNT(0x00900100) */ 4949 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4950 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4951 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4952 if (def != data) 4953 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4954 } else { 4955 /* Disable CGCG/CGLS */ 4956 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4957 /* disable cgcg, cgls should be disabled */ 4958 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4959 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4960 /* disable cgcg and cgls in FSM */ 4961 if (def != data) 4962 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4963 } 4964 4965 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4966 } 4967 4968 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4969 bool enable) 4970 { 4971 uint32_t def, data; 4972 4973 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4974 4975 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4976 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4977 /* unset CGCG override */ 4978 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4979 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4980 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4981 else 4982 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4983 /* update CGCG and CGLS override bits */ 4984 if (def != data) 4985 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4986 4987 /* enable cgcg FSM(0x0000363F) */ 4988 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4989 4990 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 4991 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4992 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4993 else 4994 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4995 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4996 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4997 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4998 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4999 if (def != data) 5000 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5001 5002 /* set IDLE_POLL_COUNT(0x00900100) */ 5003 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5004 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5005 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5006 if (def != data) 5007 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5008 } else { 5009 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5010 /* reset CGCG/CGLS bits */ 5011 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5012 /* disable cgcg and cgls in FSM */ 5013 if (def != data) 5014 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5015 } 5016 5017 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5018 } 5019 5020 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5021 bool enable) 5022 { 5023 if (enable) { 5024 /* 
CGCG/CGLS should be enabled after MGCG/MGLS 5025 * === MGCG + MGLS === 5026 */ 5027 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5028 /* === CGCG /CGLS for GFX 3D Only === */ 5029 gfx_v9_0_update_3d_clock_gating(adev, enable); 5030 /* === CGCG + CGLS === */ 5031 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5032 } else { 5033 /* CGCG/CGLS should be disabled before MGCG/MGLS 5034 * === CGCG + CGLS === 5035 */ 5036 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5037 /* === CGCG /CGLS for GFX 3D Only === */ 5038 gfx_v9_0_update_3d_clock_gating(adev, enable); 5039 /* === MGCG + MGLS === */ 5040 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5041 } 5042 return 0; 5043 } 5044 5045 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5046 unsigned int vmid) 5047 { 5048 u32 reg, data; 5049 5050 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5051 if (amdgpu_sriov_is_pp_one_vf(adev)) 5052 data = RREG32_NO_KIQ(reg); 5053 else 5054 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5055 5056 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5057 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5058 5059 if (amdgpu_sriov_is_pp_one_vf(adev)) 5060 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5061 else 5062 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5063 } 5064 5065 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5066 { 5067 amdgpu_gfx_off_ctrl(adev, false); 5068 5069 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5070 5071 amdgpu_gfx_off_ctrl(adev, true); 5072 } 5073 5074 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5075 uint32_t offset, 5076 struct soc15_reg_rlcg *entries, int arr_size) 5077 { 5078 int i; 5079 uint32_t reg; 5080 5081 if (!entries) 5082 return false; 5083 5084 for (i = 0; i < arr_size; i++) { 5085 const struct soc15_reg_rlcg *entry; 5086 5087 entry = &entries[i]; 5088 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5089 if (offset == reg) 5090 return true; 5091 } 5092 5093 return false; 5094 } 5095 5096 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5097 { 5098 return gfx_v9_0_check_rlcg_range(adev, offset, 5099 (void *)rlcg_access_gc_9_0, 5100 ARRAY_SIZE(rlcg_access_gc_9_0)); 5101 } 5102 5103 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5104 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5105 .set_safe_mode = gfx_v9_0_set_safe_mode, 5106 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5107 .init = gfx_v9_0_rlc_init, 5108 .get_csb_size = gfx_v9_0_get_csb_size, 5109 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5110 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5111 .resume = gfx_v9_0_rlc_resume, 5112 .stop = gfx_v9_0_rlc_stop, 5113 .reset = gfx_v9_0_rlc_reset, 5114 .start = gfx_v9_0_rlc_start, 5115 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5116 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5117 }; 5118 5119 static int gfx_v9_0_set_powergating_state(void *handle, 5120 enum amd_powergating_state state) 5121 { 5122 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5123 bool enable = (state == AMD_PG_STATE_GATE); 5124 5125 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5126 case IP_VERSION(9, 2, 2): 5127 case IP_VERSION(9, 1, 0): 5128 case IP_VERSION(9, 3, 0): 5129 if (!enable) 5130 amdgpu_gfx_off_ctrl(adev, false); 5131 5132 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5133 
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5134 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5135 } else { 5136 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5137 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5138 } 5139 5140 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5141 gfx_v9_0_enable_cp_power_gating(adev, true); 5142 else 5143 gfx_v9_0_enable_cp_power_gating(adev, false); 5144 5145 /* update gfx cgpg state */ 5146 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5147 5148 /* update mgcg state */ 5149 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5150 5151 if (enable) 5152 amdgpu_gfx_off_ctrl(adev, true); 5153 break; 5154 case IP_VERSION(9, 2, 1): 5155 amdgpu_gfx_off_ctrl(adev, enable); 5156 break; 5157 default: 5158 break; 5159 } 5160 5161 return 0; 5162 } 5163 5164 static int gfx_v9_0_set_clockgating_state(void *handle, 5165 enum amd_clockgating_state state) 5166 { 5167 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5168 5169 if (amdgpu_sriov_vf(adev)) 5170 return 0; 5171 5172 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5173 case IP_VERSION(9, 0, 1): 5174 case IP_VERSION(9, 2, 1): 5175 case IP_VERSION(9, 4, 0): 5176 case IP_VERSION(9, 2, 2): 5177 case IP_VERSION(9, 1, 0): 5178 case IP_VERSION(9, 4, 1): 5179 case IP_VERSION(9, 3, 0): 5180 case IP_VERSION(9, 4, 2): 5181 gfx_v9_0_update_gfx_clock_gating(adev, 5182 state == AMD_CG_STATE_GATE); 5183 break; 5184 default: 5185 break; 5186 } 5187 return 0; 5188 } 5189 5190 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) 5191 { 5192 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5193 int data; 5194 5195 if (amdgpu_sriov_vf(adev)) 5196 *flags = 0; 5197 5198 /* AMD_CG_SUPPORT_GFX_MGCG */ 5199 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5200 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5201 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5202 5203 /* AMD_CG_SUPPORT_GFX_CGCG */ 5204 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5205 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5206 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5207 5208 /* AMD_CG_SUPPORT_GFX_CGLS */ 5209 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5210 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5211 5212 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5213 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5214 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5215 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5216 5217 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5218 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5219 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5220 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5221 5222 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5223 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5224 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5225 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5226 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5227 5228 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5229 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5230 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5231 } 5232 } 5233 5234 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5235 { 5236 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5237 } 5238 5239 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5240 { 5241 struct amdgpu_device *adev = ring->adev; 5242 u64 wptr; 5243 5244 /* XXX check if swapping is necessary on BE */ 5245 if (ring->use_doorbell) { 
5246 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5247 } else { 5248 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5249 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5250 } 5251 5252 return wptr; 5253 } 5254 5255 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5256 { 5257 struct amdgpu_device *adev = ring->adev; 5258 5259 if (ring->use_doorbell) { 5260 /* XXX check if swapping is necessary on BE */ 5261 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5262 WDOORBELL64(ring->doorbell_index, ring->wptr); 5263 } else { 5264 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5265 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5266 } 5267 } 5268 5269 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5270 { 5271 struct amdgpu_device *adev = ring->adev; 5272 u32 ref_and_mask, reg_mem_engine; 5273 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5274 5275 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5276 switch (ring->me) { 5277 case 1: 5278 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5279 break; 5280 case 2: 5281 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5282 break; 5283 default: 5284 return; 5285 } 5286 reg_mem_engine = 0; 5287 } else { 5288 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5289 reg_mem_engine = 1; /* pfp */ 5290 } 5291 5292 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5293 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5294 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5295 ref_and_mask, ref_and_mask, 0x20); 5296 } 5297 5298 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5299 struct amdgpu_job *job, 5300 struct amdgpu_ib *ib, 5301 uint32_t flags) 5302 { 5303 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5304 u32 header, control = 0; 5305 5306 if (ib->flags & AMDGPU_IB_FLAG_CE) 5307 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5308 else 5309 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5310 5311 control |= ib->length_dw | (vmid << 24); 5312 5313 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5314 control |= INDIRECT_BUFFER_PRE_ENB(1); 5315 5316 if (flags & AMDGPU_IB_PREEMPTED) 5317 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5318 5319 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5320 gfx_v9_0_ring_emit_de_meta(ring, 5321 (!amdgpu_sriov_vf(ring->adev) && 5322 flags & AMDGPU_IB_PREEMPTED) ? 
5323 true : false, 5324 job->gds_size > 0 && job->gds_base != 0); 5325 } 5326 5327 amdgpu_ring_write(ring, header); 5328 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5329 amdgpu_ring_write(ring, 5330 #ifdef __BIG_ENDIAN 5331 (2 << 0) | 5332 #endif 5333 lower_32_bits(ib->gpu_addr)); 5334 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5335 amdgpu_ring_ib_on_emit_cntl(ring); 5336 amdgpu_ring_write(ring, control); 5337 } 5338 5339 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5340 unsigned offset) 5341 { 5342 u32 control = ring->ring[offset]; 5343 5344 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5345 ring->ring[offset] = control; 5346 } 5347 5348 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5349 unsigned offset) 5350 { 5351 struct amdgpu_device *adev = ring->adev; 5352 void *ce_payload_cpu_addr; 5353 uint64_t payload_offset, payload_size; 5354 5355 payload_size = sizeof(struct v9_ce_ib_state); 5356 5357 if (ring->is_mes_queue) { 5358 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5359 gfx[0].gfx_meta_data) + 5360 offsetof(struct v9_gfx_meta_data, ce_payload); 5361 ce_payload_cpu_addr = 5362 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5363 } else { 5364 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5365 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5366 } 5367 5368 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5369 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5370 } else { 5371 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5372 (ring->buf_mask + 1 - offset) << 2); 5373 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5374 memcpy((void *)&ring->ring[0], 5375 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5376 payload_size); 5377 } 5378 } 5379 5380 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5381 unsigned offset) 5382 { 5383 struct amdgpu_device *adev = ring->adev; 5384 void *de_payload_cpu_addr; 5385 uint64_t payload_offset, payload_size; 5386 5387 payload_size = sizeof(struct v9_de_ib_state); 5388 5389 if (ring->is_mes_queue) { 5390 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5391 gfx[0].gfx_meta_data) + 5392 offsetof(struct v9_gfx_meta_data, de_payload); 5393 de_payload_cpu_addr = 5394 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5395 } else { 5396 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5397 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5398 } 5399 5400 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5401 IB_COMPLETION_STATUS_PREEMPTED; 5402 5403 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5404 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5405 } else { 5406 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5407 (ring->buf_mask + 1 - offset) << 2); 5408 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5409 memcpy((void *)&ring->ring[0], 5410 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5411 payload_size); 5412 } 5413 } 5414 5415 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5416 struct amdgpu_job *job, 5417 struct amdgpu_ib *ib, 5418 uint32_t flags) 5419 { 5420 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5421 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5422 5423 /* Currently, there is a high possibility to get wave ID mismatch 5424 * between ME and GDS, leading to a hw deadlock, because ME generates 5425 * 
different wave IDs than the GDS expects. This situation happens 5426 * randomly when at least 5 compute pipes use GDS ordered append. 5427 * The wave IDs generated by ME are also wrong after suspend/resume. 5428 * Those are probably bugs somewhere else in the kernel driver. 5429 * 5430 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5431 * GDS to 0 for this ring (me/pipe). 5432 */ 5433 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5434 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5435 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5436 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5437 } 5438 5439 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5440 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5441 amdgpu_ring_write(ring, 5442 #ifdef __BIG_ENDIAN 5443 (2 << 0) | 5444 #endif 5445 lower_32_bits(ib->gpu_addr)); 5446 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5447 amdgpu_ring_write(ring, control); 5448 } 5449 5450 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5451 u64 seq, unsigned flags) 5452 { 5453 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5454 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5455 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5456 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5457 uint32_t dw2 = 0; 5458 5459 /* RELEASE_MEM - flush caches, send int */ 5460 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5461 5462 if (writeback) { 5463 dw2 = EOP_TC_NC_ACTION_EN; 5464 } else { 5465 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5466 EOP_TC_MD_ACTION_EN; 5467 } 5468 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5469 EVENT_INDEX(5); 5470 if (exec) 5471 dw2 |= EOP_EXEC; 5472 5473 amdgpu_ring_write(ring, dw2); 5474 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5475 5476 /* 5477 * the address should be Qword aligned if 64bit write, Dword 5478 * aligned if only send 32bit data low (discard data high) 5479 */ 5480 if (write64bit) 5481 BUG_ON(addr & 0x7); 5482 else 5483 BUG_ON(addr & 0x3); 5484 amdgpu_ring_write(ring, lower_32_bits(addr)); 5485 amdgpu_ring_write(ring, upper_32_bits(addr)); 5486 amdgpu_ring_write(ring, lower_32_bits(seq)); 5487 amdgpu_ring_write(ring, upper_32_bits(seq)); 5488 amdgpu_ring_write(ring, 0); 5489 } 5490 5491 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5492 { 5493 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5494 uint32_t seq = ring->fence_drv.sync_seq; 5495 uint64_t addr = ring->fence_drv.gpu_addr; 5496 5497 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5498 lower_32_bits(addr), upper_32_bits(addr), 5499 seq, 0xffffffff, 4); 5500 } 5501 5502 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5503 unsigned vmid, uint64_t pd_addr) 5504 { 5505 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5506 5507 /* compute doesn't have PFP */ 5508 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5509 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5510 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5511 amdgpu_ring_write(ring, 0x0); 5512 } 5513 } 5514 5515 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5516 { 5517 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5518 } 5519 5520 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5521 { 5522 u64 wptr; 5523 5524 /* XXX check if swapping is necessary on BE */ 5525 if (ring->use_doorbell) 5526 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5527 else 5528 BUG(); 5529 return wptr; 5530 } 5531 5532 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5533 { 5534 struct amdgpu_device *adev = ring->adev; 5535 5536 /* XXX check if swapping is necessary on BE */ 5537 if (ring->use_doorbell) { 5538 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5539 WDOORBELL64(ring->doorbell_index, ring->wptr); 5540 } else{ 5541 BUG(); /* only DOORBELL method supported on gfx9 now */ 5542 } 5543 } 5544 5545 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5546 u64 seq, unsigned int flags) 5547 { 5548 struct amdgpu_device *adev = ring->adev; 5549 5550 /* we only allocate 32bit for each seq wb address */ 5551 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5552 5553 /* write fence seq to the "addr" */ 5554 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5555 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5556 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5557 amdgpu_ring_write(ring, lower_32_bits(addr)); 5558 amdgpu_ring_write(ring, upper_32_bits(addr)); 5559 amdgpu_ring_write(ring, lower_32_bits(seq)); 5560 5561 if (flags & AMDGPU_FENCE_FLAG_INT) { 5562 /* set register to trigger INT */ 5563 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5564 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5565 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5566 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5567 amdgpu_ring_write(ring, 0); 5568 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5569 } 5570 } 5571 5572 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5573 { 5574 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5575 amdgpu_ring_write(ring, 0); 5576 } 5577 5578 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5579 { 5580 struct 
amdgpu_device *adev = ring->adev; 5581 struct v9_ce_ib_state ce_payload = {0}; 5582 uint64_t offset, ce_payload_gpu_addr; 5583 void *ce_payload_cpu_addr; 5584 int cnt; 5585 5586 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5587 5588 if (ring->is_mes_queue) { 5589 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5590 gfx[0].gfx_meta_data) + 5591 offsetof(struct v9_gfx_meta_data, ce_payload); 5592 ce_payload_gpu_addr = 5593 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5594 ce_payload_cpu_addr = 5595 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5596 } else { 5597 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5598 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5599 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5600 } 5601 5602 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5603 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5604 WRITE_DATA_DST_SEL(8) | 5605 WR_CONFIRM) | 5606 WRITE_DATA_CACHE_POLICY(0)); 5607 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5608 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5609 5610 amdgpu_ring_ib_on_emit_ce(ring); 5611 5612 if (resume) 5613 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5614 sizeof(ce_payload) >> 2); 5615 else 5616 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5617 sizeof(ce_payload) >> 2); 5618 } 5619 5620 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5621 { 5622 int i, r = 0; 5623 struct amdgpu_device *adev = ring->adev; 5624 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5625 struct amdgpu_ring *kiq_ring = &kiq->ring; 5626 unsigned long flags; 5627 5628 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5629 return -EINVAL; 5630 5631 spin_lock_irqsave(&kiq->ring_lock, flags); 5632 5633 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5634 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5635 return -ENOMEM; 5636 } 5637 5638 /* assert preemption condition */ 5639 amdgpu_ring_set_preempt_cond_exec(ring, false); 5640 5641 ring->trail_seq += 1; 5642 amdgpu_ring_alloc(ring, 13); 5643 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5644 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5645 5646 /* assert IB preemption, emit the trailing fence */ 5647 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5648 ring->trail_fence_gpu_addr, 5649 ring->trail_seq); 5650 5651 amdgpu_ring_commit(kiq_ring); 5652 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5653 5654 /* poll the trailing fence */ 5655 for (i = 0; i < adev->usec_timeout; i++) { 5656 if (ring->trail_seq == 5657 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5658 break; 5659 udelay(1); 5660 } 5661 5662 if (i >= adev->usec_timeout) { 5663 r = -EINVAL; 5664 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5665 } 5666 5667 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5668 amdgpu_ring_emit_wreg(ring, 5669 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5670 0x0); 5671 amdgpu_ring_commit(ring); 5672 5673 /* deassert preemption condition */ 5674 amdgpu_ring_set_preempt_cond_exec(ring, true); 5675 return r; 5676 } 5677 5678 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5679 { 5680 struct amdgpu_device *adev = ring->adev; 5681 struct v9_de_ib_state de_payload = {0}; 5682 uint64_t offset, gds_addr, de_payload_gpu_addr; 5683 void *de_payload_cpu_addr; 5684 int cnt; 5685 5686 if (ring->is_mes_queue) { 5687 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5688 gfx[0].gfx_meta_data) + 5689 
offsetof(struct v9_gfx_meta_data, de_payload); 5690 de_payload_gpu_addr = 5691 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5692 de_payload_cpu_addr = 5693 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5694 5695 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5696 gfx[0].gds_backup) + 5697 offsetof(struct v9_gfx_meta_data, de_payload); 5698 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5699 } else { 5700 offset = offsetof(struct v9_gfx_meta_data, de_payload); 5701 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5702 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5703 5704 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5705 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5706 PAGE_SIZE); 5707 } 5708 5709 if (usegds) { 5710 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5711 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5712 } 5713 5714 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5715 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5716 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5717 WRITE_DATA_DST_SEL(8) | 5718 WR_CONFIRM) | 5719 WRITE_DATA_CACHE_POLICY(0)); 5720 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5721 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5722 5723 amdgpu_ring_ib_on_emit_de(ring); 5724 if (resume) 5725 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5726 sizeof(de_payload) >> 2); 5727 else 5728 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5729 sizeof(de_payload) >> 2); 5730 } 5731 5732 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5733 bool secure) 5734 { 5735 uint32_t v = secure ? FRAME_TMZ : 0; 5736 5737 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5738 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5739 } 5740 5741 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5742 { 5743 uint32_t dw2 = 0; 5744 5745 gfx_v9_0_ring_emit_ce_meta(ring, 5746 (!amdgpu_sriov_vf(ring->adev) && 5747 flags & AMDGPU_IB_PREEMPTED) ? true : false); 5748 5749 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5750 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5751 /* set load_global_config & load_global_uconfig */ 5752 dw2 |= 0x8001; 5753 /* set load_cs_sh_regs */ 5754 dw2 |= 0x01000000; 5755 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5756 dw2 |= 0x10002; 5757 5758 /* set load_ce_ram if preamble presented */ 5759 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5760 dw2 |= 0x10000000; 5761 } else { 5762 /* still load_ce_ram if this is the first time preamble presented 5763 * although there is no context switch happens. 
5764 */ 5765 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5766 dw2 |= 0x10000000; 5767 } 5768 5769 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5770 amdgpu_ring_write(ring, dw2); 5771 amdgpu_ring_write(ring, 0); 5772 } 5773 5774 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5775 uint64_t addr) 5776 { 5777 unsigned ret; 5778 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5779 amdgpu_ring_write(ring, lower_32_bits(addr)); 5780 amdgpu_ring_write(ring, upper_32_bits(addr)); 5781 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5782 amdgpu_ring_write(ring, 0); 5783 ret = ring->wptr & ring->buf_mask; 5784 /* patch dummy value later */ 5785 amdgpu_ring_write(ring, 0); 5786 return ret; 5787 } 5788 5789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5790 uint32_t reg_val_offs) 5791 { 5792 struct amdgpu_device *adev = ring->adev; 5793 5794 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5795 amdgpu_ring_write(ring, 0 | /* src: register*/ 5796 (5 << 8) | /* dst: memory */ 5797 (1 << 20)); /* write confirm */ 5798 amdgpu_ring_write(ring, reg); 5799 amdgpu_ring_write(ring, 0); 5800 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5801 reg_val_offs * 4)); 5802 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5803 reg_val_offs * 4)); 5804 } 5805 5806 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5807 uint32_t val) 5808 { 5809 uint32_t cmd = 0; 5810 5811 switch (ring->funcs->type) { 5812 case AMDGPU_RING_TYPE_GFX: 5813 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5814 break; 5815 case AMDGPU_RING_TYPE_KIQ: 5816 cmd = (1 << 16); /* no inc addr */ 5817 break; 5818 default: 5819 cmd = WR_CONFIRM; 5820 break; 5821 } 5822 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5823 amdgpu_ring_write(ring, cmd); 5824 amdgpu_ring_write(ring, reg); 5825 amdgpu_ring_write(ring, 0); 5826 amdgpu_ring_write(ring, val); 5827 } 5828 5829 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5830 uint32_t val, uint32_t mask) 5831 { 5832 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5833 } 5834 5835 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5836 uint32_t reg0, uint32_t reg1, 5837 uint32_t ref, uint32_t mask) 5838 { 5839 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5840 struct amdgpu_device *adev = ring->adev; 5841 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5842 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5843 5844 if (fw_version_ok) 5845 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5846 ref, mask, 0x20); 5847 else 5848 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5849 ref, mask); 5850 } 5851 5852 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5853 { 5854 struct amdgpu_device *adev = ring->adev; 5855 uint32_t value = 0; 5856 5857 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5858 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5859 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5860 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5861 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5862 } 5863 5864 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5865 enum amdgpu_interrupt_state state) 5866 { 5867 switch (state) { 5868 case AMDGPU_IRQ_STATE_DISABLE: 5869 case AMDGPU_IRQ_STATE_ENABLE: 5870 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5871 TIME_STAMP_INT_ENABLE, 5872 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5873 break; 5874 default: 5875 break; 5876 } 5877 } 5878 5879 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5880 int me, int pipe, 5881 enum amdgpu_interrupt_state state) 5882 { 5883 u32 mec_int_cntl, mec_int_cntl_reg; 5884 5885 /* 5886 * amdgpu controls only the first MEC. That's why this function only 5887 * handles the setting of interrupts for this specific MEC. All other 5888 * pipes' interrupts are set by amdkfd. 5889 */ 5890 5891 if (me == 1) { 5892 switch (pipe) { 5893 case 0: 5894 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5895 break; 5896 case 1: 5897 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5898 break; 5899 case 2: 5900 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5901 break; 5902 case 3: 5903 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5904 break; 5905 default: 5906 DRM_DEBUG("invalid pipe %d\n", pipe); 5907 return; 5908 } 5909 } else { 5910 DRM_DEBUG("invalid me %d\n", me); 5911 return; 5912 } 5913 5914 switch (state) { 5915 case AMDGPU_IRQ_STATE_DISABLE: 5916 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 5917 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5918 TIME_STAMP_INT_ENABLE, 0); 5919 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5920 break; 5921 case AMDGPU_IRQ_STATE_ENABLE: 5922 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5923 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5924 TIME_STAMP_INT_ENABLE, 1); 5925 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5926 break; 5927 default: 5928 break; 5929 } 5930 } 5931 5932 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5933 struct amdgpu_irq_src *source, 5934 unsigned type, 5935 enum amdgpu_interrupt_state state) 5936 { 5937 switch (state) { 5938 case AMDGPU_IRQ_STATE_DISABLE: 5939 case AMDGPU_IRQ_STATE_ENABLE: 5940 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5941 PRIV_REG_INT_ENABLE, 5942 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5943 break; 5944 default: 5945 break; 5946 } 5947 5948 return 0; 5949 } 5950 5951 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5952 struct amdgpu_irq_src *source, 5953 unsigned type, 5954 enum amdgpu_interrupt_state state) 5955 { 5956 switch (state) { 5957 case AMDGPU_IRQ_STATE_DISABLE: 5958 case AMDGPU_IRQ_STATE_ENABLE: 5959 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5960 PRIV_INSTR_INT_ENABLE, 5961 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5962 break; 5963 default: 5964 break; 5965 } 5966 5967 return 0; 5968 } 5969 5970 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5971 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5972 CP_ECC_ERROR_INT_ENABLE, 1) 5973 5974 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5975 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5976 CP_ECC_ERROR_INT_ENABLE, 0) 5977 5978 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5979 struct amdgpu_irq_src *source, 5980 unsigned type, 5981 enum amdgpu_interrupt_state state) 5982 { 5983 switch (state) { 5984 case AMDGPU_IRQ_STATE_DISABLE: 5985 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5986 CP_ECC_ERROR_INT_ENABLE, 0); 5987 DISABLE_ECC_ON_ME_PIPE(1, 0); 5988 DISABLE_ECC_ON_ME_PIPE(1, 1); 5989 DISABLE_ECC_ON_ME_PIPE(1, 2); 5990 DISABLE_ECC_ON_ME_PIPE(1, 3); 5991 break; 5992 5993 case AMDGPU_IRQ_STATE_ENABLE: 5994 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5995 CP_ECC_ERROR_INT_ENABLE, 1); 5996 ENABLE_ECC_ON_ME_PIPE(1, 0); 5997 ENABLE_ECC_ON_ME_PIPE(1, 1); 5998 ENABLE_ECC_ON_ME_PIPE(1, 2); 5999 ENABLE_ECC_ON_ME_PIPE(1, 3); 6000 break; 6001 default: 6002 break; 6003 } 6004 6005 return 0; 6006 } 6007 6008 6009 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6010 struct amdgpu_irq_src *src, 6011 unsigned type, 6012 enum amdgpu_interrupt_state state) 6013 { 6014 switch (type) { 6015 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6016 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6017 break; 6018 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6019 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6020 break; 6021 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6022 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6023 break; 6024 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6025 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6026 break; 6027 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6028 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6029 break; 6030 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6031 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6032 break; 6033 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6034 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6035 break; 6036 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6037 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6038 break; 6039 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6040 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6041 break; 6042 default: 6043 break; 6044 } 6045 return 0; 6046 } 6047 6048 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6049 struct amdgpu_irq_src *source, 6050 struct amdgpu_iv_entry *entry) 6051 { 6052 int i; 6053 u8 me_id, pipe_id, queue_id; 6054 struct amdgpu_ring *ring; 6055 6056 DRM_DEBUG("IH: CP EOP\n"); 6057 me_id = (entry->ring_id & 0x0c) >> 2; 6058 pipe_id = (entry->ring_id & 0x03) >> 0; 6059 queue_id = (entry->ring_id & 0x70) >> 4; 6060 6061 switch (me_id) { 6062 case 0: 6063 if (adev->gfx.num_gfx_rings) { 6064 if (!adev->gfx.mcbp) { 6065 
amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6066 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6067 /* Fence signals are handled on the software rings*/ 6068 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6069 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6070 } 6071 } 6072 break; 6073 case 1: 6074 case 2: 6075 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6076 ring = &adev->gfx.compute_ring[i]; 6077 /* Per-queue interrupt is supported for MEC starting from VI. 6078 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6079 */ 6080 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6081 amdgpu_fence_process(ring); 6082 } 6083 break; 6084 } 6085 return 0; 6086 } 6087 6088 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6089 struct amdgpu_iv_entry *entry) 6090 { 6091 u8 me_id, pipe_id, queue_id; 6092 struct amdgpu_ring *ring; 6093 int i; 6094 6095 me_id = (entry->ring_id & 0x0c) >> 2; 6096 pipe_id = (entry->ring_id & 0x03) >> 0; 6097 queue_id = (entry->ring_id & 0x70) >> 4; 6098 6099 switch (me_id) { 6100 case 0: 6101 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6102 break; 6103 case 1: 6104 case 2: 6105 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6106 ring = &adev->gfx.compute_ring[i]; 6107 if (ring->me == me_id && ring->pipe == pipe_id && 6108 ring->queue == queue_id) 6109 drm_sched_fault(&ring->sched); 6110 } 6111 break; 6112 } 6113 } 6114 6115 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6116 struct amdgpu_irq_src *source, 6117 struct amdgpu_iv_entry *entry) 6118 { 6119 DRM_ERROR("Illegal register access in command stream\n"); 6120 gfx_v9_0_fault(adev, entry); 6121 return 0; 6122 } 6123 6124 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6125 struct amdgpu_irq_src *source, 6126 struct amdgpu_iv_entry *entry) 6127 { 6128 DRM_ERROR("Illegal instruction in command stream\n"); 6129 gfx_v9_0_fault(adev, entry); 6130 return 0; 6131 } 6132 6133 6134 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6135 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6136 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6137 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6138 }, 6139 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6140 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6141 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6142 }, 6143 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6144 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6145 0, 0 6146 }, 6147 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6148 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6149 0, 0 6150 }, 6151 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6152 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6153 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6154 }, 6155 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6156 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6157 0, 0 6158 }, 6159 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6160 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6161 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6162 }, 6163 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6164 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6165 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6166 }, 6167 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6168 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6169 0, 0 6170 }, 6171 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, 
mmDC_EDC_RESTORE_CNT), 6172 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6173 0, 0 6174 }, 6175 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6176 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6177 0, 0 6178 }, 6179 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6180 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6181 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6182 }, 6183 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6184 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6185 0, 0 6186 }, 6187 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6188 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6189 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6190 }, 6191 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6192 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6193 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6194 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6195 }, 6196 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6197 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6198 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6199 0, 0 6200 }, 6201 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6202 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6203 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6204 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6205 }, 6206 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6207 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6208 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6209 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6210 }, 6211 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6212 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6213 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6214 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6215 }, 6216 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6217 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6218 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6219 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6220 }, 6221 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6222 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6223 0, 0 6224 }, 6225 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6226 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6227 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6228 }, 6229 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6230 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6231 0, 0 6232 }, 6233 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6234 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6235 0, 0 6236 }, 6237 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6238 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6239 0, 0 6240 }, 6241 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6242 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6243 0, 0 6244 }, 6245 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6246 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6247 0, 0 6248 }, 6249 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6250 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6251 0, 0 6252 }, 6253 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6254 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6255 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6256 }, 6257 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6258 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6259 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 
6260 }, 6261 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6262 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6263 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6264 }, 6265 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6266 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6267 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6268 }, 6269 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6270 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6271 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6272 }, 6273 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6274 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6275 0, 0 6276 }, 6277 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6278 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6279 0, 0 6280 }, 6281 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6282 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6283 0, 0 6284 }, 6285 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6286 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6287 0, 0 6288 }, 6289 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6290 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6291 0, 0 6292 }, 6293 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6294 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6295 0, 0 6296 }, 6297 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6298 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6299 0, 0 6300 }, 6301 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6302 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6303 0, 0 6304 }, 6305 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6306 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6307 0, 0 6308 }, 6309 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6310 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6311 0, 0 6312 }, 6313 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6314 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6315 0, 0 6316 }, 6317 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6318 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6319 0, 0 6320 }, 6321 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6322 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6323 0, 0 6324 }, 6325 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6326 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6327 0, 0 6328 }, 6329 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6330 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6331 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6332 }, 6333 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6334 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6335 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6336 }, 6337 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6338 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6339 0, 0 6340 }, 6341 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6342 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6343 0, 0 6344 }, 6345 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6346 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6347 0, 0 6348 }, 6349 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, 
mmTCP_EDC_CNT_NEW), 6350 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6351 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6352 }, 6353 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6354 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6355 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6356 }, 6357 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6358 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6359 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6360 }, 6361 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6362 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6363 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6364 }, 6365 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6366 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6367 0, 0 6368 }, 6369 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6370 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6371 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6372 }, 6373 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6374 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6375 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6376 }, 6377 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6378 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6379 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6380 }, 6381 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6382 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6383 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6384 }, 6385 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6386 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6387 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6388 }, 6389 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6390 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6391 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6392 }, 6393 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6394 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6395 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6396 }, 6397 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6398 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6399 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6400 }, 6401 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6402 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6403 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6404 }, 6405 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6406 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6407 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6408 }, 6409 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6410 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6411 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6412 }, 6413 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6414 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6415 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6416 }, 6417 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6418 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6419 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6420 }, 6421 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6422 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6423 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6424 }, 6425 { 
"SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6426 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6427 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6428 }, 6429 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6430 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6431 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6432 }, 6433 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6434 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6435 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6436 }, 6437 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6438 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6439 0, 0 6440 }, 6441 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6442 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6443 0, 0 6444 }, 6445 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6446 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6447 0, 0 6448 }, 6449 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6450 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6451 0, 0 6452 }, 6453 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6454 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6455 0, 0 6456 }, 6457 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6458 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6459 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6460 }, 6461 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6462 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6463 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6464 }, 6465 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6466 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6467 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6468 }, 6469 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6470 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6471 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6472 }, 6473 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6474 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6475 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6476 }, 6477 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6478 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6479 0, 0 6480 }, 6481 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6482 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6483 0, 0 6484 }, 6485 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6486 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6487 0, 0 6488 }, 6489 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6490 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6491 0, 0 6492 }, 6493 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6494 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6495 0, 0 6496 }, 6497 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6498 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6499 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6500 }, 6501 { 
"EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6502 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6503 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6504 }, 6505 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6506 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6507 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6508 }, 6509 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6510 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6511 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6512 }, 6513 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6514 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6515 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6516 }, 6517 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6518 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6519 0, 0 6520 }, 6521 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6522 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6523 0, 0 6524 }, 6525 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6526 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6527 0, 0 6528 }, 6529 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6530 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6531 0, 0 6532 }, 6533 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6534 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6535 0, 0 6536 }, 6537 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6538 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6539 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6540 }, 6541 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6542 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6543 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6544 }, 6545 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6546 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6547 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6548 }, 6549 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6550 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6551 0, 0 6552 }, 6553 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6554 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6555 0, 0 6556 }, 6557 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6558 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6559 0, 0 6560 }, 6561 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6562 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6563 0, 0 6564 }, 6565 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6566 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6567 0, 0 6568 }, 6569 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6570 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6571 0, 0 6572 } 6573 }; 6574 6575 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6576 void *inject_if, uint32_t instance_mask) 6577 { 6578 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6579 int ret; 6580 struct ta_ras_trigger_error_input block_info = { 0 }; 6581 6582 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6583 return -EINVAL; 6584 6585 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6586 return -EINVAL; 6587 6588 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6589 return -EPERM; 6590 6591 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6592 
info->head.type)) { 6593 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n", 6594 ras_gfx_subblocks[info->head.sub_block_index].name, 6595 info->head.type); 6596 return -EPERM; 6597 } 6598 6599 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6600 info->head.type)) { 6601 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n", 6602 ras_gfx_subblocks[info->head.sub_block_index].name, 6603 info->head.type); 6604 return -EPERM; 6605 } 6606 6607 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6608 block_info.sub_block_index = 6609 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6610 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6611 block_info.address = info->address; 6612 block_info.value = info->value; 6613 6614 mutex_lock(&adev->grbm_idx_mutex); 6615 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); 6616 mutex_unlock(&adev->grbm_idx_mutex); 6617 6618 return ret; 6619 } 6620 6621 static const char * const vml2_mems[] = { 6622 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6623 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6624 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6625 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6626 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6627 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6628 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6629 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6630 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6631 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6632 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6633 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6634 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6635 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6636 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6637 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6638 }; 6639 6640 static const char * const vml2_walker_mems[] = { 6641 "UTC_VML2_CACHE_PDE0_MEM0", 6642 "UTC_VML2_CACHE_PDE0_MEM1", 6643 "UTC_VML2_CACHE_PDE1_MEM0", 6644 "UTC_VML2_CACHE_PDE1_MEM1", 6645 "UTC_VML2_CACHE_PDE2_MEM0", 6646 "UTC_VML2_CACHE_PDE2_MEM1", 6647 "UTC_VML2_RDIF_LOG_FIFO", 6648 }; 6649 6650 static const char * const atc_l2_cache_2m_mems[] = { 6651 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6652 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6653 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6654 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6655 }; 6656 6657 static const char *atc_l2_cache_4k_mems[] = { 6658 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6659 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6660 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6661 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6662 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6663 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6664 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6665 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6666 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6667 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6668 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6669 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6670 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6671 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6672 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6673 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6674 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6675 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6676 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6677 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6678 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6679 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6680 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6681 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6682 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6683 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6684 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6685 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6686 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6687
"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6688 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6689 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6690 }; 6691 6692 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6693 struct ras_err_data *err_data) 6694 { 6695 uint32_t i, data; 6696 uint32_t sec_count, ded_count; 6697 6698 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6699 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6700 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6701 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6702 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6703 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6704 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6705 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6706 6707 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6708 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6709 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6710 6711 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6712 if (sec_count) { 6713 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6714 "SEC %d\n", i, vml2_mems[i], sec_count); 6715 err_data->ce_count += sec_count; 6716 } 6717 6718 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6719 if (ded_count) { 6720 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6721 "DED %d\n", i, vml2_mems[i], ded_count); 6722 err_data->ue_count += ded_count; 6723 } 6724 } 6725 6726 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6727 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6728 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6729 6730 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6731 SEC_COUNT); 6732 if (sec_count) { 6733 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6734 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6735 err_data->ce_count += sec_count; 6736 } 6737 6738 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6739 DED_COUNT); 6740 if (ded_count) { 6741 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6742 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6743 err_data->ue_count += ded_count; 6744 } 6745 } 6746 6747 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6748 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6749 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6750 6751 sec_count = (data & 0x00006000L) >> 0xd; 6752 if (sec_count) { 6753 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6754 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6755 sec_count); 6756 err_data->ce_count += sec_count; 6757 } 6758 } 6759 6760 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6761 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6762 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6763 6764 sec_count = (data & 0x00006000L) >> 0xd; 6765 if (sec_count) { 6766 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6767 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6768 sec_count); 6769 err_data->ce_count += sec_count; 6770 } 6771 6772 ded_count = (data & 0x00018000L) >> 0xf; 6773 if (ded_count) { 6774 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6775 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6776 ded_count); 6777 err_data->ue_count += ded_count; 6778 } 6779 } 6780 6781 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6782 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6783 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6784 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6785 6786 return 0; 6787 } 6788 6789 static int gfx_v9_0_ras_error_count(struct 
amdgpu_device *adev, 6790 const struct soc15_reg_entry *reg, 6791 uint32_t se_id, uint32_t inst_id, uint32_t value, 6792 uint32_t *sec_count, uint32_t *ded_count) 6793 { 6794 uint32_t i; 6795 uint32_t sec_cnt, ded_cnt; 6796 6797 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6798 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6799 gfx_v9_0_ras_fields[i].seg != reg->seg || 6800 gfx_v9_0_ras_fields[i].inst != reg->inst) 6801 continue; 6802 6803 sec_cnt = (value & 6804 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6805 gfx_v9_0_ras_fields[i].sec_count_shift; 6806 if (sec_cnt) { 6807 dev_info(adev->dev, "GFX SubBlock %s, " 6808 "Instance[%d][%d], SEC %d\n", 6809 gfx_v9_0_ras_fields[i].name, 6810 se_id, inst_id, 6811 sec_cnt); 6812 *sec_count += sec_cnt; 6813 } 6814 6815 ded_cnt = (value & 6816 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6817 gfx_v9_0_ras_fields[i].ded_count_shift; 6818 if (ded_cnt) { 6819 dev_info(adev->dev, "GFX SubBlock %s, " 6820 "Instance[%d][%d], DED %d\n", 6821 gfx_v9_0_ras_fields[i].name, 6822 se_id, inst_id, 6823 ded_cnt); 6824 *ded_count += ded_cnt; 6825 } 6826 } 6827 6828 return 0; 6829 } 6830 6831 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6832 { 6833 int i, j, k; 6834 6835 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6836 return; 6837 6838 /* read back registers to clear the counters */ 6839 mutex_lock(&adev->grbm_idx_mutex); 6840 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6841 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6842 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6843 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 6844 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6845 } 6846 } 6847 } 6848 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6849 mutex_unlock(&adev->grbm_idx_mutex); 6850 6851 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6852 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6853 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6854 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6855 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6856 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6857 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6858 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6859 6860 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6861 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6862 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6863 } 6864 6865 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6866 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6867 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6868 } 6869 6870 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6871 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6872 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6873 } 6874 6875 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6876 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6877 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6878 } 6879 6880 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6881 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6882 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6883 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6884 } 6885 6886 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6887 void *ras_error_status) 6888 { 6889 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6890 uint32_t sec_count = 0, ded_count = 0; 6891 uint32_t i, j, k; 6892 
uint32_t reg_value; 6893 6894 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6895 return; 6896 6897 err_data->ue_count = 0; 6898 err_data->ce_count = 0; 6899 6900 mutex_lock(&adev->grbm_idx_mutex); 6901 6902 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6903 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6904 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6905 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); 6906 reg_value = 6907 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6908 if (reg_value) 6909 gfx_v9_0_ras_error_count(adev, 6910 &gfx_v9_0_edc_counter_regs[i], 6911 j, k, reg_value, 6912 &sec_count, &ded_count); 6913 } 6914 } 6915 } 6916 6917 err_data->ce_count += sec_count; 6918 err_data->ue_count += ded_count; 6919 6920 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 6921 mutex_unlock(&adev->grbm_idx_mutex); 6922 6923 gfx_v9_0_query_utc_edc_status(adev, err_data); 6924 } 6925 6926 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 6927 { 6928 const unsigned int cp_coher_cntl = 6929 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 6930 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 6931 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 6932 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 6933 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 6934 6935 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6936 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 6937 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 6938 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6939 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6940 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6941 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6942 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6943 } 6944 6945 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 6946 uint32_t pipe, bool enable) 6947 { 6948 struct amdgpu_device *adev = ring->adev; 6949 uint32_t val; 6950 uint32_t wcl_cs_reg; 6951 6952 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */ 6953 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 6954 6955 switch (pipe) { 6956 case 0: 6957 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 6958 break; 6959 case 1: 6960 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 6961 break; 6962 case 2: 6963 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 6964 break; 6965 case 3: 6966 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 6967 break; 6968 default: 6969 DRM_DEBUG("invalid pipe %d\n", pipe); 6970 return; 6971 } 6972 6973 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 6974 6975 } 6976 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 6977 { 6978 struct amdgpu_device *adev = ring->adev; 6979 uint32_t val; 6980 int i; 6981 6982 6983 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit 6984 * the number of gfx waves. Setting 5 bits (0x1f out of the 0x7f maximum) 6985 * makes sure gfx only gets around 25% of gpu resources. 6986 */ 6987 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 6988 amdgpu_ring_emit_wreg(ring, 6989 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 6990 val); 6991 6992 /* Restrict waves for normal/low priority compute queues as well 6993 * to get the best QoS for high priority compute jobs. 6994 * 6995 * amdgpu controls only the 1st ME (CS pipes 0-3).
6996 */ 6997 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6998 if (i != ring->pipe) 6999 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 7000 7001 } 7002 } 7003 7004 static void gfx_v9_ip_print(void *handle, struct drm_printer *p) 7005 { 7006 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7007 uint32_t i, j, k, reg, index = 0; 7008 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7009 7010 if (!adev->gfx.ip_dump_core) 7011 return; 7012 7013 for (i = 0; i < reg_count; i++) 7014 drm_printf(p, "%-50s \t 0x%08x\n", 7015 gc_reg_list_9[i].reg_name, 7016 adev->gfx.ip_dump_core[i]); 7017 7018 /* print compute queue registers for all instances */ 7019 if (!adev->gfx.ip_dump_compute_queues) 7020 return; 7021 7022 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7023 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7024 adev->gfx.mec.num_mec, 7025 adev->gfx.mec.num_pipe_per_mec, 7026 adev->gfx.mec.num_queue_per_pipe); 7027 7028 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7029 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7030 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7031 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7032 for (reg = 0; reg < reg_count; reg++) { 7033 drm_printf(p, "%-50s \t 0x%08x\n", 7034 gc_cp_reg_list_9[reg].reg_name, 7035 adev->gfx.ip_dump_compute_queues[index + reg]); 7036 } 7037 index += reg_count; 7038 } 7039 } 7040 } 7041 7042 } 7043 7044 static void gfx_v9_ip_dump(void *handle) 7045 { 7046 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7047 uint32_t i, j, k, reg, index = 0; 7048 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7049 7050 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) 7051 return; 7052 7053 amdgpu_gfx_off_ctrl(adev, false); 7054 for (i = 0; i < reg_count; i++) 7055 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); 7056 amdgpu_gfx_off_ctrl(adev, true); 7057 7058 /* dump compute queue registers for all instances */ 7059 if (!adev->gfx.ip_dump_compute_queues) 7060 return; 7061 7062 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7063 amdgpu_gfx_off_ctrl(adev, false); 7064 mutex_lock(&adev->srbm_mutex); 7065 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7066 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7067 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7068 /* ME0 is for GFX so start from 1 for CP */ 7069 soc15_grbm_select(adev, 1 + i, j, k, 0, 0); 7070 7071 for (reg = 0; reg < reg_count; reg++) { 7072 adev->gfx.ip_dump_compute_queues[index + reg] = 7073 RREG32(SOC15_REG_ENTRY_OFFSET( 7074 gc_cp_reg_list_9[reg])); 7075 } 7076 index += reg_count; 7077 } 7078 } 7079 } 7080 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7081 mutex_unlock(&adev->srbm_mutex); 7082 amdgpu_gfx_off_ctrl(adev, true); 7083 7084 } 7085 7086 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7087 .name = "gfx_v9_0", 7088 .early_init = gfx_v9_0_early_init, 7089 .late_init = gfx_v9_0_late_init, 7090 .sw_init = gfx_v9_0_sw_init, 7091 .sw_fini = gfx_v9_0_sw_fini, 7092 .hw_init = gfx_v9_0_hw_init, 7093 .hw_fini = gfx_v9_0_hw_fini, 7094 .suspend = gfx_v9_0_suspend, 7095 .resume = gfx_v9_0_resume, 7096 .is_idle = gfx_v9_0_is_idle, 7097 .wait_for_idle = gfx_v9_0_wait_for_idle, 7098 .soft_reset = gfx_v9_0_soft_reset, 7099 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7100 .set_powergating_state = gfx_v9_0_set_powergating_state, 7101 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7102 .dump_ip_state = gfx_v9_ip_dump, 7103 .print_ip_state = gfx_v9_ip_print, 
7104 }; 7105 7106 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7107 .type = AMDGPU_RING_TYPE_GFX, 7108 .align_mask = 0xff, 7109 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7110 .support_64bit_ptrs = true, 7111 .secure_submission_supported = true, 7112 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7113 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7114 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7115 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7116 5 + /* COND_EXEC */ 7117 7 + /* PIPELINE_SYNC */ 7118 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7119 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7120 2 + /* VM_FLUSH */ 7121 8 + /* FENCE for VM_FLUSH */ 7122 20 + /* GDS switch */ 7123 4 + /* double SWITCH_BUFFER, 7124 the first COND_EXEC jump to the place just 7125 prior to this double SWITCH_BUFFER */ 7126 5 + /* COND_EXEC */ 7127 7 + /* HDP_flush */ 7128 4 + /* VGT_flush */ 7129 14 + /* CE_META */ 7130 31 + /* DE_META */ 7131 3 + /* CNTX_CTRL */ 7132 5 + /* HDP_INVL */ 7133 8 + 8 + /* FENCE x2 */ 7134 2 + /* SWITCH_BUFFER */ 7135 7, /* gfx_v9_0_emit_mem_sync */ 7136 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7137 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7138 .emit_fence = gfx_v9_0_ring_emit_fence, 7139 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7140 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7141 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7142 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7143 .test_ring = gfx_v9_0_ring_test_ring, 7144 .insert_nop = amdgpu_ring_insert_nop, 7145 .pad_ib = amdgpu_ring_generic_pad_ib, 7146 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7147 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7148 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7149 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7150 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7151 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7152 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7153 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7154 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7155 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7156 }; 7157 7158 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7159 .type = AMDGPU_RING_TYPE_GFX, 7160 .align_mask = 0xff, 7161 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7162 .support_64bit_ptrs = true, 7163 .secure_submission_supported = true, 7164 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7165 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7166 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7167 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7168 5 + /* COND_EXEC */ 7169 7 + /* PIPELINE_SYNC */ 7170 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7171 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7172 2 + /* VM_FLUSH */ 7173 8 + /* FENCE for VM_FLUSH */ 7174 20 + /* GDS switch */ 7175 4 + /* double SWITCH_BUFFER, 7176 * the first COND_EXEC jump to the place just 7177 * prior to this double SWITCH_BUFFER 7178 */ 7179 5 + /* COND_EXEC */ 7180 7 + /* HDP_flush */ 7181 4 + /* VGT_flush */ 7182 14 + /* CE_META */ 7183 31 + /* DE_META */ 7184 3 + /* CNTX_CTRL */ 7185 5 + /* HDP_INVL */ 7186 8 + 8 + /* FENCE x2 */ 7187 2 + /* SWITCH_BUFFER */ 7188 7, /* gfx_v9_0_emit_mem_sync */ 7189 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7190 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7191 .emit_fence = gfx_v9_0_ring_emit_fence, 7192 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7193 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7194 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7195 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7196 .test_ring = 
gfx_v9_0_ring_test_ring, 7197 .test_ib = gfx_v9_0_ring_test_ib, 7198 .insert_nop = amdgpu_sw_ring_insert_nop, 7199 .pad_ib = amdgpu_ring_generic_pad_ib, 7200 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7201 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7202 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7203 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7204 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7205 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7206 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7207 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7208 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7209 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7210 .patch_de = gfx_v9_0_ring_patch_de_meta, 7211 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7212 }; 7213 7214 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7215 .type = AMDGPU_RING_TYPE_COMPUTE, 7216 .align_mask = 0xff, 7217 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7218 .support_64bit_ptrs = true, 7219 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7220 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7221 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7222 .emit_frame_size = 7223 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7224 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7225 5 + /* hdp invalidate */ 7226 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7227 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7228 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7229 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7230 7 + /* gfx_v9_0_emit_mem_sync */ 7231 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7232 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7233 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7234 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7235 .emit_fence = gfx_v9_0_ring_emit_fence, 7236 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7237 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7238 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7239 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7240 .test_ring = gfx_v9_0_ring_test_ring, 7241 .test_ib = gfx_v9_0_ring_test_ib, 7242 .insert_nop = amdgpu_ring_insert_nop, 7243 .pad_ib = amdgpu_ring_generic_pad_ib, 7244 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7245 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7246 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7247 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7248 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7249 }; 7250 7251 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7252 .type = AMDGPU_RING_TYPE_KIQ, 7253 .align_mask = 0xff, 7254 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7255 .support_64bit_ptrs = true, 7256 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7257 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7258 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7259 .emit_frame_size = 7260 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7261 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7262 5 + /* hdp invalidate */ 7263 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7264 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7265 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7266 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7267 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7268 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7269 .test_ring = gfx_v9_0_ring_test_ring, 7270 .insert_nop = amdgpu_ring_insert_nop, 7271 .pad_ib = amdgpu_ring_generic_pad_ib, 7272 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7273 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7274 .emit_reg_wait = 
gfx_v9_0_ring_emit_reg_wait, 7275 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7276 }; 7277 7278 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7279 { 7280 int i; 7281 7282 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7283 7284 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7285 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7286 7287 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 7288 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 7289 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 7290 } 7291 7292 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7293 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7294 } 7295 7296 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7297 .set = gfx_v9_0_set_eop_interrupt_state, 7298 .process = gfx_v9_0_eop_irq, 7299 }; 7300 7301 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7302 .set = gfx_v9_0_set_priv_reg_fault_state, 7303 .process = gfx_v9_0_priv_reg_irq, 7304 }; 7305 7306 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7307 .set = gfx_v9_0_set_priv_inst_fault_state, 7308 .process = gfx_v9_0_priv_inst_irq, 7309 }; 7310 7311 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7312 .set = gfx_v9_0_set_cp_ecc_error_state, 7313 .process = amdgpu_gfx_cp_ecc_error_irq, 7314 }; 7315 7316 7317 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 7318 { 7319 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7320 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 7321 7322 adev->gfx.priv_reg_irq.num_types = 1; 7323 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 7324 7325 adev->gfx.priv_inst_irq.num_types = 1; 7326 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7327 7328 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */ 7329 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7330 } 7331 7332 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7333 { 7334 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7335 case IP_VERSION(9, 0, 1): 7336 case IP_VERSION(9, 2, 1): 7337 case IP_VERSION(9, 4, 0): 7338 case IP_VERSION(9, 2, 2): 7339 case IP_VERSION(9, 1, 0): 7340 case IP_VERSION(9, 4, 1): 7341 case IP_VERSION(9, 3, 0): 7342 case IP_VERSION(9, 4, 2): 7343 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7344 break; 7345 default: 7346 break; 7347 } 7348 } 7349 7350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7351 { 7352 /* init asic gds info */ 7353 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7354 case IP_VERSION(9, 0, 1): 7355 case IP_VERSION(9, 2, 1): 7356 case IP_VERSION(9, 4, 0): 7357 adev->gds.gds_size = 0x10000; 7358 break; 7359 case IP_VERSION(9, 2, 2): 7360 case IP_VERSION(9, 1, 0): 7361 case IP_VERSION(9, 4, 1): 7362 adev->gds.gds_size = 0x1000; 7363 break; 7364 case IP_VERSION(9, 4, 2): 7365 /* aldebaran removed all the GDS internal memory, 7366 * only GWS opcodes such as barrier and semaphore 7367 * are supported in the kernel */ 7368 adev->gds.gds_size = 0; 7369 break; 7370 default: 7371 adev->gds.gds_size = 0x10000; 7372 break; 7373 } 7374 7375 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7376 case IP_VERSION(9, 0, 1): 7377 case IP_VERSION(9, 4, 0): 7378 adev->gds.gds_compute_max_wave_id = 0x7ff; 7379 break; 7380 case IP_VERSION(9, 2, 1): 7381 adev->gds.gds_compute_max_wave_id = 0x27f; 7382 break; 7383 case IP_VERSION(9, 2, 2): 7384 case IP_VERSION(9, 1, 0): 7385 if (adev->apu_flags
& AMD_APU_IS_RAVEN2) 7386 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7387 else 7388 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7389 break; 7390 case IP_VERSION(9, 4, 1): 7391 adev->gds.gds_compute_max_wave_id = 0xfff; 7392 break; 7393 case IP_VERSION(9, 4, 2): 7394 /* deprecated for Aldebaran, no usage at all */ 7395 adev->gds.gds_compute_max_wave_id = 0; 7396 break; 7397 default: 7398 /* this really depends on the chip */ 7399 adev->gds.gds_compute_max_wave_id = 0x7ff; 7400 break; 7401 } 7402 7403 adev->gds.gws_size = 64; 7404 adev->gds.oa_size = 16; 7405 } 7406 7407 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7408 u32 bitmap) 7409 { 7410 u32 data; 7411 7412 if (!bitmap) 7413 return; 7414 7415 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7416 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7417 7418 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7419 } 7420 7421 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7422 { 7423 u32 data, mask; 7424 7425 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7426 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7427 7428 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7429 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7430 7431 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7432 7433 return (~data) & mask; 7434 } 7435 7436 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7437 struct amdgpu_cu_info *cu_info) 7438 { 7439 int i, j, k, counter, active_cu_number = 0; 7440 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7441 unsigned disable_masks[4 * 4]; 7442 7443 if (!adev || !cu_info) 7444 return -EINVAL; 7445 7446 /* 7447 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7448 */ 7449 if (adev->gfx.config.max_shader_engines * 7450 adev->gfx.config.max_sh_per_se > 16) 7451 return -EINVAL; 7452 7453 amdgpu_gfx_parse_disable_cu(disable_masks, 7454 adev->gfx.config.max_shader_engines, 7455 adev->gfx.config.max_sh_per_se); 7456 7457 mutex_lock(&adev->grbm_idx_mutex); 7458 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7459 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7460 mask = 1; 7461 ao_bitmap = 0; 7462 counter = 0; 7463 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 7464 gfx_v9_0_set_user_cu_inactive_bitmap( 7465 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7466 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7467 7468 /* 7469 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7470 * 4x4 size array, and it's usually suitable for Vega 7471 * ASICs which has 4*2 SE/SH layout. 7472 * But for Arcturus, SE/SH layout is changed to 8*1. 
7473 * To minimize the impact, we make it compatible 7474 * with the current bitmap array as below: 7475 * SE4,SH0 --> bitmap[0][1] 7476 * SE5,SH0 --> bitmap[1][1] 7477 * SE6,SH0 --> bitmap[2][1] 7478 * SE7,SH0 --> bitmap[3][1] 7479 */ 7480 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; 7481 7482 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7483 if (bitmap & mask) { 7484 if (counter < adev->gfx.config.max_cu_per_sh) 7485 ao_bitmap |= mask; 7486 counter++; 7487 } 7488 mask <<= 1; 7489 } 7490 active_cu_number += counter; 7491 if (i < 2 && j < 2) 7492 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7493 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7494 } 7495 } 7496 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7497 mutex_unlock(&adev->grbm_idx_mutex); 7498 7499 cu_info->number = active_cu_number; 7500 cu_info->ao_cu_mask = ao_cu_mask; 7501 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7502 7503 return 0; 7504 } 7505 7506 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7507 { 7508 .type = AMD_IP_BLOCK_TYPE_GFX, 7509 .major = 9, 7510 .minor = 0, 7511 .rev = 0, 7512 .funcs = &gfx_v9_0_ip_funcs, 7513 }; 7514
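/*
 * Usage sketch: gfx_v9_0_ip_block above is what the SoC-level init code
 * consumes.  Assuming the usual amdgpu_device_ip_block_add() helper (the
 * exact call site lives in the SoC setup code, e.g. soc15.c), registration
 * looks roughly like:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * after which the amd_ip_funcs and ring/irq callbacks defined in this file
 * are reached through the generic IP init/fini/suspend/resume paths.
 */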