/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 103 104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 111 112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); 114 115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); 116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/renoir_me.bin"); 118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); 120 121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin"); 122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin"); 123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin"); 124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin"); 125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin"); 126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); 127 128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); 129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); 130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); 131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin"); 132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); 133 134 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 136 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04 137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 138 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09 139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 140 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a 141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 142 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b 143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 144 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c 145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 146 147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025 148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1 149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 151 152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { 153 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), 154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), 155 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), 156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), 157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), 158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), 159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), 160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), 161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), 162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), 163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), 164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), 165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), 166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), 167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), 168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), 169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), 170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), 171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), 172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), 173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), 174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), 175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), 176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), 177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), 178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), 179 
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), 180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), 181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), 182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), 183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), 184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), 185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), 186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), 187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), 188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), 189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), 190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), 191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), 192 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), 193 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), 194 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), 195 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), 196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), 197 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), 198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL), 199 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), 200 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), 201 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), 202 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS), 203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS), 204 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS), 205 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS), 206 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), 207 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), 208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS), 209 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), 210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), 211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), 212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), 213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), 214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), 215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), 216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), 217 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), 218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), 219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), 220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), 221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), 222 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), 223 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), 224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), 225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), 226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), 227 /* cp header registers */ 228 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), 229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), 230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), 231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), 232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), 233 /* SE status registers */ 234 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), 235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), 236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), 237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) 238 }; 239 240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { 241 /* compute queue registers */ 242 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID), 243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE), 244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE), 245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY), 246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY), 247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM), 248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE), 249 SOC15_REG_ENTRY_STR(GC, 0, 
mmCP_HQD_PQ_BASE_HI), 250 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR), 251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), 252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), 253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), 254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL), 255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR), 256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI), 257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR), 258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL), 259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR), 261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), 262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL), 263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR), 264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR), 265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS), 266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO), 267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI), 268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL), 269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET), 270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE), 271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET), 272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE), 273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE), 274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR), 275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM), 276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), 277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), 278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), 279 }; 280 281 enum ta_ras_gfx_subblock { 282 /*CPC*/ 283 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, 284 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, 285 TA_RAS_BLOCK__GFX_CPC_UCODE, 286 TA_RAS_BLOCK__GFX_DC_STATE_ME1, 287 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, 288 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, 289 TA_RAS_BLOCK__GFX_DC_STATE_ME2, 290 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, 291 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 292 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, 293 /* CPF*/ 294 TA_RAS_BLOCK__GFX_CPF_INDEX_START, 295 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, 296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, 297 TA_RAS_BLOCK__GFX_CPF_TAG, 298 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, 299 /* CPG*/ 300 TA_RAS_BLOCK__GFX_CPG_INDEX_START, 301 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, 302 TA_RAS_BLOCK__GFX_CPG_DMA_TAG, 303 TA_RAS_BLOCK__GFX_CPG_TAG, 304 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, 305 /* GDS*/ 306 TA_RAS_BLOCK__GFX_GDS_INDEX_START, 307 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, 308 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, 309 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, 310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, 311 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 312 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, 313 /* SPI*/ 314 TA_RAS_BLOCK__GFX_SPI_SR_MEM, 315 /* SQ*/ 316 TA_RAS_BLOCK__GFX_SQ_INDEX_START, 317 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, 318 TA_RAS_BLOCK__GFX_SQ_LDS_D, 319 TA_RAS_BLOCK__GFX_SQ_LDS_I, 320 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ 321 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, 322 /* SQC (3 ranges)*/ 323 TA_RAS_BLOCK__GFX_SQC_INDEX_START, 324 /* SQC range 0*/ 325 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, 326 
TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = 327 TA_RAS_BLOCK__GFX_SQC_INDEX0_START, 328 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, 330 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, 332 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 334 TA_RAS_BLOCK__GFX_SQC_INDEX0_END = 335 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, 336 /* SQC range 1*/ 337 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 338 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = 339 TA_RAS_BLOCK__GFX_SQC_INDEX1_START, 340 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, 342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, 343 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, 344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, 345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, 346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 348 TA_RAS_BLOCK__GFX_SQC_INDEX1_END = 349 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, 350 /* SQC range 2*/ 351 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 352 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = 353 TA_RAS_BLOCK__GFX_SQC_INDEX2_START, 354 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, 356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, 357 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, 358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, 359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, 360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 362 TA_RAS_BLOCK__GFX_SQC_INDEX2_END = 363 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, 364 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, 365 /* TA*/ 366 TA_RAS_BLOCK__GFX_TA_INDEX_START, 367 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, 368 TA_RAS_BLOCK__GFX_TA_FS_AFIFO, 369 TA_RAS_BLOCK__GFX_TA_FL_LFIFO, 370 TA_RAS_BLOCK__GFX_TA_FX_LFIFO, 371 TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 372 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, 373 /* TCA*/ 374 TA_RAS_BLOCK__GFX_TCA_INDEX_START, 375 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, 376 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 377 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, 378 /* TCC (5 sub-ranges)*/ 379 TA_RAS_BLOCK__GFX_TCC_INDEX_START, 380 /* TCC range 0*/ 381 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, 382 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, 383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, 384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, 385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, 386 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, 387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, 388 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, 389 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 390 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, 391 /* TCC range 1*/ 392 TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 393 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, 394 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 395 TA_RAS_BLOCK__GFX_TCC_INDEX1_END = 396 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, 397 /* TCC range 2*/ 398 TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 399 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, 400 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, 401 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, 402 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, 403 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, 404 
TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, 405 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, 406 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 407 TA_RAS_BLOCK__GFX_TCC_INDEX2_END = 408 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, 409 /* TCC range 3*/ 410 TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 411 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, 412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 413 TA_RAS_BLOCK__GFX_TCC_INDEX3_END = 414 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, 415 /* TCC range 4*/ 416 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 417 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = 418 TA_RAS_BLOCK__GFX_TCC_INDEX4_START, 419 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 420 TA_RAS_BLOCK__GFX_TCC_INDEX4_END = 421 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, 422 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, 423 /* TCI*/ 424 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, 425 /* TCP*/ 426 TA_RAS_BLOCK__GFX_TCP_INDEX_START, 427 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, 428 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, 429 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, 430 TA_RAS_BLOCK__GFX_TCP_VM_FIFO, 431 TA_RAS_BLOCK__GFX_TCP_DB_RAM, 432 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, 433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 434 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, 435 /* TD*/ 436 TA_RAS_BLOCK__GFX_TD_INDEX_START, 437 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, 438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, 439 TA_RAS_BLOCK__GFX_TD_CS_FIFO, 440 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, 441 /* EA (3 sub-ranges)*/ 442 TA_RAS_BLOCK__GFX_EA_INDEX_START, 443 /* EA range 0*/ 444 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, 445 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, 446 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, 447 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, 448 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, 449 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, 450 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, 451 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, 452 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 453 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, 454 /* EA range 1*/ 455 TA_RAS_BLOCK__GFX_EA_INDEX1_START, 456 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, 457 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, 458 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, 459 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, 460 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, 461 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, 462 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 463 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, 464 /* EA range 2*/ 465 TA_RAS_BLOCK__GFX_EA_INDEX2_START, 466 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, 467 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, 468 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, 469 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 470 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, 471 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, 472 /* UTC VM L2 bank*/ 473 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, 474 /* UTC VM walker*/ 475 TA_RAS_BLOCK__UTC_VML2_WALKER, 476 /* UTC ATC L2 2MB cache*/ 477 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, 478 /* UTC ATC L2 4KB cache*/ 479 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, 480 TA_RAS_BLOCK__GFX_MAX 481 }; 482 483 struct ras_gfx_subblock { 484 unsigned char *name; 485 int ta_subblock; 486 int hw_supported_error_type; 487 int sw_supported_error_type; 488 }; 489 490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ 491 [AMDGPU_RAS_BLOCK__##subblock] = { \ 
492 #subblock, \ 493 TA_RAS_BLOCK__##subblock, \ 494 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ 495 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ 496 } 497 498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = { 499 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), 500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), 501 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), 504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 507 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), 508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), 509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), 510 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), 511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), 512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), 513 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), 515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 516 0), 517 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 518 0), 519 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), 520 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), 521 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), 522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), 523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), 524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), 525 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), 526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 527 0, 0), 528 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 529 0), 530 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 531 0, 0), 532 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 533 0), 534 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 535 0, 0), 536 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 537 0), 538 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 539 1), 540 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 541 0, 0, 0), 542 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 543 0), 544 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 545 0), 546 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 547 0), 548 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 549 0), 550 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 551 0), 552 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 553 0, 0), 554 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 555 0), 556 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 557 0), 558 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 559 0, 0, 0), 560 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 561 0), 562 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 563 0), 564 
AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 565 0), 566 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 567 0), 568 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 569 0), 570 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 571 0, 0), 572 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 573 0), 574 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), 575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), 579 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), 580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 581 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), 582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 583 1), 584 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 585 1), 586 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 587 1), 588 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 589 0), 590 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 591 0), 592 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), 594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), 595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), 596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), 597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), 598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), 600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), 601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), 603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 604 0), 605 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 607 0), 608 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 609 0, 0), 610 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 611 0), 612 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 613 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), 614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), 615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), 617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), 618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), 619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), 620 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), 621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), 622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), 623 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), 624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 
0), 626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), 628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), 630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), 631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), 635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), 636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), 638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), 639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), 640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), 641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), 642 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), 643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), 644 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), 645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), 646 }; 647 648 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 649 { 650 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87), 660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f), 661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 670 }; 671 672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 673 { 674 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 678 SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), 690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), 691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) 692 }; 693 694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = 695 { 696 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080), 697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042), 700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042), 701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400), 702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000), 703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000), 704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107), 705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000), 706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000) 707 }; 708 709 static const struct soc15_reg_golden golden_settings_gc_9_1[] = 710 { 711 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 729 SOC15_REG_GOLDEN_VALUE(GC, 
0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 730 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), 731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), 732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 735 }; 736 737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = 738 { 739 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), 741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), 742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), 743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), 744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) 746 }; 747 748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = 749 { 750 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), 751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), 754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), 755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), 756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), 757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), 758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), 761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), 762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), 763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), 764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), 765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), 767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), 769 }; 770 771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = 772 { 773 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), 775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), 777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), 778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 783 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), 784 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), 785 }; 786 787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = 788 { 789 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), 790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), 791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) 792 }; 793 794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] = 795 { 796 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), 797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) 812 }; 813 814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = 815 { 816 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080), 817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), 818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000), 819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041), 820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041), 821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), 822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), 823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), 824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), 825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), 826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800), 827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800), 828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000) 829 }; 830 831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = 832 { 833 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), 835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), 836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), 837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), 838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), 839 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);

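/*
 * KIQ (kernel interface queue) PM4 packet builders.  The helpers below only
 * assemble and emit SET_RESOURCES / MAP_QUEUES / UNMAP_QUEUES / QUERY_STATUS /
 * INVALIDATE_TLBS packets onto the KIQ ring; ring locking, submission and
 * fence waiting are handled by the generic amdgpu_gfx KIQ code through the
 * kiq_pm4_funcs table defined further down.
 */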
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
		PACKET3_SET_RESOURCES_VMID_MASK(0) |
		/* vmid_mask:0* queue_type:0 (KIQ) */
		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
		lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
		upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			/* queue_type: normal compute queue */
			PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			/* alloc format: all_on_one_pipe */
			PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			/* num_queues: must be 1 */
			PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				struct amdgpu_ring *ring,
				enum amdgpu_unmap_queues_action action,
				u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			PACKET3_UNMAP_QUEUES_ACTION(action) |
			PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

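/*
 * Notes on the encodings used above: eng_sel is computed as 4 for GFX rings
 * and 0 for compute rings, ring->me (1 or 2 for the two compute
 * microengines) is folded into the 0-based ME field of MAP_QUEUES, and for
 * PREEMPT_QUEUES_NO_UNMAP the masked ring wptr is emitted in the first
 * trailing dword instead of zeros so the preempted queue position is
 * reported back.
 */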
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				struct amdgpu_ring *ring,
				u64 addr,
				u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

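/*
 * The *_size fields above are packet lengths in dwords, including the
 * PACKET3 header: SET_RESOURCES is 1 + 7 = 8, MAP_QUEUES 1 + 6 = 7,
 * UNMAP_QUEUES 1 + 5 = 6, QUERY_STATUS 1 + 6 = 7 and INVALIDATE_TLBS
 * 1 + 1 = 2, matching the amdgpu_ring_write() sequences emitted by the
 * builders.  The generic KIQ code uses these sizes to reserve ring space
 * before calling the builders.
 */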
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

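/*
 * Illustrative use of the two helpers above (for reference only, values are
 * hypothetical): a ring backend can emit a register write followed by a poll
 * on a memory location, e.g.
 *
 *	gfx_v9_0_write_data_to_reg(ring, 0, true, reg_offset, reg_value);
 *	gfx_v9_0_wait_reg_mem(ring, 0, 1, 1, lower_32_bits(addr),
 *			      upper_32_bits(addr), expected, 0xffffffff, 0x20);
 *
 * mem_space=1 selects a memory operand (addr0/addr1 form a 64-bit, dword
 * aligned address), mask is applied before the equality compare against ref,
 * and the final argument is the poll interval.
 */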
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

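/*
 * Both tests above use the same handshake: seed a location with 0xCAFEDEAD,
 * ask the CP to overwrite it with 0xDEADBEEF (via SET_UCONFIG_REG to a
 * scratch register for the ring test, via WRITE_DATA to a writeback slot for
 * the IB test), then wait until the new value shows up or the timeout
 * expires.  A stale 0xCAFEDEAD therefore means the engine never executed the
 * submitted packets.
 */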
VBIOS 113-RAVEN-114 */ 1301 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1302 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1303 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1304 { 0, 0, 0, 0, 0 }, 1305 }; 1306 1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1308 { 1309 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1310 1311 while (p && p->chip_device != 0) { 1312 if (pdev->vendor == p->chip_vendor && 1313 pdev->device == p->chip_device && 1314 pdev->subsystem_vendor == p->subsys_vendor && 1315 pdev->subsystem_device == p->subsys_device && 1316 pdev->revision == p->revision) { 1317 return true; 1318 } 1319 ++p; 1320 } 1321 return false; 1322 } 1323 1324 static bool is_raven_kicker(struct amdgpu_device *adev) 1325 { 1326 if (adev->pm.fw_version >= 0x41e2b) 1327 return true; 1328 else 1329 return false; 1330 } 1331 1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1333 { 1334 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1335 (adev->gfx.me_fw_version >= 0x000000a5) && 1336 (adev->gfx.me_feature_version >= 52)) 1337 return true; 1338 else 1339 return false; 1340 } 1341 1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1343 { 1344 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1345 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1346 1347 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1348 case IP_VERSION(9, 0, 1): 1349 case IP_VERSION(9, 2, 1): 1350 case IP_VERSION(9, 4, 0): 1351 break; 1352 case IP_VERSION(9, 2, 2): 1353 case IP_VERSION(9, 1, 0): 1354 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1355 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1356 ((!is_raven_kicker(adev) && 1357 adev->gfx.rlc_fw_version < 531) || 1358 (adev->gfx.rlc_feature_version < 1) || 1359 !adev->gfx.rlc.is_rlc_v2_1)) 1360 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1361 1362 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1363 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1364 AMD_PG_SUPPORT_CP | 1365 AMD_PG_SUPPORT_RLC_SMU_HS; 1366 break; 1367 case IP_VERSION(9, 3, 0): 1368 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1369 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1370 AMD_PG_SUPPORT_CP | 1371 AMD_PG_SUPPORT_RLC_SMU_HS; 1372 break; 1373 default: 1374 break; 1375 } 1376 } 1377 1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1379 char *chip_name) 1380 { 1381 char fw_name[50]; 1382 int err; 1383 1384 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1385 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name); 1386 if (err) 1387 goto out; 1388 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1389 1390 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1391 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name); 1392 if (err) 1393 goto out; 1394 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1395 1396 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1397 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name); 1398 if (err) 1399 goto out; 1400 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1401 1402 out: 1403 if (err) { 1404 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1405 amdgpu_ucode_release(&adev->gfx.me_fw); 1406 amdgpu_ucode_release(&adev->gfx.ce_fw); 1407 } 1408 return err; 1409 } 1410 1411 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1412 char *chip_name) 1413 { 1414 char fw_name[53]; 1415 int err; 1416 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1417 uint16_t 
version_major; 1418 uint16_t version_minor; 1419 uint32_t smu_version; 1420 1421 /* 1422 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1423 * instead of picasso_rlc.bin. 1424 * Judgment method: 1425 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1426 * or revision >= 0xD8 && revision <= 0xDF 1427 * otherwise is PCO FP5 1428 */ 1429 if (!strcmp(chip_name, "picasso") && 1430 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1431 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1432 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 1433 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1434 (smu_version >= 0x41e2b)) 1435 /** 1436 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1437 */ 1438 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 1439 else 1440 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1441 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); 1442 if (err) 1443 goto out; 1444 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1445 1446 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1447 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1448 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1449 out: 1450 if (err) 1451 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1452 1453 return err; 1454 } 1455 1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1457 { 1458 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1459 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1460 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1461 return false; 1462 1463 return true; 1464 } 1465 1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1467 char *chip_name) 1468 { 1469 char fw_name[50]; 1470 int err; 1471 1472 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1473 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); 1474 else 1475 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1476 1477 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name); 1478 if (err) 1479 goto out; 1480 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1481 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1482 1483 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1484 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1485 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); 1486 else 1487 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1488 1489 /* ignore failures to load */ 1490 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name); 1491 if (!err) { 1492 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1493 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1494 } else { 1495 err = 0; 1496 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1497 } 1498 } else { 1499 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1500 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1501 } 1502 1503 gfx_v9_0_check_if_need_gfxoff(adev); 1504 gfx_v9_0_check_fw_write_wait(adev); 1505 1506 out: 1507 if (err) 1508 amdgpu_ucode_release(&adev->gfx.mec_fw); 1509 return err; 1510 } 1511 1512 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1513 { 1514 char ucode_prefix[30]; 
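	/* firmware name prefix (e.g. "vega10") is decoded from the GC IP version below */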
1515 int r; 1516 1517 DRM_DEBUG("\n"); 1518 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 1519 1520 /* No CPG in Arcturus */ 1521 if (adev->gfx.num_gfx_rings) { 1522 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1523 if (r) 1524 return r; 1525 } 1526 1527 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1528 if (r) 1529 return r; 1530 1531 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1532 if (r) 1533 return r; 1534 1535 return r; 1536 } 1537 1538 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1539 { 1540 u32 count = 0; 1541 const struct cs_section_def *sect = NULL; 1542 const struct cs_extent_def *ext = NULL; 1543 1544 /* begin clear state */ 1545 count += 2; 1546 /* context control state */ 1547 count += 3; 1548 1549 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1550 for (ext = sect->section; ext->extent != NULL; ++ext) { 1551 if (sect->id == SECT_CONTEXT) 1552 count += 2 + ext->reg_count; 1553 else 1554 return 0; 1555 } 1556 } 1557 1558 /* end clear state */ 1559 count += 2; 1560 /* clear state */ 1561 count += 2; 1562 1563 return count; 1564 } 1565 1566 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1567 volatile u32 *buffer) 1568 { 1569 u32 count = 0, i; 1570 const struct cs_section_def *sect = NULL; 1571 const struct cs_extent_def *ext = NULL; 1572 1573 if (adev->gfx.rlc.cs_data == NULL) 1574 return; 1575 if (buffer == NULL) 1576 return; 1577 1578 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1579 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1580 1581 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1582 buffer[count++] = cpu_to_le32(0x80000000); 1583 buffer[count++] = cpu_to_le32(0x80000000); 1584 1585 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1586 for (ext = sect->section; ext->extent != NULL; ++ext) { 1587 if (sect->id == SECT_CONTEXT) { 1588 buffer[count++] = 1589 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1590 buffer[count++] = cpu_to_le32(ext->reg_index - 1591 PACKET3_SET_CONTEXT_REG_START); 1592 for (i = 0; i < ext->reg_count; i++) 1593 buffer[count++] = cpu_to_le32(ext->extent[i]); 1594 } else { 1595 return; 1596 } 1597 } 1598 } 1599 1600 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1601 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1602 1603 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1604 buffer[count++] = cpu_to_le32(0); 1605 } 1606 1607 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1608 { 1609 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1610 uint32_t pg_always_on_cu_num = 2; 1611 uint32_t always_on_cu_num; 1612 uint32_t i, j, k; 1613 uint32_t mask, cu_bitmap, counter; 1614 1615 if (adev->flags & AMD_IS_APU) 1616 always_on_cu_num = 4; 1617 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1618 always_on_cu_num = 8; 1619 else 1620 always_on_cu_num = 12; 1621 1622 mutex_lock(&adev->grbm_idx_mutex); 1623 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1624 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1625 mask = 1; 1626 cu_bitmap = 0; 1627 counter = 0; 1628 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1629 1630 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1631 if (cu_info->bitmap[0][i][j] & mask) { 1632 if (counter == pg_always_on_cu_num) 1633 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1634 if 
(counter < always_on_cu_num) 1635 cu_bitmap |= mask; 1636 else 1637 break; 1638 counter++; 1639 } 1640 mask <<= 1; 1641 } 1642 1643 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1644 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1645 } 1646 } 1647 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1648 mutex_unlock(&adev->grbm_idx_mutex); 1649 } 1650 1651 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1652 { 1653 uint32_t data; 1654 1655 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1656 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1657 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1658 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1659 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1660 1661 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1662 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1663 1664 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1665 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1666 1667 mutex_lock(&adev->grbm_idx_mutex); 1668 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1669 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1670 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1671 1672 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1673 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1674 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1675 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1676 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1677 1678 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1679 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1680 data &= 0x0000FFFF; 1681 data |= 0x00C00000; 1682 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1683 1684 /* 1685 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1686 * programmed in gfx_v9_0_init_always_on_cu_mask() 1687 */ 1688 1689 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1690 * but used for RLC_LB_CNTL configuration */ 1691 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1692 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1693 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1694 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1695 mutex_unlock(&adev->grbm_idx_mutex); 1696 1697 gfx_v9_0_init_always_on_cu_mask(adev); 1698 } 1699 1700 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1701 { 1702 uint32_t data; 1703 1704 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1705 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1706 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1707 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1708 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1709 1710 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1711 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1712 1713 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1714 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1715 1716 mutex_lock(&adev->grbm_idx_mutex); 1717 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1718 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1719 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1720 1721 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1722 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1723 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1724 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 
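	/* commit the assembled sampling parameters */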
1725 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1726 1727 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1728 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1729 data &= 0x0000FFFF; 1730 data |= 0x00C00000; 1731 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1732 1733 /* 1734 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1735 * programmed in gfx_v9_0_init_always_on_cu_mask() 1736 */ 1737 1738 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1739 * but used for RLC_LB_CNTL configuration */ 1740 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1741 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1742 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1743 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1744 mutex_unlock(&adev->grbm_idx_mutex); 1745 1746 gfx_v9_0_init_always_on_cu_mask(adev); 1747 } 1748 1749 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1750 { 1751 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1752 } 1753 1754 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1755 { 1756 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1757 return 5; 1758 else 1759 return 4; 1760 } 1761 1762 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 1763 { 1764 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 1765 1766 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 1767 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1768 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); 1769 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); 1770 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); 1771 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); 1772 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); 1773 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); 1774 adev->gfx.rlc.rlcg_reg_access_supported = true; 1775 } 1776 1777 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1778 { 1779 const struct cs_section_def *cs_data; 1780 int r; 1781 1782 adev->gfx.rlc.cs_data = gfx9_cs_data; 1783 1784 cs_data = adev->gfx.rlc.cs_data; 1785 1786 if (cs_data) { 1787 /* init clear state block */ 1788 r = amdgpu_gfx_rlc_init_csb(adev); 1789 if (r) 1790 return r; 1791 } 1792 1793 if (adev->flags & AMD_IS_APU) { 1794 /* TODO: double check the cp_table_size for RV */ 1795 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1796 r = amdgpu_gfx_rlc_init_cpt(adev); 1797 if (r) 1798 return r; 1799 } 1800 1801 return 0; 1802 } 1803 1804 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1805 { 1806 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1807 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1808 } 1809 1810 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1811 { 1812 int r; 1813 u32 *hpd; 1814 const __le32 *fw_data; 1815 unsigned fw_size; 1816 u32 *fw; 1817 size_t mec_hpd_size; 1818 1819 const struct gfx_firmware_header_v1_0 *mec_hdr; 1820 1821 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1822 1823 /* take ownership of the relevant compute queues */ 1824 amdgpu_gfx_compute_queue_acquire(adev); 1825 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1826 if (mec_hpd_size) { 1827 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1828 AMDGPU_GEM_DOMAIN_VRAM | 1829 AMDGPU_GEM_DOMAIN_GTT, 1830 
&adev->gfx.mec.hpd_eop_obj, 1831 &adev->gfx.mec.hpd_eop_gpu_addr, 1832 (void **)&hpd); 1833 if (r) { 1834 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1835 gfx_v9_0_mec_fini(adev); 1836 return r; 1837 } 1838 1839 memset(hpd, 0, mec_hpd_size); 1840 1841 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1842 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1843 } 1844 1845 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1846 1847 fw_data = (const __le32 *) 1848 (adev->gfx.mec_fw->data + 1849 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1850 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 1851 1852 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1853 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1854 &adev->gfx.mec.mec_fw_obj, 1855 &adev->gfx.mec.mec_fw_gpu_addr, 1856 (void **)&fw); 1857 if (r) { 1858 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1859 gfx_v9_0_mec_fini(adev); 1860 return r; 1861 } 1862 1863 memcpy(fw, fw_data, fw_size); 1864 1865 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1866 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1867 1868 return 0; 1869 } 1870 1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1872 { 1873 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1874 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1875 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1876 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1877 (SQ_IND_INDEX__FORCE_READ_MASK)); 1878 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1879 } 1880 1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1882 uint32_t wave, uint32_t thread, 1883 uint32_t regno, uint32_t num, uint32_t *out) 1884 { 1885 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1886 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1887 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1888 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1889 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1890 (SQ_IND_INDEX__FORCE_READ_MASK) | 1891 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1892 while (num--) 1893 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1894 } 1895 1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1897 { 1898 /* type 1 wave data */ 1899 dst[(*no_fields)++] = 1; 1900 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1901 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1902 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1903 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1904 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1905 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1906 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1907 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1908 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1909 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1910 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1911 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1912 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1913 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1914 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1915 } 1916 1917 static void 
gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1918 uint32_t wave, uint32_t start, 1919 uint32_t size, uint32_t *dst) 1920 { 1921 wave_read_regs( 1922 adev, simd, wave, 0, 1923 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1924 } 1925 1926 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1927 uint32_t wave, uint32_t thread, 1928 uint32_t start, uint32_t size, 1929 uint32_t *dst) 1930 { 1931 wave_read_regs( 1932 adev, simd, wave, thread, 1933 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1934 } 1935 1936 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1937 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1938 { 1939 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1940 } 1941 1942 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1943 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1944 .select_se_sh = &gfx_v9_0_select_se_sh, 1945 .read_wave_data = &gfx_v9_0_read_wave_data, 1946 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1947 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1948 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1949 }; 1950 1951 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 1952 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1953 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 1954 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 1955 }; 1956 1957 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 1958 .ras_block = { 1959 .hw_ops = &gfx_v9_0_ras_ops, 1960 }, 1961 }; 1962 1963 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1964 { 1965 u32 gb_addr_config; 1966 int err; 1967 1968 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1969 case IP_VERSION(9, 0, 1): 1970 adev->gfx.config.max_hw_contexts = 8; 1971 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1972 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1973 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1974 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1975 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1976 break; 1977 case IP_VERSION(9, 2, 1): 1978 adev->gfx.config.max_hw_contexts = 8; 1979 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1980 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1981 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1982 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1983 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1984 DRM_INFO("fix gfx.config for vega12\n"); 1985 break; 1986 case IP_VERSION(9, 4, 0): 1987 adev->gfx.ras = &gfx_v9_0_ras; 1988 adev->gfx.config.max_hw_contexts = 8; 1989 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1990 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1991 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1992 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1993 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1994 gb_addr_config &= ~0xf3e777ff; 1995 gb_addr_config |= 0x22014042; 1996 /* check vbios table if gpu info is not available */ 1997 err = amdgpu_atomfirmware_get_gfx_info(adev); 1998 if (err) 1999 return err; 2000 break; 2001 case IP_VERSION(9, 2, 2): 2002 case IP_VERSION(9, 1, 0): 2003 adev->gfx.config.max_hw_contexts = 8; 2004 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2005 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2006 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2007 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2008 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2009 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2010 else 2011 
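			/* Raven/Picasso (non-Raven2 APUs) keep the original golden value */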
gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2012 break; 2013 case IP_VERSION(9, 4, 1): 2014 adev->gfx.ras = &gfx_v9_4_ras; 2015 adev->gfx.config.max_hw_contexts = 8; 2016 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2017 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2018 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2019 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2020 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2021 gb_addr_config &= ~0xf3e777ff; 2022 gb_addr_config |= 0x22014042; 2023 break; 2024 case IP_VERSION(9, 3, 0): 2025 adev->gfx.config.max_hw_contexts = 8; 2026 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2027 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2028 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2029 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2030 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2031 gb_addr_config &= ~0xf3e777ff; 2032 gb_addr_config |= 0x22010042; 2033 break; 2034 case IP_VERSION(9, 4, 2): 2035 adev->gfx.ras = &gfx_v9_4_2_ras; 2036 adev->gfx.config.max_hw_contexts = 8; 2037 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2038 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2039 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2040 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2041 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2042 gb_addr_config &= ~0xf3e777ff; 2043 gb_addr_config |= 0x22014042; 2044 /* check vbios table if gpu info is not available */ 2045 err = amdgpu_atomfirmware_get_gfx_info(adev); 2046 if (err) 2047 return err; 2048 break; 2049 default: 2050 BUG(); 2051 break; 2052 } 2053 2054 adev->gfx.config.gb_addr_config = gb_addr_config; 2055 2056 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2057 REG_GET_FIELD( 2058 adev->gfx.config.gb_addr_config, 2059 GB_ADDR_CONFIG, 2060 NUM_PIPES); 2061 2062 adev->gfx.config.max_tile_pipes = 2063 adev->gfx.config.gb_addr_config_fields.num_pipes; 2064 2065 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2066 REG_GET_FIELD( 2067 adev->gfx.config.gb_addr_config, 2068 GB_ADDR_CONFIG, 2069 NUM_BANKS); 2070 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2071 REG_GET_FIELD( 2072 adev->gfx.config.gb_addr_config, 2073 GB_ADDR_CONFIG, 2074 MAX_COMPRESSED_FRAGS); 2075 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2076 REG_GET_FIELD( 2077 adev->gfx.config.gb_addr_config, 2078 GB_ADDR_CONFIG, 2079 NUM_RB_PER_SE); 2080 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2081 REG_GET_FIELD( 2082 adev->gfx.config.gb_addr_config, 2083 GB_ADDR_CONFIG, 2084 NUM_SHADER_ENGINES); 2085 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2086 REG_GET_FIELD( 2087 adev->gfx.config.gb_addr_config, 2088 GB_ADDR_CONFIG, 2089 PIPE_INTERLEAVE_SIZE)); 2090 2091 return 0; 2092 } 2093 2094 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2095 int mec, int pipe, int queue) 2096 { 2097 unsigned irq_type; 2098 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2099 unsigned int hw_prio; 2100 2101 ring = &adev->gfx.compute_ring[ring_id]; 2102 2103 /* mec0 is me1 */ 2104 ring->me = mec + 1; 2105 ring->pipe = pipe; 2106 ring->queue = queue; 2107 2108 ring->ring_obj = NULL; 2109 ring->use_doorbell = true; 2110 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2111 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2112 + (ring_id * GFX9_MEC_HPD_SIZE); 2113 ring->vm_hub = AMDGPU_GFXHUB(0); 2114 sprintf(ring->name, 
"comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2115 2116 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2117 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2118 + ring->pipe; 2119 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 2120 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2121 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2122 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2123 hw_prio, NULL); 2124 } 2125 2126 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2127 { 2128 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2129 uint32_t *ptr; 2130 uint32_t inst; 2131 2132 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2133 if (ptr == NULL) { 2134 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2135 adev->gfx.ip_dump_core = NULL; 2136 } else { 2137 adev->gfx.ip_dump_core = ptr; 2138 } 2139 2140 /* Allocate memory for compute queue registers for all the instances */ 2141 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2142 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2143 adev->gfx.mec.num_queue_per_pipe; 2144 2145 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2146 if (ptr == NULL) { 2147 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2148 adev->gfx.ip_dump_compute_queues = NULL; 2149 } else { 2150 adev->gfx.ip_dump_compute_queues = ptr; 2151 } 2152 } 2153 2154 static int gfx_v9_0_sw_init(void *handle) 2155 { 2156 int i, j, k, r, ring_id; 2157 int xcc_id = 0; 2158 struct amdgpu_ring *ring; 2159 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2160 unsigned int hw_prio; 2161 2162 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2163 case IP_VERSION(9, 0, 1): 2164 case IP_VERSION(9, 2, 1): 2165 case IP_VERSION(9, 4, 0): 2166 case IP_VERSION(9, 2, 2): 2167 case IP_VERSION(9, 1, 0): 2168 case IP_VERSION(9, 4, 1): 2169 case IP_VERSION(9, 3, 0): 2170 case IP_VERSION(9, 4, 2): 2171 adev->gfx.mec.num_mec = 2; 2172 break; 2173 default: 2174 adev->gfx.mec.num_mec = 1; 2175 break; 2176 } 2177 2178 adev->gfx.mec.num_pipe_per_mec = 4; 2179 adev->gfx.mec.num_queue_per_pipe = 8; 2180 2181 /* EOP Event */ 2182 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2183 if (r) 2184 return r; 2185 2186 /* Privileged reg */ 2187 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2188 &adev->gfx.priv_reg_irq); 2189 if (r) 2190 return r; 2191 2192 /* Privileged inst */ 2193 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2194 &adev->gfx.priv_inst_irq); 2195 if (r) 2196 return r; 2197 2198 /* ECC error */ 2199 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2200 &adev->gfx.cp_ecc_error_irq); 2201 if (r) 2202 return r; 2203 2204 /* FUE error */ 2205 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2206 &adev->gfx.cp_ecc_error_irq); 2207 if (r) 2208 return r; 2209 2210 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2211 2212 if (adev->gfx.rlc.funcs) { 2213 if (adev->gfx.rlc.funcs->init) { 2214 r = adev->gfx.rlc.funcs->init(adev); 2215 if (r) { 2216 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2217 return r; 2218 } 2219 } 2220 } 2221 2222 r = gfx_v9_0_mec_init(adev); 2223 if (r) { 2224 DRM_ERROR("Failed to init MEC BOs!\n"); 2225 return r; 2226 } 2227 2228 /* set up the gfx ring */ 2229 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 
{ 2230 ring = &adev->gfx.gfx_ring[i]; 2231 ring->ring_obj = NULL; 2232 if (!i) 2233 sprintf(ring->name, "gfx"); 2234 else 2235 sprintf(ring->name, "gfx_%d", i); 2236 ring->use_doorbell = true; 2237 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2238 2239 /* disable scheduler on the real ring */ 2240 ring->no_scheduler = adev->gfx.mcbp; 2241 ring->vm_hub = AMDGPU_GFXHUB(0); 2242 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2243 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2244 AMDGPU_RING_PRIO_DEFAULT, NULL); 2245 if (r) 2246 return r; 2247 } 2248 2249 /* set up the software rings */ 2250 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2251 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2252 ring = &adev->gfx.sw_gfx_ring[i]; 2253 ring->ring_obj = NULL; 2254 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2255 ring->use_doorbell = true; 2256 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2257 ring->is_sw_ring = true; 2258 hw_prio = amdgpu_sw_ring_priority(i); 2259 ring->vm_hub = AMDGPU_GFXHUB(0); 2260 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2261 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2262 NULL); 2263 if (r) 2264 return r; 2265 ring->wptr = 0; 2266 } 2267 2268 /* init the muxer and add software rings */ 2269 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2270 GFX9_NUM_SW_GFX_RINGS); 2271 if (r) { 2272 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2273 return r; 2274 } 2275 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2276 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2277 &adev->gfx.sw_gfx_ring[i]); 2278 if (r) { 2279 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2280 return r; 2281 } 2282 } 2283 } 2284 2285 /* set up the compute queues - allocate horizontally across pipes */ 2286 ring_id = 0; 2287 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2288 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2289 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2290 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2291 k, j)) 2292 continue; 2293 2294 r = gfx_v9_0_compute_ring_init(adev, 2295 ring_id, 2296 i, k, j); 2297 if (r) 2298 return r; 2299 2300 ring_id++; 2301 } 2302 } 2303 } 2304 2305 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2306 if (r) { 2307 DRM_ERROR("Failed to init KIQ BOs!\n"); 2308 return r; 2309 } 2310 2311 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2312 if (r) 2313 return r; 2314 2315 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2316 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2317 if (r) 2318 return r; 2319 2320 adev->gfx.ce_ram_size = 0x8000; 2321 2322 r = gfx_v9_0_gpu_early_init(adev); 2323 if (r) 2324 return r; 2325 2326 if (amdgpu_gfx_ras_sw_init(adev)) { 2327 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2328 return -EINVAL; 2329 } 2330 2331 gfx_v9_0_alloc_ip_dump(adev); 2332 2333 return 0; 2334 } 2335 2336 2337 static int gfx_v9_0_sw_fini(void *handle) 2338 { 2339 int i; 2340 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2341 2342 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2343 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2344 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2345 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2346 } 2347 2348 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2349 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2350 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2351 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2352 2353 amdgpu_gfx_mqd_sw_fini(adev, 0); 2354 
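	/* tear down the KIQ ring and bookkeeping created in sw_init */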
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2355 amdgpu_gfx_kiq_fini(adev, 0); 2356 2357 gfx_v9_0_mec_fini(adev); 2358 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2359 &adev->gfx.rlc.clear_state_gpu_addr, 2360 (void **)&adev->gfx.rlc.cs_ptr); 2361 if (adev->flags & AMD_IS_APU) { 2362 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2363 &adev->gfx.rlc.cp_table_gpu_addr, 2364 (void **)&adev->gfx.rlc.cp_table_ptr); 2365 } 2366 gfx_v9_0_free_microcode(adev); 2367 2368 kfree(adev->gfx.ip_dump_core); 2369 kfree(adev->gfx.ip_dump_compute_queues); 2370 2371 return 0; 2372 } 2373 2374 2375 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2376 { 2377 /* TODO */ 2378 } 2379 2380 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2381 u32 instance, int xcc_id) 2382 { 2383 u32 data; 2384 2385 if (instance == 0xffffffff) 2386 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2387 else 2388 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2389 2390 if (se_num == 0xffffffff) 2391 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2392 else 2393 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2394 2395 if (sh_num == 0xffffffff) 2396 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2397 else 2398 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2399 2400 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2401 } 2402 2403 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2404 { 2405 u32 data, mask; 2406 2407 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2408 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2409 2410 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2411 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2412 2413 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2414 adev->gfx.config.max_sh_per_se); 2415 2416 return (~data) & mask; 2417 } 2418 2419 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2420 { 2421 int i, j; 2422 u32 data; 2423 u32 active_rbs = 0; 2424 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2425 adev->gfx.config.max_sh_per_se; 2426 2427 mutex_lock(&adev->grbm_idx_mutex); 2428 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2429 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2430 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2431 data = gfx_v9_0_get_rb_active_bitmap(adev); 2432 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2433 rb_bitmap_width_per_sh); 2434 } 2435 } 2436 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2437 mutex_unlock(&adev->grbm_idx_mutex); 2438 2439 adev->gfx.config.backend_enable_mask = active_rbs; 2440 adev->gfx.config.num_rbs = hweight32(active_rbs); 2441 } 2442 2443 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2444 uint32_t first_vmid, 2445 uint32_t last_vmid) 2446 { 2447 uint32_t data; 2448 uint32_t trap_config_vmid_mask = 0; 2449 int i; 2450 2451 /* Calculate trap config vmid mask */ 2452 for (i = first_vmid; i < last_vmid; i++) 2453 trap_config_vmid_mask |= (1 << i); 2454 2455 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2456 VMID_SEL, trap_config_vmid_mask); 2457 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 2458 TRAP_EN, 1); 2459 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2460 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2461 2462 
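	/* clear the trap data registers as well */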
WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2463 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2464 } 2465 2466 #define DEFAULT_SH_MEM_BASES (0x6000) 2467 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2468 { 2469 int i; 2470 uint32_t sh_mem_config; 2471 uint32_t sh_mem_bases; 2472 2473 /* 2474 * Configure apertures: 2475 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2476 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2477 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2478 */ 2479 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2480 2481 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2482 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2483 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2484 2485 mutex_lock(&adev->srbm_mutex); 2486 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2487 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2488 /* CP and shaders */ 2489 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2490 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2491 } 2492 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2493 mutex_unlock(&adev->srbm_mutex); 2494 2495 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2496 access. These should be enabled by FW for target VMIDs. */ 2497 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2498 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2499 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2500 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2501 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2502 } 2503 } 2504 2505 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2506 { 2507 int vmid; 2508 2509 /* 2510 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2511 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2512 * the driver can enable them for graphics. VMID0 should maintain 2513 * access so that HWS firmware can save/restore entries. 
2514 */ 2515 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2516 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2517 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2518 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2519 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2520 } 2521 } 2522 2523 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2524 { 2525 uint32_t tmp; 2526 2527 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2528 case IP_VERSION(9, 4, 1): 2529 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2530 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2531 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2532 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2533 break; 2534 default: 2535 break; 2536 } 2537 } 2538 2539 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2540 { 2541 u32 tmp; 2542 int i; 2543 2544 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2545 2546 gfx_v9_0_tiling_mode_table_init(adev); 2547 2548 if (adev->gfx.num_gfx_rings) 2549 gfx_v9_0_setup_rb(adev); 2550 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2551 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2552 2553 /* XXX SH_MEM regs */ 2554 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2555 mutex_lock(&adev->srbm_mutex); 2556 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2557 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2558 /* CP and shaders */ 2559 if (i == 0) { 2560 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2561 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2562 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2563 !!adev->gmc.noretry); 2564 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2565 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2566 } else { 2567 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2568 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2569 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2570 !!adev->gmc.noretry); 2571 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2572 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2573 (adev->gmc.private_aperture_start >> 48)); 2574 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2575 (adev->gmc.shared_aperture_start >> 48)); 2576 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2577 } 2578 } 2579 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2580 2581 mutex_unlock(&adev->srbm_mutex); 2582 2583 gfx_v9_0_init_compute_vmid(adev); 2584 gfx_v9_0_init_gds_vmid(adev); 2585 gfx_v9_0_init_sq_config(adev); 2586 } 2587 2588 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2589 { 2590 u32 i, j, k; 2591 u32 mask; 2592 2593 mutex_lock(&adev->grbm_idx_mutex); 2594 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2595 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2596 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2597 for (k = 0; k < adev->usec_timeout; k++) { 2598 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2599 break; 2600 udelay(1); 2601 } 2602 if (k == adev->usec_timeout) { 2603 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2604 0xffffffff, 0xffffffff, 0); 2605 mutex_unlock(&adev->grbm_idx_mutex); 2606 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2607 i, j); 2608 return; 2609 } 2610 } 2611 } 2612 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2613 mutex_unlock(&adev->grbm_idx_mutex); 2614 2615 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2616 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2617 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2618 
RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2619 for (k = 0; k < adev->usec_timeout; k++) { 2620 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2621 break; 2622 udelay(1); 2623 } 2624 } 2625 2626 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2627 bool enable) 2628 { 2629 u32 tmp; 2630 2631 /* These interrupts should be enabled to drive DS clock */ 2632 2633 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2634 2635 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2636 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2637 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2638 if(adev->gfx.num_gfx_rings) 2639 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2640 2641 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2642 } 2643 2644 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2645 { 2646 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2647 /* csib */ 2648 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2649 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2650 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2651 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2652 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2653 adev->gfx.rlc.clear_state_size); 2654 } 2655 2656 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2657 int indirect_offset, 2658 int list_size, 2659 int *unique_indirect_regs, 2660 int unique_indirect_reg_count, 2661 int *indirect_start_offsets, 2662 int *indirect_start_offsets_count, 2663 int max_start_offsets_count) 2664 { 2665 int idx; 2666 2667 for (; indirect_offset < list_size; indirect_offset++) { 2668 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2669 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2670 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2671 2672 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2673 indirect_offset += 2; 2674 2675 /* look for the matching indice */ 2676 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2677 if (unique_indirect_regs[idx] == 2678 register_list_format[indirect_offset] || 2679 !unique_indirect_regs[idx]) 2680 break; 2681 } 2682 2683 BUG_ON(idx >= unique_indirect_reg_count); 2684 2685 if (!unique_indirect_regs[idx]) 2686 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2687 2688 indirect_offset++; 2689 } 2690 } 2691 } 2692 2693 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2694 { 2695 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2696 int unique_indirect_reg_count = 0; 2697 2698 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2699 int indirect_start_offsets_count = 0; 2700 2701 int list_size = 0; 2702 int i = 0, j = 0; 2703 u32 tmp = 0; 2704 2705 u32 *register_list_format = 2706 kmemdup(adev->gfx.rlc.register_list_format, 2707 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2708 if (!register_list_format) 2709 return -ENOMEM; 2710 2711 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2712 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2713 gfx_v9_1_parse_ind_reg_list(register_list_format, 2714 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2715 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2716 unique_indirect_regs, 2717 
unique_indirect_reg_count, 2718 indirect_start_offsets, 2719 &indirect_start_offsets_count, 2720 ARRAY_SIZE(indirect_start_offsets)); 2721 2722 /* enable auto inc in case it is disabled */ 2723 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2724 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2725 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2726 2727 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2728 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2729 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2730 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2731 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2732 adev->gfx.rlc.register_restore[i]); 2733 2734 /* load indirect register */ 2735 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2736 adev->gfx.rlc.reg_list_format_start); 2737 2738 /* direct register portion */ 2739 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2740 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2741 register_list_format[i]); 2742 2743 /* indirect register portion */ 2744 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2745 if (register_list_format[i] == 0xFFFFFFFF) { 2746 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2747 continue; 2748 } 2749 2750 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2751 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2752 2753 for (j = 0; j < unique_indirect_reg_count; j++) { 2754 if (register_list_format[i] == unique_indirect_regs[j]) { 2755 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2756 break; 2757 } 2758 } 2759 2760 BUG_ON(j >= unique_indirect_reg_count); 2761 2762 i++; 2763 } 2764 2765 /* set save/restore list size */ 2766 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2767 list_size = list_size >> 1; 2768 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2769 adev->gfx.rlc.reg_restore_list_size); 2770 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2771 2772 /* write the starting offsets to RLC scratch ram */ 2773 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2774 adev->gfx.rlc.starting_offsets_start); 2775 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2776 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2777 indirect_start_offsets[i]); 2778 2779 /* load unique indirect regs*/ 2780 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2781 if (unique_indirect_regs[i] != 0) { 2782 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2783 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2784 unique_indirect_regs[i] & 0x3FFFF); 2785 2786 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2787 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2788 unique_indirect_regs[i] >> 20); 2789 } 2790 } 2791 2792 kfree(register_list_format); 2793 return 0; 2794 } 2795 2796 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2797 { 2798 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2799 } 2800 2801 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2802 bool enable) 2803 { 2804 uint32_t data = 0; 2805 uint32_t default_data = 0; 2806 2807 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2808 if (enable) { 2809 /* enable GFXIP control over CGPG */ 2810 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2811 if(default_data != data) 2812 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2813 
2814 /* update status */ 2815 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2816 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2817 if(default_data != data) 2818 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2819 } else { 2820 /* restore GFXIP control over GCPG */ 2821 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2822 if(default_data != data) 2823 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2824 } 2825 } 2826 2827 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2828 { 2829 uint32_t data = 0; 2830 2831 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2832 AMD_PG_SUPPORT_GFX_SMG | 2833 AMD_PG_SUPPORT_GFX_DMG)) { 2834 /* init IDLE_POLL_COUNT = 60 */ 2835 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2836 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2837 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2838 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2839 2840 /* init RLC PG Delay */ 2841 data = 0; 2842 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2843 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2844 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2845 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2846 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2847 2848 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2849 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2850 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2851 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2852 2853 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2854 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2855 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2856 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2857 2858 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2859 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2860 2861 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2862 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2863 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2864 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) 2865 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2866 } 2867 } 2868 2869 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2870 bool enable) 2871 { 2872 uint32_t data = 0; 2873 uint32_t default_data = 0; 2874 2875 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2876 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2877 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2878 enable ? 1 : 0); 2879 if (default_data != data) 2880 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2881 } 2882 2883 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2884 bool enable) 2885 { 2886 uint32_t data = 0; 2887 uint32_t default_data = 0; 2888 2889 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2890 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2891 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2892 enable ? 
1 : 0); 2893 if(default_data != data) 2894 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2895 } 2896 2897 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2898 bool enable) 2899 { 2900 uint32_t data = 0; 2901 uint32_t default_data = 0; 2902 2903 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2904 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2905 CP_PG_DISABLE, 2906 enable ? 0 : 1); 2907 if(default_data != data) 2908 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2909 } 2910 2911 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2912 bool enable) 2913 { 2914 uint32_t data, default_data; 2915 2916 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2917 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2918 GFX_POWER_GATING_ENABLE, 2919 enable ? 1 : 0); 2920 if(default_data != data) 2921 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2922 } 2923 2924 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2925 bool enable) 2926 { 2927 uint32_t data, default_data; 2928 2929 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2930 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2931 GFX_PIPELINE_PG_ENABLE, 2932 enable ? 1 : 0); 2933 if(default_data != data) 2934 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2935 2936 if (!enable) 2937 /* read any GFX register to wake up GFX */ 2938 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 2939 } 2940 2941 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 2942 bool enable) 2943 { 2944 uint32_t data, default_data; 2945 2946 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2947 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2948 STATIC_PER_CU_PG_ENABLE, 2949 enable ? 1 : 0); 2950 if(default_data != data) 2951 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2952 } 2953 2954 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 2955 bool enable) 2956 { 2957 uint32_t data, default_data; 2958 2959 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2960 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2961 DYN_PER_CU_PG_ENABLE, 2962 enable ? 1 : 0); 2963 if(default_data != data) 2964 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2965 } 2966 2967 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 2968 { 2969 gfx_v9_0_init_csb(adev); 2970 2971 /* 2972 * Rlc save restore list is workable since v2_1. 2973 * And it's needed by gfxoff feature. 
2974 */ 2975 if (adev->gfx.rlc.is_rlc_v2_1) { 2976 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 2977 IP_VERSION(9, 2, 1) || 2978 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 2979 gfx_v9_1_init_rlc_save_restore_list(adev); 2980 gfx_v9_0_enable_save_restore_machine(adev); 2981 } 2982 2983 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2984 AMD_PG_SUPPORT_GFX_SMG | 2985 AMD_PG_SUPPORT_GFX_DMG | 2986 AMD_PG_SUPPORT_CP | 2987 AMD_PG_SUPPORT_GDS | 2988 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2989 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 2990 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2991 gfx_v9_0_init_gfx_power_gating(adev); 2992 } 2993 } 2994 2995 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2996 { 2997 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2998 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2999 gfx_v9_0_wait_for_rlc_serdes(adev); 3000 } 3001 3002 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3003 { 3004 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3005 udelay(50); 3006 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3007 udelay(50); 3008 } 3009 3010 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3011 { 3012 #ifdef AMDGPU_RLC_DEBUG_RETRY 3013 u32 rlc_ucode_ver; 3014 #endif 3015 3016 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3017 udelay(50); 3018 3019 /* carrizo do enable cp interrupt after cp inited */ 3020 if (!(adev->flags & AMD_IS_APU)) { 3021 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3022 udelay(50); 3023 } 3024 3025 #ifdef AMDGPU_RLC_DEBUG_RETRY 3026 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3027 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3028 if(rlc_ucode_ver == 0x108) { 3029 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3030 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3031 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3032 * default is 0x9C4 to create a 100us interval */ 3033 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3034 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3035 * to disable the page fault retry interrupts, default is 3036 * 0x100 (256) */ 3037 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3038 } 3039 #endif 3040 } 3041 3042 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3043 { 3044 const struct rlc_firmware_header_v2_0 *hdr; 3045 const __le32 *fw_data; 3046 unsigned i, fw_size; 3047 3048 if (!adev->gfx.rlc_fw) 3049 return -EINVAL; 3050 3051 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3052 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3053 3054 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3055 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3056 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3057 3058 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3059 RLCG_UCODE_LOADING_START_ADDRESS); 3060 for (i = 0; i < fw_size; i++) 3061 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3062 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3063 3064 return 0; 3065 } 3066 3067 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3068 { 3069 int r; 3070 3071 if (amdgpu_sriov_vf(adev)) { 3072 gfx_v9_0_init_csb(adev); 3073 return 0; 3074 } 3075 3076 adev->gfx.rlc.funcs->stop(adev); 3077 3078 /* disable CG */ 3079 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3080 3081 gfx_v9_0_init_pg(adev); 3082 3083 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3084 /* legacy rlc firmware loading */ 3085 r = 
gfx_v9_0_rlc_load_microcode(adev); 3086 if (r) 3087 return r; 3088 } 3089 3090 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3091 case IP_VERSION(9, 2, 2): 3092 case IP_VERSION(9, 1, 0): 3093 gfx_v9_0_init_lbpw(adev); 3094 if (amdgpu_lbpw == 0) 3095 gfx_v9_0_enable_lbpw(adev, false); 3096 else 3097 gfx_v9_0_enable_lbpw(adev, true); 3098 break; 3099 case IP_VERSION(9, 4, 0): 3100 gfx_v9_4_init_lbpw(adev); 3101 if (amdgpu_lbpw > 0) 3102 gfx_v9_0_enable_lbpw(adev, true); 3103 else 3104 gfx_v9_0_enable_lbpw(adev, false); 3105 break; 3106 default: 3107 break; 3108 } 3109 3110 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3111 3112 adev->gfx.rlc.funcs->start(adev); 3113 3114 return 0; 3115 } 3116 3117 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3118 { 3119 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3120 3121 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3122 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3123 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3124 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3125 udelay(50); 3126 } 3127 3128 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3129 { 3130 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3131 const struct gfx_firmware_header_v1_0 *ce_hdr; 3132 const struct gfx_firmware_header_v1_0 *me_hdr; 3133 const __le32 *fw_data; 3134 unsigned i, fw_size; 3135 3136 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3137 return -EINVAL; 3138 3139 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3140 adev->gfx.pfp_fw->data; 3141 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3142 adev->gfx.ce_fw->data; 3143 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3144 adev->gfx.me_fw->data; 3145 3146 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3147 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3148 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3149 3150 gfx_v9_0_cp_gfx_enable(adev, false); 3151 3152 /* PFP */ 3153 fw_data = (const __le32 *) 3154 (adev->gfx.pfp_fw->data + 3155 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3156 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3157 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3158 for (i = 0; i < fw_size; i++) 3159 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3160 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3161 3162 /* CE */ 3163 fw_data = (const __le32 *) 3164 (adev->gfx.ce_fw->data + 3165 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3166 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3167 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3168 for (i = 0; i < fw_size; i++) 3169 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3170 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3171 3172 /* ME */ 3173 fw_data = (const __le32 *) 3174 (adev->gfx.me_fw->data + 3175 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3176 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3177 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3178 for (i = 0; i < fw_size; i++) 3179 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3180 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3181 3182 return 0; 3183 } 3184 3185 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3186 { 3187 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3188 const struct cs_section_def *sect = NULL; 3189 const struct cs_extent_def *ext = NULL; 3190 int r, i, tmp; 3191 
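	/* Program the CP context limits below, then replay the gfx9
	 * clear-state sections (gfx9_cs_data) to the gfx ring as
	 * PACKET3 commands to establish the initial context state.
	 */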
	/* init the CP */
	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	/* Limit this quirk to the gfx9 APU series only; the gfx10/gfx11
	 * APUs have been confirmed not to need this update.
	 */
	if (adev->flags & AMD_IS_APU &&
	    adev->in_s3 && !adev->suspend_complete) {
		DRM_INFO("Will skip the CSB packet resubmit\n");
		return 0;
	}
	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
					PACKET3(PACKET3_SET_CONTEXT_REG,
						ext->reg_count));
				amdgpu_ring_write(ring,
					ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15(GC, 0,
mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3290 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3291 3292 mdelay(1); 3293 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3294 3295 rb_addr = ring->gpu_addr >> 8; 3296 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3297 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3298 3299 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3300 if (ring->use_doorbell) { 3301 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3302 DOORBELL_OFFSET, ring->doorbell_index); 3303 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3304 DOORBELL_EN, 1); 3305 } else { 3306 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3307 } 3308 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3309 3310 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3311 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3312 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3313 3314 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3315 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3316 3317 3318 /* start the ring */ 3319 gfx_v9_0_cp_gfx_start(adev); 3320 3321 return 0; 3322 } 3323 3324 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3325 { 3326 if (enable) { 3327 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3328 } else { 3329 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3330 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3331 adev->gfx.kiq[0].ring.sched.ready = false; 3332 } 3333 udelay(50); 3334 } 3335 3336 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3337 { 3338 const struct gfx_firmware_header_v1_0 *mec_hdr; 3339 const __le32 *fw_data; 3340 unsigned i; 3341 u32 tmp; 3342 3343 if (!adev->gfx.mec_fw) 3344 return -EINVAL; 3345 3346 gfx_v9_0_cp_compute_enable(adev, false); 3347 3348 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3349 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3350 3351 fw_data = (const __le32 *) 3352 (adev->gfx.mec_fw->data + 3353 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3354 tmp = 0; 3355 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3356 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3357 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3358 3359 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3360 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3361 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3362 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3363 3364 /* MEC1 */ 3365 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3366 mec_hdr->jt_offset); 3367 for (i = 0; i < mec_hdr->jt_size; i++) 3368 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3369 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3370 3371 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3372 adev->gfx.mec_fw_version); 3373 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 3374 3375 return 0; 3376 } 3377 3378 /* KIQ functions */ 3379 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3380 { 3381 uint32_t tmp; 3382 struct amdgpu_device *adev = ring->adev; 3383 3384 /* tell RLC which is KIQ queue */ 3385 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3386 tmp &= 0xffffff00; 3387 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3388 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3389 tmp |= 0x80; 3390 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3391 } 3392 3393 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3394 { 3395 struct amdgpu_device *adev = ring->adev; 3396 3397 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3398 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3399 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3400 mqd->cp_hqd_queue_priority = 3401 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3402 } 3403 } 3404 } 3405 3406 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3407 { 3408 struct amdgpu_device *adev = ring->adev; 3409 struct v9_mqd *mqd = ring->mqd_ptr; 3410 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3411 uint32_t tmp; 3412 3413 mqd->header = 0xC0310800; 3414 mqd->compute_pipelinestat_enable = 0x00000001; 3415 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3416 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3417 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3418 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3419 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3420 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3421 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3422 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3423 mqd->compute_misc_reserved = 0x00000003; 3424 3425 mqd->dynamic_cu_mask_addr_lo = 3426 lower_32_bits(ring->mqd_gpu_addr 3427 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3428 mqd->dynamic_cu_mask_addr_hi = 3429 upper_32_bits(ring->mqd_gpu_addr 3430 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3431 3432 eop_base_addr = ring->eop_gpu_addr >> 8; 3433 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3434 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3435 3436 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3437 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3438 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3439 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3440 3441 mqd->cp_hqd_eop_control = tmp; 3442 3443 /* enable doorbell? 
*/ 3444 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3445 3446 if (ring->use_doorbell) { 3447 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3448 DOORBELL_OFFSET, ring->doorbell_index); 3449 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3450 DOORBELL_EN, 1); 3451 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3452 DOORBELL_SOURCE, 0); 3453 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3454 DOORBELL_HIT, 0); 3455 } else { 3456 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3457 DOORBELL_EN, 0); 3458 } 3459 3460 mqd->cp_hqd_pq_doorbell_control = tmp; 3461 3462 /* disable the queue if it's active */ 3463 ring->wptr = 0; 3464 mqd->cp_hqd_dequeue_request = 0; 3465 mqd->cp_hqd_pq_rptr = 0; 3466 mqd->cp_hqd_pq_wptr_lo = 0; 3467 mqd->cp_hqd_pq_wptr_hi = 0; 3468 3469 /* set the pointer to the MQD */ 3470 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3471 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3472 3473 /* set MQD vmid to 0 */ 3474 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3475 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3476 mqd->cp_mqd_control = tmp; 3477 3478 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3479 hqd_gpu_addr = ring->gpu_addr >> 8; 3480 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3481 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3482 3483 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3484 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3485 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3486 (order_base_2(ring->ring_size / 4) - 1)); 3487 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3488 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3489 #ifdef __BIG_ENDIAN 3490 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3491 #endif 3492 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3493 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3494 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3495 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3496 mqd->cp_hqd_pq_control = tmp; 3497 3498 /* set the wb address whether it's enabled or not */ 3499 wb_gpu_addr = ring->rptr_gpu_addr; 3500 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3501 mqd->cp_hqd_pq_rptr_report_addr_hi = 3502 upper_32_bits(wb_gpu_addr) & 0xffff; 3503 3504 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3505 wb_gpu_addr = ring->wptr_gpu_addr; 3506 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3507 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3508 3509 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3510 ring->wptr = 0; 3511 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3512 3513 /* set the vmid for the queue */ 3514 mqd->cp_hqd_vmid = 0; 3515 3516 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3517 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3518 mqd->cp_hqd_persistent_state = tmp; 3519 3520 /* set MIN_IB_AVAIL_SIZE */ 3521 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3522 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3523 mqd->cp_hqd_ib_control = tmp; 3524 3525 /* set static priority for a queue/ring */ 3526 gfx_v9_0_mqd_set_priority(ring, mqd); 3527 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3528 3529 /* map_queues packet doesn't need activate the queue, 3530 * so only kiq need set this field. 
3531 */ 3532 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3533 mqd->cp_hqd_active = 1; 3534 3535 return 0; 3536 } 3537 3538 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3539 { 3540 struct amdgpu_device *adev = ring->adev; 3541 struct v9_mqd *mqd = ring->mqd_ptr; 3542 int j; 3543 3544 /* disable wptr polling */ 3545 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3546 3547 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3548 mqd->cp_hqd_eop_base_addr_lo); 3549 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3550 mqd->cp_hqd_eop_base_addr_hi); 3551 3552 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3553 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3554 mqd->cp_hqd_eop_control); 3555 3556 /* enable doorbell? */ 3557 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3558 mqd->cp_hqd_pq_doorbell_control); 3559 3560 /* disable the queue if it's active */ 3561 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3562 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3563 for (j = 0; j < adev->usec_timeout; j++) { 3564 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3565 break; 3566 udelay(1); 3567 } 3568 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3569 mqd->cp_hqd_dequeue_request); 3570 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3571 mqd->cp_hqd_pq_rptr); 3572 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3573 mqd->cp_hqd_pq_wptr_lo); 3574 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3575 mqd->cp_hqd_pq_wptr_hi); 3576 } 3577 3578 /* set the pointer to the MQD */ 3579 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3580 mqd->cp_mqd_base_addr_lo); 3581 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3582 mqd->cp_mqd_base_addr_hi); 3583 3584 /* set MQD vmid to 0 */ 3585 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3586 mqd->cp_mqd_control); 3587 3588 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3589 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3590 mqd->cp_hqd_pq_base_lo); 3591 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3592 mqd->cp_hqd_pq_base_hi); 3593 3594 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3595 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3596 mqd->cp_hqd_pq_control); 3597 3598 /* set the wb address whether it's enabled or not */ 3599 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3600 mqd->cp_hqd_pq_rptr_report_addr_lo); 3601 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3602 mqd->cp_hqd_pq_rptr_report_addr_hi); 3603 3604 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3605 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3606 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3607 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3608 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3609 3610 /* enable the doorbell if requested */ 3611 if (ring->use_doorbell) { 3612 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3613 (adev->doorbell_index.kiq * 2) << 2); 3614 /* If GC has entered CGPG, ringing doorbell > first page 3615 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3616 * workaround this issue. And this change has to align with firmware 3617 * update. 
3618 */ 3619 if (check_if_enlarge_doorbell_range(adev)) 3620 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3621 (adev->doorbell.size - 4)); 3622 else 3623 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3624 (adev->doorbell_index.userqueue_end * 2) << 2); 3625 } 3626 3627 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3628 mqd->cp_hqd_pq_doorbell_control); 3629 3630 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3631 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3632 mqd->cp_hqd_pq_wptr_lo); 3633 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3634 mqd->cp_hqd_pq_wptr_hi); 3635 3636 /* set the vmid for the queue */ 3637 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3638 3639 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3640 mqd->cp_hqd_persistent_state); 3641 3642 /* activate the queue */ 3643 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3644 mqd->cp_hqd_active); 3645 3646 if (ring->use_doorbell) 3647 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3648 3649 return 0; 3650 } 3651 3652 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3653 { 3654 struct amdgpu_device *adev = ring->adev; 3655 int j; 3656 3657 /* disable the queue if it's active */ 3658 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3659 3660 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3661 3662 for (j = 0; j < adev->usec_timeout; j++) { 3663 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3664 break; 3665 udelay(1); 3666 } 3667 3668 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3669 DRM_DEBUG("KIQ dequeue request failed.\n"); 3670 3671 /* Manual disable if dequeue request times out */ 3672 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3673 } 3674 3675 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3676 0); 3677 } 3678 3679 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3681 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3683 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3685 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3687 3688 return 0; 3689 } 3690 3691 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3692 { 3693 struct amdgpu_device *adev = ring->adev; 3694 struct v9_mqd *mqd = ring->mqd_ptr; 3695 struct v9_mqd *tmp_mqd; 3696 3697 gfx_v9_0_kiq_setting(ring); 3698 3699 /* GPU could be in bad state during probe, driver trigger the reset 3700 * after load the SMU, in this case , the mqd is not be initialized. 3701 * driver need to re-init the mqd. 
	 * check mqd->cp_hqd_pq_control since this value should not be 0
	 */
	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
		/* for the GPU_RESET case, reset the MQD to a clean status */
		if (adev->gfx.kiq[0].mqd_backup)
			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
			amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
		gfx_v9_0_mqd_init(ring);
		gfx_v9_0_kiq_init_register(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.kiq[0].mqd_backup)
			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
	}

	return 0;
}

static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];
	struct v9_mqd *tmp_mqd;

	/* Same as the KIQ init above: the driver needs to re-init the MQD
	 * if mqd->cp_hqd_pq_control has not been initialized before
	 */
	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];

	if (!tmp_mqd->cp_hqd_pq_control ||
	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
		gfx_v9_0_mqd_init(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	} else {
		/* restore MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	ring = &adev->gfx.kiq[0].ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (unlikely(r != 0)) {
		amdgpu_bo_unreserve(ring->mqd_obj);
		return r;
	}

	gfx_v9_0_kiq_init_queue(ring);
	amdgpu_bo_kunmap(ring->mqd_obj);
	ring->mqd_ptr = NULL;
	amdgpu_bo_unreserve(ring->mqd_obj);
	return 0;
}

static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3802 { 3803 struct amdgpu_ring *ring = NULL; 3804 int r = 0, i; 3805 3806 gfx_v9_0_cp_compute_enable(adev, true); 3807 3808 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3809 ring = &adev->gfx.compute_ring[i]; 3810 3811 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3812 if (unlikely(r != 0)) 3813 goto done; 3814 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3815 if (!r) { 3816 r = gfx_v9_0_kcq_init_queue(ring); 3817 amdgpu_bo_kunmap(ring->mqd_obj); 3818 ring->mqd_ptr = NULL; 3819 } 3820 amdgpu_bo_unreserve(ring->mqd_obj); 3821 if (r) 3822 goto done; 3823 } 3824 3825 r = amdgpu_gfx_enable_kcq(adev, 0); 3826 done: 3827 return r; 3828 } 3829 3830 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3831 { 3832 int r, i; 3833 struct amdgpu_ring *ring; 3834 3835 if (!(adev->flags & AMD_IS_APU)) 3836 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3837 3838 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3839 if (adev->gfx.num_gfx_rings) { 3840 /* legacy firmware loading */ 3841 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3842 if (r) 3843 return r; 3844 } 3845 3846 r = gfx_v9_0_cp_compute_load_microcode(adev); 3847 if (r) 3848 return r; 3849 } 3850 3851 r = gfx_v9_0_kiq_resume(adev); 3852 if (r) 3853 return r; 3854 3855 if (adev->gfx.num_gfx_rings) { 3856 r = gfx_v9_0_cp_gfx_resume(adev); 3857 if (r) 3858 return r; 3859 } 3860 3861 r = gfx_v9_0_kcq_resume(adev); 3862 if (r) 3863 return r; 3864 3865 if (adev->gfx.num_gfx_rings) { 3866 ring = &adev->gfx.gfx_ring[0]; 3867 r = amdgpu_ring_test_helper(ring); 3868 if (r) 3869 return r; 3870 } 3871 3872 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3873 ring = &adev->gfx.compute_ring[i]; 3874 amdgpu_ring_test_helper(ring); 3875 } 3876 3877 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3878 3879 return 0; 3880 } 3881 3882 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3883 { 3884 u32 tmp; 3885 3886 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 3887 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 3888 return; 3889 3890 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3891 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3892 adev->df.hash_status.hash_64k); 3893 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3894 adev->df.hash_status.hash_2m); 3895 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3896 adev->df.hash_status.hash_1g); 3897 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3898 } 3899 3900 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3901 { 3902 if (adev->gfx.num_gfx_rings) 3903 gfx_v9_0_cp_gfx_enable(adev, enable); 3904 gfx_v9_0_cp_compute_enable(adev, enable); 3905 } 3906 3907 static int gfx_v9_0_hw_init(void *handle) 3908 { 3909 int r; 3910 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3911 3912 if (!amdgpu_sriov_vf(adev)) 3913 gfx_v9_0_init_golden_registers(adev); 3914 3915 gfx_v9_0_constants_init(adev); 3916 3917 gfx_v9_0_init_tcp_config(adev); 3918 3919 r = adev->gfx.rlc.funcs->resume(adev); 3920 if (r) 3921 return r; 3922 3923 r = gfx_v9_0_cp_resume(adev); 3924 if (r) 3925 return r; 3926 3927 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 3928 gfx_v9_4_2_set_power_brake_sequence(adev); 3929 3930 return r; 3931 } 3932 3933 static int gfx_v9_0_hw_fini(void *handle) 3934 { 3935 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3936 3937 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3938 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3939 
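	/* also release the privileged register/instruction fault interrupts */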
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3940 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3941 3942 /* DF freeze and kcq disable will fail */ 3943 if (!amdgpu_ras_intr_triggered()) 3944 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3945 amdgpu_gfx_disable_kcq(adev, 0); 3946 3947 if (amdgpu_sriov_vf(adev)) { 3948 gfx_v9_0_cp_gfx_enable(adev, false); 3949 /* must disable polling for SRIOV when hw finished, otherwise 3950 * CPC engine may still keep fetching WB address which is already 3951 * invalid after sw finished and trigger DMAR reading error in 3952 * hypervisor side. 3953 */ 3954 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3955 return 0; 3956 } 3957 3958 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3959 * otherwise KIQ is hanging when binding back 3960 */ 3961 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 3962 mutex_lock(&adev->srbm_mutex); 3963 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 3964 adev->gfx.kiq[0].ring.pipe, 3965 adev->gfx.kiq[0].ring.queue, 0, 0); 3966 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 3967 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3968 mutex_unlock(&adev->srbm_mutex); 3969 } 3970 3971 gfx_v9_0_cp_enable(adev, false); 3972 3973 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 3974 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 3975 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 3976 dev_dbg(adev->dev, "Skipping RLC halt\n"); 3977 return 0; 3978 } 3979 3980 adev->gfx.rlc.funcs->stop(adev); 3981 return 0; 3982 } 3983 3984 static int gfx_v9_0_suspend(void *handle) 3985 { 3986 return gfx_v9_0_hw_fini(handle); 3987 } 3988 3989 static int gfx_v9_0_resume(void *handle) 3990 { 3991 return gfx_v9_0_hw_init(handle); 3992 } 3993 3994 static bool gfx_v9_0_is_idle(void *handle) 3995 { 3996 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3997 3998 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3999 GRBM_STATUS, GUI_ACTIVE)) 4000 return false; 4001 else 4002 return true; 4003 } 4004 4005 static int gfx_v9_0_wait_for_idle(void *handle) 4006 { 4007 unsigned i; 4008 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4009 4010 for (i = 0; i < adev->usec_timeout; i++) { 4011 if (gfx_v9_0_is_idle(handle)) 4012 return 0; 4013 udelay(1); 4014 } 4015 return -ETIMEDOUT; 4016 } 4017 4018 static int gfx_v9_0_soft_reset(void *handle) 4019 { 4020 u32 grbm_soft_reset = 0; 4021 u32 tmp; 4022 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4023 4024 /* GRBM_STATUS */ 4025 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4026 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4027 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4028 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4029 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4030 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4031 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4032 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4033 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4034 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4035 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4036 } 4037 4038 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4039 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4040 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4041 } 4042 4043 /* GRBM_STATUS2 */ 4044 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4045 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4046 
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);


	if (grbm_soft_reset) {
		/* stop the rlc */
		adev->gfx.rlc.funcs->stop(adev);

		if (adev->gfx.num_gfx_rings)
			/* Disable GFX parsing/prefetching */
			gfx_v9_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v9_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}

static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0;
	uint64_t value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 9 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 16) |	/* count sel */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for the gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which causes
	 * gpu_recover() to hang there.
4123 * 4124 * also don't wait anymore for IRQ context 4125 * */ 4126 if (r < 1 && (amdgpu_in_reset(adev))) 4127 goto failed_kiq_read; 4128 4129 might_sleep(); 4130 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4131 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4132 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4133 } 4134 4135 if (cnt > MAX_KIQ_REG_TRY) 4136 goto failed_kiq_read; 4137 4138 mb(); 4139 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4140 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4141 amdgpu_device_wb_free(adev, reg_val_offs); 4142 return value; 4143 4144 failed_undo: 4145 amdgpu_ring_undo(ring); 4146 failed_unlock: 4147 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4148 failed_kiq_read: 4149 if (reg_val_offs) 4150 amdgpu_device_wb_free(adev, reg_val_offs); 4151 pr_err("failed to read gpu clock\n"); 4152 return ~0; 4153 } 4154 4155 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4156 { 4157 uint64_t clock, clock_lo, clock_hi, hi_check; 4158 4159 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4160 case IP_VERSION(9, 3, 0): 4161 preempt_disable(); 4162 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4163 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4164 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4165 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4166 * roughly every 42 seconds. 4167 */ 4168 if (hi_check != clock_hi) { 4169 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4170 clock_hi = hi_check; 4171 } 4172 preempt_enable(); 4173 clock = clock_lo | (clock_hi << 32ULL); 4174 break; 4175 default: 4176 amdgpu_gfx_off_ctrl(adev, false); 4177 mutex_lock(&adev->gfx.gpu_clock_mutex); 4178 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4179 IP_VERSION(9, 0, 1) && 4180 amdgpu_sriov_runtime(adev)) { 4181 clock = gfx_v9_0_kiq_read_clock(adev); 4182 } else { 4183 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4184 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4185 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4186 } 4187 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4188 amdgpu_gfx_off_ctrl(adev, true); 4189 break; 4190 } 4191 return clock; 4192 } 4193 4194 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4195 uint32_t vmid, 4196 uint32_t gds_base, uint32_t gds_size, 4197 uint32_t gws_base, uint32_t gws_size, 4198 uint32_t oa_base, uint32_t oa_size) 4199 { 4200 struct amdgpu_device *adev = ring->adev; 4201 4202 /* GDS Base */ 4203 gfx_v9_0_write_data_to_reg(ring, 0, false, 4204 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4205 gds_base); 4206 4207 /* GDS Size */ 4208 gfx_v9_0_write_data_to_reg(ring, 0, false, 4209 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4210 gds_size); 4211 4212 /* GWS */ 4213 gfx_v9_0_write_data_to_reg(ring, 0, false, 4214 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4215 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4216 4217 /* OA */ 4218 gfx_v9_0_write_data_to_reg(ring, 0, false, 4219 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4220 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4221 } 4222 4223 static const u32 vgpr_init_compute_shader[] = 4224 { 4225 0xb07c0000, 0xbe8000ff, 4226 0x000000f8, 0xbf110800, 4227 0x7e000280, 0x7e020280, 4228 0x7e040280, 0x7e060280, 4229 0x7e080280, 0x7e0a0280, 4230 0x7e0c0280, 0x7e0e0280, 4231 0x80808800, 0xbe803200, 4232 0xbf84fff5, 0xbf9c0000, 4233 0xd28c0001, 
0x0001007f, 4234 0xd28d0001, 0x0002027e, 4235 0x10020288, 0xb8810904, 4236 0xb7814000, 0xd1196a01, 4237 0x00000301, 0xbe800087, 4238 0xbefc00c1, 0xd89c4000, 4239 0x00020201, 0xd89cc080, 4240 0x00040401, 0x320202ff, 4241 0x00000800, 0x80808100, 4242 0xbf84fff8, 0x7e020280, 4243 0xbf810000, 0x00000000, 4244 }; 4245 4246 static const u32 sgpr_init_compute_shader[] = 4247 { 4248 0xb07c0000, 0xbe8000ff, 4249 0x0000005f, 0xbee50080, 4250 0xbe812c65, 0xbe822c65, 4251 0xbe832c65, 0xbe842c65, 4252 0xbe852c65, 0xb77c0005, 4253 0x80808500, 0xbf84fff8, 4254 0xbe800080, 0xbf810000, 4255 }; 4256 4257 static const u32 vgpr_init_compute_shader_arcturus[] = { 4258 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4259 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4260 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4261 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4262 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4263 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4264 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4265 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4266 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4267 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4268 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4269 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4270 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4271 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4272 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4273 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4274 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4275 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4276 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4277 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4278 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4279 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4280 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4281 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4282 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4283 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4284 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4285 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4286 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4287 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4288 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4289 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4290 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4291 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4292 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4293 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4294 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4295 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4296 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4297 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4298 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4299 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4300 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4301 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4302 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4303 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4304 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4305 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4306 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4307 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4308 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4309 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4310 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4311 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4312 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4313 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4314 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4315 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4316 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4317 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4318 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4319 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4320 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4321 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4322 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4323 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4324 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4325 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4326 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4327 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4328 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4329 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4330 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4331 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4332 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4333 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4334 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4335 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4336 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4337 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4338 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4339 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4340 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4341 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4342 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4343 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4344 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4345 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4346 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4347 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4348 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4349 0xbf84fff8, 0xbf810000, 4350 }; 4351 4352 /* When below register arrays changed, please update gpr_reg_size, 4353 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4354 to cover all gfx9 ASICs */ 4355 static const struct soc15_reg_entry vgpr_init_regs[] = { 4356 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4357 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4358 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4359 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4360 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4361 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4362 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4363 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4364 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4365 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4366 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4367 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4368 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4369 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4370 }; 4371 4372 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4373 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4374 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4375 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4376 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4377 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4378 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4379 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4380 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4381 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4382 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4383 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4384 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4385 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4386 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4387 }; 4388 4389 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4390 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4391 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4392 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4393 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4394 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4395 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4396 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4397 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4398 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4399 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4400 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4401 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4402 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4403 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4404 }; 4405 4406 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4407 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4408 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4409 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4410 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4411 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4412 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4413 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4414 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4415 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4416 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4417 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4418 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4419 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4420 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4421 }; 4422 4423 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4424 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4425 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4426 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4427 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4428 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4429 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4430 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4431 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4432 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4433 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4434 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4435 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4436 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4437 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4438 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4439 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4440 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4441 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4442 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4443 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4444 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4445 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4446 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4447 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4448 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4449 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4450 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4451 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4452 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4453 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4454 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4455 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4456 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4457 }; 4458 4459 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4460 { 4461 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4462 int i, r; 4463 4464 /* only support when RAS is enabled */ 4465 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4466 return 0; 4467 4468 r = amdgpu_ring_alloc(ring, 7); 4469 if (r) { 4470 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4471 ring->name, r); 4472 return r; 4473 } 4474 4475 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4476 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4477 4478 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4479 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4480 PACKET3_DMA_DATA_DST_SEL(1) | 4481 PACKET3_DMA_DATA_SRC_SEL(2) | 4482 PACKET3_DMA_DATA_ENGINE(0))); 4483 amdgpu_ring_write(ring, 0); 4484 amdgpu_ring_write(ring, 0); 4485 amdgpu_ring_write(ring, 0); 4486 amdgpu_ring_write(ring, 0); 4487 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4488 adev->gds.gds_size); 4489 4490 amdgpu_ring_commit(ring); 4491 4492 for (i = 0; i < adev->usec_timeout; i++) { 4493 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4494 break; 4495 udelay(1); 4496 } 4497 4498 if (i >= adev->usec_timeout) 4499 r = -ETIMEDOUT; 4500 4501 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4502 4503 return r; 4504 } 4505 4506 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4507 { 4508 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4509 struct amdgpu_ib ib; 4510 struct dma_fence *f = NULL; 4511 int r, i; 4512 unsigned total_size, vgpr_offset, sgpr_offset; 4513 u64 gpu_addr; 4514 4515 int compute_dim_x = adev->gfx.config.max_shader_engines * 4516 adev->gfx.config.max_cu_per_sh * 4517 adev->gfx.config.max_sh_per_se; 4518 int sgpr_work_group_size = 5; 4519 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4520 int vgpr_init_shader_size; 4521 const u32 *vgpr_init_shader_ptr; 4522 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4523 4524 /* only support when RAS is enabled */ 4525 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4526 return 0; 4527 4528 /* bail if the compute ring is not ready */ 4529 if (!ring->sched.ready) 4530 return 0; 4531 4532 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4533 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4534 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4535 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4536 } else { 4537 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4538 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4539 vgpr_init_regs_ptr = vgpr_init_regs; 4540 } 4541 4542 total_size = 4543 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4544 total_size += 4545 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4546 total_size += 4547 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4548 total_size = ALIGN(total_size, 256); 4549 vgpr_offset = total_size; 4550 total_size += ALIGN(vgpr_init_shader_size, 256); 4551 sgpr_offset = total_size; 4552 total_size += sizeof(sgpr_init_compute_shader); 4553 4554 /* allocate an indirect buffer to put the commands in */ 4555 memset(&ib, 0, sizeof(ib)); 4556 r = amdgpu_ib_get(adev, NULL, total_size, 4557 AMDGPU_IB_POOL_DIRECT, &ib); 4558 if (r) { 4559 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4560 return r; 4561 } 4562 4563 /* load the compute 
shaders */ 4564 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4565 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4566 4567 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4568 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4569 4570 /* init the ib length to 0 */ 4571 ib.length_dw = 0; 4572 4573 /* VGPR */ 4574 /* write the register state for the compute dispatch */ 4575 for (i = 0; i < gpr_reg_size; i++) { 4576 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4577 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4578 - PACKET3_SET_SH_REG_START; 4579 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4580 } 4581 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4582 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4583 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4584 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4585 - PACKET3_SET_SH_REG_START; 4586 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4587 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4588 4589 /* write dispatch packet */ 4590 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4591 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4592 ib.ptr[ib.length_dw++] = 1; /* y */ 4593 ib.ptr[ib.length_dw++] = 1; /* z */ 4594 ib.ptr[ib.length_dw++] = 4595 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4596 4597 /* write CS partial flush packet */ 4598 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4599 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4600 4601 /* SGPR1 */ 4602 /* write the register state for the compute dispatch */ 4603 for (i = 0; i < gpr_reg_size; i++) { 4604 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4605 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4606 - PACKET3_SET_SH_REG_START; 4607 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4608 } 4609 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4610 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4611 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4612 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4613 - PACKET3_SET_SH_REG_START; 4614 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4615 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4616 4617 /* write dispatch packet */ 4618 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4619 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4620 ib.ptr[ib.length_dw++] = 1; /* y */ 4621 ib.ptr[ib.length_dw++] = 1; /* z */ 4622 ib.ptr[ib.length_dw++] = 4623 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4624 4625 /* write CS partial flush packet */ 4626 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4627 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4628 4629 /* SGPR2 */ 4630 /* write the register state for the compute dispatch */ 4631 for (i = 0; i < gpr_reg_size; i++) { 4632 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4633 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4634 - PACKET3_SET_SH_REG_START; 4635 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4636 } 4637 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4638 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4639 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4640 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO) 4641 - PACKET3_SET_SH_REG_START; 4642 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4643 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4644 4645 /* write dispatch packet */ 4646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4647 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4648 ib.ptr[ib.length_dw++] = 1; /* y */ 4649 ib.ptr[ib.length_dw++] = 1; /* z */ 4650 ib.ptr[ib.length_dw++] = 4651 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4652 4653 /* write CS partial flush packet */ 4654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4655 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4656 4657 /* schedule the ib on the ring */ 4658 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 4659 if (r) { 4660 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 4661 goto fail; 4662 } 4663 4664 /* wait for the GPU to finish processing the IB */ 4665 r = dma_fence_wait(f, false); 4666 if (r) { 4667 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 4668 goto fail; 4669 } 4670 4671 fail: 4672 amdgpu_ib_free(adev, &ib, NULL); 4673 dma_fence_put(f); 4674 4675 return r; 4676 } 4677 4678 static int gfx_v9_0_early_init(void *handle) 4679 { 4680 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4681 4682 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 4683 4684 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 4685 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4686 adev->gfx.num_gfx_rings = 0; 4687 else 4688 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 4689 adev->gfx.xcc_mask = 1; 4690 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 4691 AMDGPU_MAX_COMPUTE_RINGS); 4692 gfx_v9_0_set_kiq_pm4_funcs(adev); 4693 gfx_v9_0_set_ring_funcs(adev); 4694 gfx_v9_0_set_irq_funcs(adev); 4695 gfx_v9_0_set_gds_init(adev); 4696 gfx_v9_0_set_rlc_funcs(adev); 4697 4698 /* init rlcg reg access ctrl */ 4699 gfx_v9_0_init_rlcg_reg_access_ctrl(adev); 4700 4701 return gfx_v9_0_init_microcode(adev); 4702 } 4703 4704 static int gfx_v9_0_ecc_late_init(void *handle) 4705 { 4706 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4707 int r; 4708 4709 /* 4710 * Temp workaround to fix an issue where CP firmware fails to 4711 * update the read pointer when CPDMA writes the GDS-clearing 4712 * operation during the suspend/resume sequence on several cards. 4713 * So limit this operation to the cold boot sequence.
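 * The !adev->in_suspend check below enforces that restriction.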
4714 */ 4715 if ((!adev->in_suspend) && 4716 (adev->gds.gds_size)) { 4717 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4718 if (r) 4719 return r; 4720 } 4721 4722 /* requires IBs so do in late init after IB pool is initialized */ 4723 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4724 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4725 else 4726 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4727 4728 if (r) 4729 return r; 4730 4731 if (adev->gfx.ras && 4732 adev->gfx.ras->enable_watchdog_timer) 4733 adev->gfx.ras->enable_watchdog_timer(adev); 4734 4735 return 0; 4736 } 4737 4738 static int gfx_v9_0_late_init(void *handle) 4739 { 4740 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4741 int r; 4742 4743 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4744 if (r) 4745 return r; 4746 4747 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4748 if (r) 4749 return r; 4750 4751 r = gfx_v9_0_ecc_late_init(handle); 4752 if (r) 4753 return r; 4754 4755 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4756 gfx_v9_4_2_debug_trap_config_init(adev, 4757 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4758 else 4759 gfx_v9_0_debug_trap_config_init(adev, 4760 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4761 4762 return 0; 4763 } 4764 4765 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4766 { 4767 uint32_t rlc_setting; 4768 4769 /* if RLC is not enabled, do nothing */ 4770 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4771 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4772 return false; 4773 4774 return true; 4775 } 4776 4777 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4778 { 4779 uint32_t data; 4780 unsigned i; 4781 4782 data = RLC_SAFE_MODE__CMD_MASK; 4783 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4784 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4785 4786 /* wait for RLC_SAFE_MODE */ 4787 for (i = 0; i < adev->usec_timeout; i++) { 4788 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4789 break; 4790 udelay(1); 4791 } 4792 } 4793 4794 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4795 { 4796 uint32_t data; 4797 4798 data = RLC_SAFE_MODE__CMD_MASK; 4799 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4800 } 4801 4802 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4803 bool enable) 4804 { 4805 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4806 4807 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4808 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4809 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4810 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4811 } else { 4812 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4813 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4814 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4815 } 4816 4817 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4818 } 4819 4820 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4821 bool enable) 4822 { 4823 /* TODO: double check if we need to perform under safe mode */ 4824 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4825 4826 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4827 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4828 else 4829 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4830 4831 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4832 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4833 else 4834 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4835 4836 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4837 } 4838 4839 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4840 bool enable) 4841 { 4842 uint32_t data, def; 4843 4844 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4845 4846 /* It is disabled by HW by default */ 4847 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4848 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4849 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4850 4851 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4852 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4853 4854 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4855 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4856 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4857 4858 /* only for Vega10 & Raven1 */ 4859 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4860 4861 if (def != data) 4862 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4863 4864 /* MGLS is a global flag to control all MGLS in GFX */ 4865 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4866 /* 2 - RLC memory Light sleep */ 4867 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4868 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4869 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4870 if (def != data) 4871 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4872 } 4873 /* 3 - CP memory Light sleep */ 4874 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4875 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4876 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4877 if (def != data) 4878 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4879 } 4880 } 4881 } else { 4882 /* 1 - MGCG_OVERRIDE */ 4883 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4884 4885 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4886 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4887 4888 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4889 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4890 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4891 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4892 4893 if (def != data) 4894 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4895 4896 /* 2 - disable MGLS in RLC */ 4897 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4898 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4899 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4900 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4901 } 4902 4903 /* 3 - disable MGLS in CP */ 4904 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4905 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4906 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4907 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4908 } 4909 } 4910 4911 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4912 } 4913 4914 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4915 bool enable) 4916 { 4917 uint32_t data, def; 4918 4919 if (!adev->gfx.num_gfx_rings) 4920 return; 4921 4922 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4923 4924 /* Enable 3D CGCG/CGLS */ 4925 if (enable) { 4926 /* write cmd to clear cgcg/cgls ov */ 4927 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4928 /* unset CGCG override */ 4929 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4930 /* update CGCG and CGLS override bits */ 4931 if (def != data) 4932 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4933 4934 /* enable 3Dcgcg FSM(0x0000363f) 
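 * (0x36 idle threshold plus CGCG_EN; when 3D CGLS is supported the 0xF
 * compensation delay and CGLS_EN bits are added as well.)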
*/ 4935 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4936 4937 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 4938 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4939 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4940 else 4941 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 4942 4943 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4944 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4945 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4946 if (def != data) 4947 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4948 4949 /* set IDLE_POLL_COUNT(0x00900100) */ 4950 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4951 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4952 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4953 if (def != data) 4954 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4955 } else { 4956 /* Disable CGCG/CGLS */ 4957 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4958 /* disable cgcg, cgls should be disabled */ 4959 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4960 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4961 /* disable cgcg and cgls in FSM */ 4962 if (def != data) 4963 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4964 } 4965 4966 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4967 } 4968 4969 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4970 bool enable) 4971 { 4972 uint32_t def, data; 4973 4974 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4975 4976 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4977 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4978 /* unset CGCG override */ 4979 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4980 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4981 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4982 else 4983 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4984 /* update CGCG and CGLS override bits */ 4985 if (def != data) 4986 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4987 4988 /* enable cgcg FSM(0x0000363F) */ 4989 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4990 4991 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 4992 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4993 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4994 else 4995 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4996 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4997 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4998 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4999 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5000 if (def != data) 5001 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5002 5003 /* set IDLE_POLL_COUNT(0x00900100) */ 5004 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5005 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5006 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5007 if (def != data) 5008 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5009 } else { 5010 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5011 /* reset CGCG/CGLS bits */ 5012 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5013 /* disable cgcg and cgls in FSM */ 5014 if (def != data) 5015 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5016 } 5017 5018 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5019 } 5020 5021 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5022 bool enable) 5023 { 5024 if (enable) { 5025 /* 
CGCG/CGLS should be enabled after MGCG/MGLS 5026 * === MGCG + MGLS === 5027 */ 5028 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5029 /* === CGCG /CGLS for GFX 3D Only === */ 5030 gfx_v9_0_update_3d_clock_gating(adev, enable); 5031 /* === CGCG + CGLS === */ 5032 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5033 } else { 5034 /* CGCG/CGLS should be disabled before MGCG/MGLS 5035 * === CGCG + CGLS === 5036 */ 5037 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5038 /* === CGCG /CGLS for GFX 3D Only === */ 5039 gfx_v9_0_update_3d_clock_gating(adev, enable); 5040 /* === MGCG + MGLS === */ 5041 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5042 } 5043 return 0; 5044 } 5045 5046 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5047 unsigned int vmid) 5048 { 5049 u32 reg, data; 5050 5051 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5052 if (amdgpu_sriov_is_pp_one_vf(adev)) 5053 data = RREG32_NO_KIQ(reg); 5054 else 5055 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5056 5057 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5058 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5059 5060 if (amdgpu_sriov_is_pp_one_vf(adev)) 5061 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5062 else 5063 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5064 } 5065 5066 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5067 { 5068 amdgpu_gfx_off_ctrl(adev, false); 5069 5070 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5071 5072 amdgpu_gfx_off_ctrl(adev, true); 5073 } 5074 5075 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5076 uint32_t offset, 5077 struct soc15_reg_rlcg *entries, int arr_size) 5078 { 5079 int i; 5080 uint32_t reg; 5081 5082 if (!entries) 5083 return false; 5084 5085 for (i = 0; i < arr_size; i++) { 5086 const struct soc15_reg_rlcg *entry; 5087 5088 entry = &entries[i]; 5089 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5090 if (offset == reg) 5091 return true; 5092 } 5093 5094 return false; 5095 } 5096 5097 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5098 { 5099 return gfx_v9_0_check_rlcg_range(adev, offset, 5100 (void *)rlcg_access_gc_9_0, 5101 ARRAY_SIZE(rlcg_access_gc_9_0)); 5102 } 5103 5104 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5105 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5106 .set_safe_mode = gfx_v9_0_set_safe_mode, 5107 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5108 .init = gfx_v9_0_rlc_init, 5109 .get_csb_size = gfx_v9_0_get_csb_size, 5110 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5111 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5112 .resume = gfx_v9_0_rlc_resume, 5113 .stop = gfx_v9_0_rlc_stop, 5114 .reset = gfx_v9_0_rlc_reset, 5115 .start = gfx_v9_0_rlc_start, 5116 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5117 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5118 }; 5119 5120 static int gfx_v9_0_set_powergating_state(void *handle, 5121 enum amd_powergating_state state) 5122 { 5123 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5124 bool enable = (state == AMD_PG_STATE_GATE); 5125 5126 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5127 case IP_VERSION(9, 2, 2): 5128 case IP_VERSION(9, 1, 0): 5129 case IP_VERSION(9, 3, 0): 5130 if (!enable) 5131 amdgpu_gfx_off_ctrl(adev, false); 5132 5133 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5134 
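/* RLC/SMU handshake supported: enable SCLK slow-down on both power up and power down */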
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5135 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5136 } else { 5137 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5138 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5139 } 5140 5141 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5142 gfx_v9_0_enable_cp_power_gating(adev, true); 5143 else 5144 gfx_v9_0_enable_cp_power_gating(adev, false); 5145 5146 /* update gfx cgpg state */ 5147 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5148 5149 /* update mgcg state */ 5150 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5151 5152 if (enable) 5153 amdgpu_gfx_off_ctrl(adev, true); 5154 break; 5155 case IP_VERSION(9, 2, 1): 5156 amdgpu_gfx_off_ctrl(adev, enable); 5157 break; 5158 default: 5159 break; 5160 } 5161 5162 return 0; 5163 } 5164 5165 static int gfx_v9_0_set_clockgating_state(void *handle, 5166 enum amd_clockgating_state state) 5167 { 5168 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5169 5170 if (amdgpu_sriov_vf(adev)) 5171 return 0; 5172 5173 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5174 case IP_VERSION(9, 0, 1): 5175 case IP_VERSION(9, 2, 1): 5176 case IP_VERSION(9, 4, 0): 5177 case IP_VERSION(9, 2, 2): 5178 case IP_VERSION(9, 1, 0): 5179 case IP_VERSION(9, 4, 1): 5180 case IP_VERSION(9, 3, 0): 5181 case IP_VERSION(9, 4, 2): 5182 gfx_v9_0_update_gfx_clock_gating(adev, 5183 state == AMD_CG_STATE_GATE); 5184 break; 5185 default: 5186 break; 5187 } 5188 return 0; 5189 } 5190 5191 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) 5192 { 5193 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5194 int data; 5195 5196 if (amdgpu_sriov_vf(adev)) 5197 *flags = 0; 5198 5199 /* AMD_CG_SUPPORT_GFX_MGCG */ 5200 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5201 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5202 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5203 5204 /* AMD_CG_SUPPORT_GFX_CGCG */ 5205 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5206 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5207 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5208 5209 /* AMD_CG_SUPPORT_GFX_CGLS */ 5210 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5211 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5212 5213 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5214 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5215 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5216 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5217 5218 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5219 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5220 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5221 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5222 5223 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5224 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5225 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5226 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5227 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5228 5229 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5230 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5231 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5232 } 5233 } 5234 5235 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5236 { 5237 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5238 } 5239 5240 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5241 { 5242 struct amdgpu_device *adev = ring->adev; 5243 u64 wptr; 5244 5245 /* XXX check if swapping is necessary on BE */ 5246 if (ring->use_doorbell) { 
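/* doorbell in use: read back the 64-bit wptr shadow that set_wptr_gfx keeps in the writeback slot */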
5247 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5248 } else { 5249 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5250 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5251 } 5252 5253 return wptr; 5254 } 5255 5256 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5257 { 5258 struct amdgpu_device *adev = ring->adev; 5259 5260 if (ring->use_doorbell) { 5261 /* XXX check if swapping is necessary on BE */ 5262 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5263 WDOORBELL64(ring->doorbell_index, ring->wptr); 5264 } else { 5265 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5266 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5267 } 5268 } 5269 5270 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5271 { 5272 struct amdgpu_device *adev = ring->adev; 5273 u32 ref_and_mask, reg_mem_engine; 5274 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5275 5276 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5277 switch (ring->me) { 5278 case 1: 5279 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5280 break; 5281 case 2: 5282 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5283 break; 5284 default: 5285 return; 5286 } 5287 reg_mem_engine = 0; 5288 } else { 5289 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5290 reg_mem_engine = 1; /* pfp */ 5291 } 5292 5293 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5294 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5295 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5296 ref_and_mask, ref_and_mask, 0x20); 5297 } 5298 5299 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5300 struct amdgpu_job *job, 5301 struct amdgpu_ib *ib, 5302 uint32_t flags) 5303 { 5304 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5305 u32 header, control = 0; 5306 5307 if (ib->flags & AMDGPU_IB_FLAG_CE) 5308 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5309 else 5310 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5311 5312 control |= ib->length_dw | (vmid << 24); 5313 5314 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5315 control |= INDIRECT_BUFFER_PRE_ENB(1); 5316 5317 if (flags & AMDGPU_IB_PREEMPTED) 5318 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5319 5320 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5321 gfx_v9_0_ring_emit_de_meta(ring, 5322 (!amdgpu_sriov_vf(ring->adev) && 5323 flags & AMDGPU_IB_PREEMPTED) ? 
5324 true : false, 5325 job->gds_size > 0 && job->gds_base != 0); 5326 } 5327 5328 amdgpu_ring_write(ring, header); 5329 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5330 amdgpu_ring_write(ring, 5331 #ifdef __BIG_ENDIAN 5332 (2 << 0) | 5333 #endif 5334 lower_32_bits(ib->gpu_addr)); 5335 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5336 amdgpu_ring_ib_on_emit_cntl(ring); 5337 amdgpu_ring_write(ring, control); 5338 } 5339 5340 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5341 unsigned offset) 5342 { 5343 u32 control = ring->ring[offset]; 5344 5345 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5346 ring->ring[offset] = control; 5347 } 5348 5349 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5350 unsigned offset) 5351 { 5352 struct amdgpu_device *adev = ring->adev; 5353 void *ce_payload_cpu_addr; 5354 uint64_t payload_offset, payload_size; 5355 5356 payload_size = sizeof(struct v9_ce_ib_state); 5357 5358 if (ring->is_mes_queue) { 5359 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5360 gfx[0].gfx_meta_data) + 5361 offsetof(struct v9_gfx_meta_data, ce_payload); 5362 ce_payload_cpu_addr = 5363 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5364 } else { 5365 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5366 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5367 } 5368 5369 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5370 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5371 } else { 5372 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5373 (ring->buf_mask + 1 - offset) << 2); 5374 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5375 memcpy((void *)&ring->ring[0], 5376 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5377 payload_size); 5378 } 5379 } 5380 5381 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5382 unsigned offset) 5383 { 5384 struct amdgpu_device *adev = ring->adev; 5385 void *de_payload_cpu_addr; 5386 uint64_t payload_offset, payload_size; 5387 5388 payload_size = sizeof(struct v9_de_ib_state); 5389 5390 if (ring->is_mes_queue) { 5391 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5392 gfx[0].gfx_meta_data) + 5393 offsetof(struct v9_gfx_meta_data, de_payload); 5394 de_payload_cpu_addr = 5395 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5396 } else { 5397 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5398 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5399 } 5400 5401 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5402 IB_COMPLETION_STATUS_PREEMPTED; 5403 5404 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5405 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5406 } else { 5407 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5408 (ring->buf_mask + 1 - offset) << 2); 5409 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5410 memcpy((void *)&ring->ring[0], 5411 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5412 payload_size); 5413 } 5414 } 5415 5416 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5417 struct amdgpu_job *job, 5418 struct amdgpu_ib *ib, 5419 uint32_t flags) 5420 { 5421 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5422 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5423 5424 /* Currently, there is a high possibility to get wave ID mismatch 5425 * between ME and GDS, leading to a hw deadlock, because ME generates 5426 * 
different wave IDs than the GDS expects. This situation happens 5427 * randomly when at least 5 compute pipes use GDS ordered append. 5428 * The wave IDs generated by ME are also wrong after suspend/resume. 5429 * Those are probably bugs somewhere else in the kernel driver. 5430 * 5431 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5432 * GDS to 0 for this ring (me/pipe). 5433 */ 5434 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5435 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5436 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5437 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5438 } 5439 5440 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5441 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5442 amdgpu_ring_write(ring, 5443 #ifdef __BIG_ENDIAN 5444 (2 << 0) | 5445 #endif 5446 lower_32_bits(ib->gpu_addr)); 5447 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5448 amdgpu_ring_write(ring, control); 5449 } 5450 5451 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5452 u64 seq, unsigned flags) 5453 { 5454 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5455 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5456 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5457 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5458 uint32_t dw2 = 0; 5459 5460 /* RELEASE_MEM - flush caches, send int */ 5461 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5462 5463 if (writeback) { 5464 dw2 = EOP_TC_NC_ACTION_EN; 5465 } else { 5466 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5467 EOP_TC_MD_ACTION_EN; 5468 } 5469 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5470 EVENT_INDEX(5); 5471 if (exec) 5472 dw2 |= EOP_EXEC; 5473 5474 amdgpu_ring_write(ring, dw2); 5475 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5476 5477 /* 5478 * the address should be Qword aligned if 64bit write, Dword 5479 * aligned if only send 32bit data low (discard data high) 5480 */ 5481 if (write64bit) 5482 BUG_ON(addr & 0x7); 5483 else 5484 BUG_ON(addr & 0x3); 5485 amdgpu_ring_write(ring, lower_32_bits(addr)); 5486 amdgpu_ring_write(ring, upper_32_bits(addr)); 5487 amdgpu_ring_write(ring, lower_32_bits(seq)); 5488 amdgpu_ring_write(ring, upper_32_bits(seq)); 5489 amdgpu_ring_write(ring, 0); 5490 } 5491 5492 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5493 { 5494 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5495 uint32_t seq = ring->fence_drv.sync_seq; 5496 uint64_t addr = ring->fence_drv.gpu_addr; 5497 5498 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5499 lower_32_bits(addr), upper_32_bits(addr), 5500 seq, 0xffffffff, 4); 5501 } 5502 5503 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5504 unsigned vmid, uint64_t pd_addr) 5505 { 5506 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5507 5508 /* compute doesn't have PFP */ 5509 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5510 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5511 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5512 amdgpu_ring_write(ring, 0x0); 5513 } 5514 } 5515 5516 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5517 { 5518 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5519 } 5520 5521 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5522 { 5523 u64 wptr; 5524 5525 /* XXX check if swapping is necessary on BE */ 5526 if (ring->use_doorbell) 5527 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5528 else 5529 BUG(); 5530 return wptr; 5531 } 5532 5533 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5534 { 5535 struct amdgpu_device *adev = ring->adev; 5536 5537 /* XXX check if swapping is necessary on BE */ 5538 if (ring->use_doorbell) { 5539 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5540 WDOORBELL64(ring->doorbell_index, ring->wptr); 5541 } else{ 5542 BUG(); /* only DOORBELL method supported on gfx9 now */ 5543 } 5544 } 5545 5546 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5547 u64 seq, unsigned int flags) 5548 { 5549 struct amdgpu_device *adev = ring->adev; 5550 5551 /* we only allocate 32bit for each seq wb address */ 5552 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5553 5554 /* write fence seq to the "addr" */ 5555 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5556 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5557 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5558 amdgpu_ring_write(ring, lower_32_bits(addr)); 5559 amdgpu_ring_write(ring, upper_32_bits(addr)); 5560 amdgpu_ring_write(ring, lower_32_bits(seq)); 5561 5562 if (flags & AMDGPU_FENCE_FLAG_INT) { 5563 /* set register to trigger INT */ 5564 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5565 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5566 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5567 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5568 amdgpu_ring_write(ring, 0); 5569 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5570 } 5571 } 5572 5573 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5574 { 5575 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5576 amdgpu_ring_write(ring, 0); 5577 } 5578 5579 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5580 { 5581 struct 
amdgpu_device *adev = ring->adev; 5582 struct v9_ce_ib_state ce_payload = {0}; 5583 uint64_t offset, ce_payload_gpu_addr; 5584 void *ce_payload_cpu_addr; 5585 int cnt; 5586 5587 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5588 5589 if (ring->is_mes_queue) { 5590 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5591 gfx[0].gfx_meta_data) + 5592 offsetof(struct v9_gfx_meta_data, ce_payload); 5593 ce_payload_gpu_addr = 5594 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5595 ce_payload_cpu_addr = 5596 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5597 } else { 5598 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5599 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5600 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5601 } 5602 5603 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5604 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5605 WRITE_DATA_DST_SEL(8) | 5606 WR_CONFIRM) | 5607 WRITE_DATA_CACHE_POLICY(0)); 5608 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5609 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5610 5611 amdgpu_ring_ib_on_emit_ce(ring); 5612 5613 if (resume) 5614 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5615 sizeof(ce_payload) >> 2); 5616 else 5617 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5618 sizeof(ce_payload) >> 2); 5619 } 5620 5621 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5622 { 5623 int i, r = 0; 5624 struct amdgpu_device *adev = ring->adev; 5625 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5626 struct amdgpu_ring *kiq_ring = &kiq->ring; 5627 unsigned long flags; 5628 5629 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5630 return -EINVAL; 5631 5632 spin_lock_irqsave(&kiq->ring_lock, flags); 5633 5634 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5635 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5636 return -ENOMEM; 5637 } 5638 5639 /* assert preemption condition */ 5640 amdgpu_ring_set_preempt_cond_exec(ring, false); 5641 5642 ring->trail_seq += 1; 5643 amdgpu_ring_alloc(ring, 13); 5644 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5645 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5646 5647 /* assert IB preemption, emit the trailing fence */ 5648 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5649 ring->trail_fence_gpu_addr, 5650 ring->trail_seq); 5651 5652 amdgpu_ring_commit(kiq_ring); 5653 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5654 5655 /* poll the trailing fence */ 5656 for (i = 0; i < adev->usec_timeout; i++) { 5657 if (ring->trail_seq == 5658 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5659 break; 5660 udelay(1); 5661 } 5662 5663 if (i >= adev->usec_timeout) { 5664 r = -EINVAL; 5665 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5666 } 5667 5668 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5669 amdgpu_ring_emit_wreg(ring, 5670 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5671 0x0); 5672 amdgpu_ring_commit(ring); 5673 5674 /* deassert preemption condition */ 5675 amdgpu_ring_set_preempt_cond_exec(ring, true); 5676 return r; 5677 } 5678 5679 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5680 { 5681 struct amdgpu_device *adev = ring->adev; 5682 struct v9_de_ib_state de_payload = {0}; 5683 uint64_t offset, gds_addr, de_payload_gpu_addr; 5684 void *de_payload_cpu_addr; 5685 int cnt; 5686 5687 if (ring->is_mes_queue) { 5688 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5689 gfx[0].gfx_meta_data) + 5690 
offsetof(struct v9_gfx_meta_data, de_payload); 5691 de_payload_gpu_addr = 5692 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5693 de_payload_cpu_addr = 5694 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5695 5696 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5697 gfx[0].gds_backup) + 5698 offsetof(struct v9_gfx_meta_data, de_payload); 5699 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5700 } else { 5701 offset = offsetof(struct v9_gfx_meta_data, de_payload); 5702 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5703 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5704 5705 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5706 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5707 PAGE_SIZE); 5708 } 5709 5710 if (usegds) { 5711 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5712 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5713 } 5714 5715 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5716 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5717 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5718 WRITE_DATA_DST_SEL(8) | 5719 WR_CONFIRM) | 5720 WRITE_DATA_CACHE_POLICY(0)); 5721 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5722 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5723 5724 amdgpu_ring_ib_on_emit_de(ring); 5725 if (resume) 5726 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5727 sizeof(de_payload) >> 2); 5728 else 5729 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5730 sizeof(de_payload) >> 2); 5731 } 5732 5733 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5734 bool secure) 5735 { 5736 uint32_t v = secure ? FRAME_TMZ : 0; 5737 5738 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5739 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5740 } 5741 5742 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5743 { 5744 uint32_t dw2 = 0; 5745 5746 gfx_v9_0_ring_emit_ce_meta(ring, 5747 (!amdgpu_sriov_vf(ring->adev) && 5748 flags & AMDGPU_IB_PREEMPTED) ? true : false); 5749 5750 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5751 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5752 /* set load_global_config & load_global_uconfig */ 5753 dw2 |= 0x8001; 5754 /* set load_cs_sh_regs */ 5755 dw2 |= 0x01000000; 5756 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5757 dw2 |= 0x10002; 5758 5759 /* set load_ce_ram if preamble presented */ 5760 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5761 dw2 |= 0x10000000; 5762 } else { 5763 /* still load_ce_ram if this is the first time preamble presented 5764 * although there is no context switch happens. 
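 * (load_ce_ram is bit 28 of the CONTEXT_CONTROL dword assembled here.)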
5765 */ 5766 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5767 dw2 |= 0x10000000; 5768 } 5769 5770 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5771 amdgpu_ring_write(ring, dw2); 5772 amdgpu_ring_write(ring, 0); 5773 } 5774 5775 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5776 uint64_t addr) 5777 { 5778 unsigned ret; 5779 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5780 amdgpu_ring_write(ring, lower_32_bits(addr)); 5781 amdgpu_ring_write(ring, upper_32_bits(addr)); 5782 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5783 amdgpu_ring_write(ring, 0); 5784 ret = ring->wptr & ring->buf_mask; 5785 /* patch dummy value later */ 5786 amdgpu_ring_write(ring, 0); 5787 return ret; 5788 } 5789 5790 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5791 uint32_t reg_val_offs) 5792 { 5793 struct amdgpu_device *adev = ring->adev; 5794 5795 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5796 amdgpu_ring_write(ring, 0 | /* src: register*/ 5797 (5 << 8) | /* dst: memory */ 5798 (1 << 20)); /* write confirm */ 5799 amdgpu_ring_write(ring, reg); 5800 amdgpu_ring_write(ring, 0); 5801 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5802 reg_val_offs * 4)); 5803 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5804 reg_val_offs * 4)); 5805 } 5806 5807 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5808 uint32_t val) 5809 { 5810 uint32_t cmd = 0; 5811 5812 switch (ring->funcs->type) { 5813 case AMDGPU_RING_TYPE_GFX: 5814 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5815 break; 5816 case AMDGPU_RING_TYPE_KIQ: 5817 cmd = (1 << 16); /* no inc addr */ 5818 break; 5819 default: 5820 cmd = WR_CONFIRM; 5821 break; 5822 } 5823 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5824 amdgpu_ring_write(ring, cmd); 5825 amdgpu_ring_write(ring, reg); 5826 amdgpu_ring_write(ring, 0); 5827 amdgpu_ring_write(ring, val); 5828 } 5829 5830 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5831 uint32_t val, uint32_t mask) 5832 { 5833 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5834 } 5835 5836 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5837 uint32_t reg0, uint32_t reg1, 5838 uint32_t ref, uint32_t mask) 5839 { 5840 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5841 struct amdgpu_device *adev = ring->adev; 5842 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5843 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5844 5845 if (fw_version_ok) 5846 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5847 ref, mask, 0x20); 5848 else 5849 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5850 ref, mask); 5851 } 5852 5853 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5854 { 5855 struct amdgpu_device *adev = ring->adev; 5856 uint32_t value = 0; 5857 5858 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5859 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5860 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5861 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5862 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5863 } 5864 5865 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5866 enum amdgpu_interrupt_state state) 5867 { 5868 switch (state) { 5869 case AMDGPU_IRQ_STATE_DISABLE: 5870 case AMDGPU_IRQ_STATE_ENABLE: 5871 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5872 TIME_STAMP_INT_ENABLE, 5873 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5874 break; 5875 default: 5876 break; 5877 } 5878 } 5879 5880 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5881 int me, int pipe, 5882 enum amdgpu_interrupt_state state) 5883 { 5884 u32 mec_int_cntl, mec_int_cntl_reg; 5885 5886 /* 5887 * amdgpu controls only the first MEC. That's why this function only 5888 * handles the setting of interrupts for this specific MEC. All other 5889 * pipes' interrupts are set by amdkfd. 5890 */ 5891 5892 if (me == 1) { 5893 switch (pipe) { 5894 case 0: 5895 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5896 break; 5897 case 1: 5898 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5899 break; 5900 case 2: 5901 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5902 break; 5903 case 3: 5904 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5905 break; 5906 default: 5907 DRM_DEBUG("invalid pipe %d\n", pipe); 5908 return; 5909 } 5910 } else { 5911 DRM_DEBUG("invalid me %d\n", me); 5912 return; 5913 } 5914 5915 switch (state) { 5916 case AMDGPU_IRQ_STATE_DISABLE: 5917 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 5918 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5919 TIME_STAMP_INT_ENABLE, 0); 5920 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5921 break; 5922 case AMDGPU_IRQ_STATE_ENABLE: 5923 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5924 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5925 TIME_STAMP_INT_ENABLE, 1); 5926 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5927 break; 5928 default: 5929 break; 5930 } 5931 } 5932 5933 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5934 struct amdgpu_irq_src *source, 5935 unsigned type, 5936 enum amdgpu_interrupt_state state) 5937 { 5938 switch (state) { 5939 case AMDGPU_IRQ_STATE_DISABLE: 5940 case AMDGPU_IRQ_STATE_ENABLE: 5941 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5942 PRIV_REG_INT_ENABLE, 5943 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 5944 break; 5945 default: 5946 break; 5947 } 5948 5949 return 0; 5950 } 5951 5952 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5953 struct amdgpu_irq_src *source, 5954 unsigned type, 5955 enum amdgpu_interrupt_state state) 5956 { 5957 switch (state) { 5958 case AMDGPU_IRQ_STATE_DISABLE: 5959 case AMDGPU_IRQ_STATE_ENABLE: 5960 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5961 PRIV_INSTR_INT_ENABLE, 5962 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5963 break; 5964 default: 5965 break; 5966 } 5967 5968 return 0; 5969 } 5970 5971 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 5972 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5973 CP_ECC_ERROR_INT_ENABLE, 1) 5974 5975 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 5976 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 5977 CP_ECC_ERROR_INT_ENABLE, 0) 5978 5979 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 5980 struct amdgpu_irq_src *source, 5981 unsigned type, 5982 enum amdgpu_interrupt_state state) 5983 { 5984 switch (state) { 5985 case AMDGPU_IRQ_STATE_DISABLE: 5986 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5987 CP_ECC_ERROR_INT_ENABLE, 0); 5988 DISABLE_ECC_ON_ME_PIPE(1, 0); 5989 DISABLE_ECC_ON_ME_PIPE(1, 1); 5990 DISABLE_ECC_ON_ME_PIPE(1, 2); 5991 DISABLE_ECC_ON_ME_PIPE(1, 3); 5992 break; 5993 5994 case AMDGPU_IRQ_STATE_ENABLE: 5995 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5996 CP_ECC_ERROR_INT_ENABLE, 1); 5997 ENABLE_ECC_ON_ME_PIPE(1, 0); 5998 ENABLE_ECC_ON_ME_PIPE(1, 1); 5999 ENABLE_ECC_ON_ME_PIPE(1, 2); 6000 ENABLE_ECC_ON_ME_PIPE(1, 3); 6001 break; 6002 default: 6003 break; 6004 } 6005 6006 return 0; 6007 } 6008 6009 6010 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6011 struct amdgpu_irq_src *src, 6012 unsigned type, 6013 enum amdgpu_interrupt_state state) 6014 { 6015 switch (type) { 6016 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6017 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6018 break; 6019 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6020 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6021 break; 6022 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6023 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6024 break; 6025 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6026 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6027 break; 6028 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6029 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6030 break; 6031 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6032 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6033 break; 6034 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6035 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6036 break; 6037 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6038 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6039 break; 6040 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6041 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6042 break; 6043 default: 6044 break; 6045 } 6046 return 0; 6047 } 6048 6049 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6050 struct amdgpu_irq_src *source, 6051 struct amdgpu_iv_entry *entry) 6052 { 6053 int i; 6054 u8 me_id, pipe_id, queue_id; 6055 struct amdgpu_ring *ring; 6056 6057 DRM_DEBUG("IH: CP EOP\n"); 6058 me_id = (entry->ring_id & 0x0c) >> 2; 6059 pipe_id = (entry->ring_id & 0x03) >> 0; 6060 queue_id = (entry->ring_id & 0x70) >> 4; 6061 6062 switch (me_id) { 6063 case 0: 6064 if (adev->gfx.num_gfx_rings) { 6065 if (!adev->gfx.mcbp) { 6066 
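/* mid-command-buffer preemption disabled: fences are processed on the single HW gfx ring */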
amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6067 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6068 /* Fence signals are handled on the software rings*/ 6069 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6070 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6071 } 6072 } 6073 break; 6074 case 1: 6075 case 2: 6076 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6077 ring = &adev->gfx.compute_ring[i]; 6078 /* Per-queue interrupt is supported for MEC starting from VI. 6079 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6080 */ 6081 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6082 amdgpu_fence_process(ring); 6083 } 6084 break; 6085 } 6086 return 0; 6087 } 6088 6089 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6090 struct amdgpu_iv_entry *entry) 6091 { 6092 u8 me_id, pipe_id, queue_id; 6093 struct amdgpu_ring *ring; 6094 int i; 6095 6096 me_id = (entry->ring_id & 0x0c) >> 2; 6097 pipe_id = (entry->ring_id & 0x03) >> 0; 6098 queue_id = (entry->ring_id & 0x70) >> 4; 6099 6100 switch (me_id) { 6101 case 0: 6102 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6103 break; 6104 case 1: 6105 case 2: 6106 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6107 ring = &adev->gfx.compute_ring[i]; 6108 if (ring->me == me_id && ring->pipe == pipe_id && 6109 ring->queue == queue_id) 6110 drm_sched_fault(&ring->sched); 6111 } 6112 break; 6113 } 6114 } 6115 6116 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6117 struct amdgpu_irq_src *source, 6118 struct amdgpu_iv_entry *entry) 6119 { 6120 DRM_ERROR("Illegal register access in command stream\n"); 6121 gfx_v9_0_fault(adev, entry); 6122 return 0; 6123 } 6124 6125 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6126 struct amdgpu_irq_src *source, 6127 struct amdgpu_iv_entry *entry) 6128 { 6129 DRM_ERROR("Illegal instruction in command stream\n"); 6130 gfx_v9_0_fault(adev, entry); 6131 return 0; 6132 } 6133 6134 6135 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6136 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6137 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6138 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6139 }, 6140 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6141 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6142 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6143 }, 6144 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6145 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6146 0, 0 6147 }, 6148 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6149 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6150 0, 0 6151 }, 6152 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6153 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6154 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6155 }, 6156 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6157 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6158 0, 0 6159 }, 6160 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6161 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6162 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6163 }, 6164 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6165 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6166 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6167 }, 6168 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6169 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6170 0, 0 6171 }, 6172 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, 
mmDC_EDC_RESTORE_CNT), 6173 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6174 0, 0 6175 }, 6176 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6177 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6178 0, 0 6179 }, 6180 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6181 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6182 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6183 }, 6184 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6185 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6186 0, 0 6187 }, 6188 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6189 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6190 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6191 }, 6192 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6193 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6194 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6195 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6196 }, 6197 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6198 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6199 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6200 0, 0 6201 }, 6202 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6203 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6204 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6205 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6206 }, 6207 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6208 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6209 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6210 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6211 }, 6212 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6213 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6214 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6215 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6216 }, 6217 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6218 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6219 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6220 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6221 }, 6222 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6223 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6224 0, 0 6225 }, 6226 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6227 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6228 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6229 }, 6230 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6231 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6232 0, 0 6233 }, 6234 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6235 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6236 0, 0 6237 }, 6238 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6239 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6240 0, 0 6241 }, 6242 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6243 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6244 0, 0 6245 }, 6246 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6247 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6248 0, 0 6249 }, 6250 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6251 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6252 0, 0 6253 }, 6254 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6255 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6256 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6257 }, 6258 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6259 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6260 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 
6261 }, 6262 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6263 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6264 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6265 }, 6266 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6267 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6268 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6269 }, 6270 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6271 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6272 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6273 }, 6274 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6275 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6276 0, 0 6277 }, 6278 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6279 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6280 0, 0 6281 }, 6282 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6283 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6284 0, 0 6285 }, 6286 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6287 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6288 0, 0 6289 }, 6290 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6291 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6292 0, 0 6293 }, 6294 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6295 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6296 0, 0 6297 }, 6298 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6299 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6300 0, 0 6301 }, 6302 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6303 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6304 0, 0 6305 }, 6306 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6307 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6308 0, 0 6309 }, 6310 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6311 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6312 0, 0 6313 }, 6314 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6315 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6316 0, 0 6317 }, 6318 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6319 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6320 0, 0 6321 }, 6322 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6323 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6324 0, 0 6325 }, 6326 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6327 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6328 0, 0 6329 }, 6330 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6331 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6332 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6333 }, 6334 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6335 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6336 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6337 }, 6338 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6339 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6340 0, 0 6341 }, 6342 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6343 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6344 0, 0 6345 }, 6346 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6347 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6348 0, 0 6349 }, 6350 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, 
mmTCP_EDC_CNT_NEW), 6351 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6352 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6353 }, 6354 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6355 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6356 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6357 }, 6358 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6359 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6360 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6361 }, 6362 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6363 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6364 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6365 }, 6366 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6367 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6368 0, 0 6369 }, 6370 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6371 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6372 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6373 }, 6374 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6375 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6376 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6377 }, 6378 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6379 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6380 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6381 }, 6382 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6383 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6384 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6385 }, 6386 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6387 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6388 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6389 }, 6390 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6391 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6392 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6393 }, 6394 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6395 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6396 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6397 }, 6398 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6399 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6400 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6401 }, 6402 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6403 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6404 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6405 }, 6406 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6407 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6408 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6409 }, 6410 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6411 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6412 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6413 }, 6414 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6415 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6416 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6417 }, 6418 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6419 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6420 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6421 }, 6422 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6423 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6424 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6425 }, 6426 { 
"SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6427 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6428 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6429 }, 6430 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6431 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6432 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6433 }, 6434 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6435 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6436 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6437 }, 6438 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6439 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6440 0, 0 6441 }, 6442 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6443 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6444 0, 0 6445 }, 6446 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6447 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6448 0, 0 6449 }, 6450 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6451 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6452 0, 0 6453 }, 6454 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6455 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6456 0, 0 6457 }, 6458 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6459 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6460 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6461 }, 6462 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6463 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6464 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6465 }, 6466 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6467 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6468 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6469 }, 6470 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6471 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6472 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6473 }, 6474 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6475 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6476 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6477 }, 6478 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6479 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6480 0, 0 6481 }, 6482 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6483 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6484 0, 0 6485 }, 6486 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6487 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6488 0, 0 6489 }, 6490 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6491 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6492 0, 0 6493 }, 6494 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6495 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6496 0, 0 6497 }, 6498 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6499 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6500 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6501 }, 6502 { 
"EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6503 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6504 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6505 }, 6506 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6507 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6508 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6509 }, 6510 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6511 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6512 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6513 }, 6514 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6515 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6516 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6517 }, 6518 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6519 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6520 0, 0 6521 }, 6522 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6523 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6524 0, 0 6525 }, 6526 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6527 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6528 0, 0 6529 }, 6530 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6531 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6532 0, 0 6533 }, 6534 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6535 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6536 0, 0 6537 }, 6538 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6539 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6540 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6541 }, 6542 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6543 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6544 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6545 }, 6546 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6547 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6548 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6549 }, 6550 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6551 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6552 0, 0 6553 }, 6554 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6555 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6556 0, 0 6557 }, 6558 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6559 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6560 0, 0 6561 }, 6562 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6563 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6564 0, 0 6565 }, 6566 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6567 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6568 0, 0 6569 }, 6570 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6571 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6572 0, 0 6573 } 6574 }; 6575 6576 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6577 void *inject_if, uint32_t instance_mask) 6578 { 6579 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6580 int ret; 6581 struct ta_ras_trigger_error_input block_info = { 0 }; 6582 6583 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6584 return -EINVAL; 6585 6586 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6587 return -EINVAL; 6588 6589 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6590 return -EPERM; 6591 6592 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6593 
info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
	      info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
			  ras_gfx_subblocks[info->head.sub_block_index].name,
			  info->head.type);
		return -EPERM;
	}

	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index =
		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}

static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};

static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};

static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};

static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6689 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6690 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6691 }; 6692 6693 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6694 struct ras_err_data *err_data) 6695 { 6696 uint32_t i, data; 6697 uint32_t sec_count, ded_count; 6698 6699 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6700 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6701 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6702 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6703 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6704 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6705 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6706 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6707 6708 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6709 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6710 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6711 6712 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6713 if (sec_count) { 6714 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6715 "SEC %d\n", i, vml2_mems[i], sec_count); 6716 err_data->ce_count += sec_count; 6717 } 6718 6719 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6720 if (ded_count) { 6721 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6722 "DED %d\n", i, vml2_mems[i], ded_count); 6723 err_data->ue_count += ded_count; 6724 } 6725 } 6726 6727 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6728 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6729 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6730 6731 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6732 SEC_COUNT); 6733 if (sec_count) { 6734 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6735 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6736 err_data->ce_count += sec_count; 6737 } 6738 6739 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6740 DED_COUNT); 6741 if (ded_count) { 6742 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6743 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6744 err_data->ue_count += ded_count; 6745 } 6746 } 6747 6748 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6749 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6750 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6751 6752 sec_count = (data & 0x00006000L) >> 0xd; 6753 if (sec_count) { 6754 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6755 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6756 sec_count); 6757 err_data->ce_count += sec_count; 6758 } 6759 } 6760 6761 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6762 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6763 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6764 6765 sec_count = (data & 0x00006000L) >> 0xd; 6766 if (sec_count) { 6767 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6768 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6769 sec_count); 6770 err_data->ce_count += sec_count; 6771 } 6772 6773 ded_count = (data & 0x00018000L) >> 0xf; 6774 if (ded_count) { 6775 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6776 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6777 ded_count); 6778 err_data->ue_count += ded_count; 6779 } 6780 } 6781 6782 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6783 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6784 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6785 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6786 6787 return 0; 6788 } 6789 6790 static int gfx_v9_0_ras_error_count(struct 
amdgpu_device *adev, 6791 const struct soc15_reg_entry *reg, 6792 uint32_t se_id, uint32_t inst_id, uint32_t value, 6793 uint32_t *sec_count, uint32_t *ded_count) 6794 { 6795 uint32_t i; 6796 uint32_t sec_cnt, ded_cnt; 6797 6798 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6799 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6800 gfx_v9_0_ras_fields[i].seg != reg->seg || 6801 gfx_v9_0_ras_fields[i].inst != reg->inst) 6802 continue; 6803 6804 sec_cnt = (value & 6805 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6806 gfx_v9_0_ras_fields[i].sec_count_shift; 6807 if (sec_cnt) { 6808 dev_info(adev->dev, "GFX SubBlock %s, " 6809 "Instance[%d][%d], SEC %d\n", 6810 gfx_v9_0_ras_fields[i].name, 6811 se_id, inst_id, 6812 sec_cnt); 6813 *sec_count += sec_cnt; 6814 } 6815 6816 ded_cnt = (value & 6817 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6818 gfx_v9_0_ras_fields[i].ded_count_shift; 6819 if (ded_cnt) { 6820 dev_info(adev->dev, "GFX SubBlock %s, " 6821 "Instance[%d][%d], DED %d\n", 6822 gfx_v9_0_ras_fields[i].name, 6823 se_id, inst_id, 6824 ded_cnt); 6825 *ded_count += ded_cnt; 6826 } 6827 } 6828 6829 return 0; 6830 } 6831 6832 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6833 { 6834 int i, j, k; 6835 6836 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6837 return; 6838 6839 /* read back registers to clear the counters */ 6840 mutex_lock(&adev->grbm_idx_mutex); 6841 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6842 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6843 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6844 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 6845 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6846 } 6847 } 6848 } 6849 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6850 mutex_unlock(&adev->grbm_idx_mutex); 6851 6852 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6853 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6854 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6855 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6856 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6857 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6858 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6859 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6860 6861 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6862 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6863 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6864 } 6865 6866 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6867 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6868 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6869 } 6870 6871 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6872 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6873 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6874 } 6875 6876 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6877 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6878 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6879 } 6880 6881 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6882 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6883 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6884 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6885 } 6886 6887 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 6888 void *ras_error_status) 6889 { 6890 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 6891 uint32_t sec_count = 0, ded_count = 0; 6892 uint32_t i, j, k; 6893 
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);
}

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
}

static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Setting it to 0x1f (five bits set)
	 * makes sure gfx only gets around 25% of GPU resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6997 */ 6998 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 6999 if (i != ring->pipe) 7000 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 7001 7002 } 7003 } 7004 7005 static void gfx_v9_ip_print(void *handle, struct drm_printer *p) 7006 { 7007 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7008 uint32_t i, j, k, reg, index = 0; 7009 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7010 7011 if (!adev->gfx.ip_dump_core) 7012 return; 7013 7014 for (i = 0; i < reg_count; i++) 7015 drm_printf(p, "%-50s \t 0x%08x\n", 7016 gc_reg_list_9[i].reg_name, 7017 adev->gfx.ip_dump_core[i]); 7018 7019 /* print compute queue registers for all instances */ 7020 if (!adev->gfx.ip_dump_compute_queues) 7021 return; 7022 7023 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7024 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7025 adev->gfx.mec.num_mec, 7026 adev->gfx.mec.num_pipe_per_mec, 7027 adev->gfx.mec.num_queue_per_pipe); 7028 7029 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7030 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7031 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7032 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7033 for (reg = 0; reg < reg_count; reg++) { 7034 drm_printf(p, "%-50s \t 0x%08x\n", 7035 gc_cp_reg_list_9[reg].reg_name, 7036 adev->gfx.ip_dump_compute_queues[index + reg]); 7037 } 7038 index += reg_count; 7039 } 7040 } 7041 } 7042 7043 } 7044 7045 static void gfx_v9_ip_dump(void *handle) 7046 { 7047 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 7048 uint32_t i, j, k, reg, index = 0; 7049 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7050 7051 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) 7052 return; 7053 7054 amdgpu_gfx_off_ctrl(adev, false); 7055 for (i = 0; i < reg_count; i++) 7056 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); 7057 amdgpu_gfx_off_ctrl(adev, true); 7058 7059 /* dump compute queue registers for all instances */ 7060 if (!adev->gfx.ip_dump_compute_queues) 7061 return; 7062 7063 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7064 amdgpu_gfx_off_ctrl(adev, false); 7065 mutex_lock(&adev->srbm_mutex); 7066 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7067 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7068 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7069 /* ME0 is for GFX so start from 1 for CP */ 7070 soc15_grbm_select(adev, 1 + i, j, k, 0, 0); 7071 7072 for (reg = 0; reg < reg_count; reg++) { 7073 adev->gfx.ip_dump_compute_queues[index + reg] = 7074 RREG32(SOC15_REG_ENTRY_OFFSET( 7075 gc_cp_reg_list_9[reg])); 7076 } 7077 index += reg_count; 7078 } 7079 } 7080 } 7081 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7082 mutex_unlock(&adev->srbm_mutex); 7083 amdgpu_gfx_off_ctrl(adev, true); 7084 7085 } 7086 7087 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7088 .name = "gfx_v9_0", 7089 .early_init = gfx_v9_0_early_init, 7090 .late_init = gfx_v9_0_late_init, 7091 .sw_init = gfx_v9_0_sw_init, 7092 .sw_fini = gfx_v9_0_sw_fini, 7093 .hw_init = gfx_v9_0_hw_init, 7094 .hw_fini = gfx_v9_0_hw_fini, 7095 .suspend = gfx_v9_0_suspend, 7096 .resume = gfx_v9_0_resume, 7097 .is_idle = gfx_v9_0_is_idle, 7098 .wait_for_idle = gfx_v9_0_wait_for_idle, 7099 .soft_reset = gfx_v9_0_soft_reset, 7100 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7101 .set_powergating_state = gfx_v9_0_set_powergating_state, 7102 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7103 .dump_ip_state = gfx_v9_ip_dump, 7104 .print_ip_state = gfx_v9_ip_print, 
7105 }; 7106 7107 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7108 .type = AMDGPU_RING_TYPE_GFX, 7109 .align_mask = 0xff, 7110 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7111 .support_64bit_ptrs = true, 7112 .secure_submission_supported = true, 7113 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7114 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7115 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7116 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7117 5 + /* COND_EXEC */ 7118 7 + /* PIPELINE_SYNC */ 7119 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7120 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7121 2 + /* VM_FLUSH */ 7122 8 + /* FENCE for VM_FLUSH */ 7123 20 + /* GDS switch */ 7124 4 + /* double SWITCH_BUFFER, 7125 the first COND_EXEC jump to the place just 7126 prior to this double SWITCH_BUFFER */ 7127 5 + /* COND_EXEC */ 7128 7 + /* HDP_flush */ 7129 4 + /* VGT_flush */ 7130 14 + /* CE_META */ 7131 31 + /* DE_META */ 7132 3 + /* CNTX_CTRL */ 7133 5 + /* HDP_INVL */ 7134 8 + 8 + /* FENCE x2 */ 7135 2 + /* SWITCH_BUFFER */ 7136 7, /* gfx_v9_0_emit_mem_sync */ 7137 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7138 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7139 .emit_fence = gfx_v9_0_ring_emit_fence, 7140 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7141 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7142 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7143 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7144 .test_ring = gfx_v9_0_ring_test_ring, 7145 .insert_nop = amdgpu_ring_insert_nop, 7146 .pad_ib = amdgpu_ring_generic_pad_ib, 7147 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7148 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7149 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7150 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7151 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7152 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7153 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7154 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7155 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7156 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7157 }; 7158 7159 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7160 .type = AMDGPU_RING_TYPE_GFX, 7161 .align_mask = 0xff, 7162 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7163 .support_64bit_ptrs = true, 7164 .secure_submission_supported = true, 7165 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7166 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7167 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7168 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7169 5 + /* COND_EXEC */ 7170 7 + /* PIPELINE_SYNC */ 7171 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7172 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7173 2 + /* VM_FLUSH */ 7174 8 + /* FENCE for VM_FLUSH */ 7175 20 + /* GDS switch */ 7176 4 + /* double SWITCH_BUFFER, 7177 * the first COND_EXEC jump to the place just 7178 * prior to this double SWITCH_BUFFER 7179 */ 7180 5 + /* COND_EXEC */ 7181 7 + /* HDP_flush */ 7182 4 + /* VGT_flush */ 7183 14 + /* CE_META */ 7184 31 + /* DE_META */ 7185 3 + /* CNTX_CTRL */ 7186 5 + /* HDP_INVL */ 7187 8 + 8 + /* FENCE x2 */ 7188 2 + /* SWITCH_BUFFER */ 7189 7, /* gfx_v9_0_emit_mem_sync */ 7190 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7191 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7192 .emit_fence = gfx_v9_0_ring_emit_fence, 7193 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7194 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7195 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7196 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7197 .test_ring = 
gfx_v9_0_ring_test_ring, 7198 .test_ib = gfx_v9_0_ring_test_ib, 7199 .insert_nop = amdgpu_sw_ring_insert_nop, 7200 .pad_ib = amdgpu_ring_generic_pad_ib, 7201 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7202 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7203 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7204 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7205 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7206 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7207 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7208 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7209 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7210 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7211 .patch_de = gfx_v9_0_ring_patch_de_meta, 7212 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7213 }; 7214 7215 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7216 .type = AMDGPU_RING_TYPE_COMPUTE, 7217 .align_mask = 0xff, 7218 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7219 .support_64bit_ptrs = true, 7220 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7221 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7222 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7223 .emit_frame_size = 7224 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7225 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7226 5 + /* hdp invalidate */ 7227 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7228 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7229 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7230 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7231 7 + /* gfx_v9_0_emit_mem_sync */ 7232 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7233 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7234 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7235 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7236 .emit_fence = gfx_v9_0_ring_emit_fence, 7237 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7238 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7239 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7240 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7241 .test_ring = gfx_v9_0_ring_test_ring, 7242 .test_ib = gfx_v9_0_ring_test_ib, 7243 .insert_nop = amdgpu_ring_insert_nop, 7244 .pad_ib = amdgpu_ring_generic_pad_ib, 7245 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7246 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7247 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7248 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7249 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7250 }; 7251 7252 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7253 .type = AMDGPU_RING_TYPE_KIQ, 7254 .align_mask = 0xff, 7255 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7256 .support_64bit_ptrs = true, 7257 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7258 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7259 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7260 .emit_frame_size = 7261 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7262 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7263 5 + /* hdp invalidate */ 7264 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7265 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7266 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7267 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7268 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7269 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7270 .test_ring = gfx_v9_0_ring_test_ring, 7271 .insert_nop = amdgpu_ring_insert_nop, 7272 .pad_ib = amdgpu_ring_generic_pad_ib, 7273 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7274 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7275 .emit_reg_wait = 
gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* aldebaran removed all the GDS internal memory;
		 * only GWS opcodes such as barrier and semaphore
		 * are still supported in the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags
& AMD_APU_IS_RAVEN2) 7387 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7388 else 7389 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7390 break; 7391 case IP_VERSION(9, 4, 1): 7392 adev->gds.gds_compute_max_wave_id = 0xfff; 7393 break; 7394 case IP_VERSION(9, 4, 2): 7395 /* deprecated for Aldebaran, no usage at all */ 7396 adev->gds.gds_compute_max_wave_id = 0; 7397 break; 7398 default: 7399 /* this really depends on the chip */ 7400 adev->gds.gds_compute_max_wave_id = 0x7ff; 7401 break; 7402 } 7403 7404 adev->gds.gws_size = 64; 7405 adev->gds.oa_size = 16; 7406 } 7407 7408 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7409 u32 bitmap) 7410 { 7411 u32 data; 7412 7413 if (!bitmap) 7414 return; 7415 7416 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7417 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7418 7419 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7420 } 7421 7422 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7423 { 7424 u32 data, mask; 7425 7426 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7427 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7428 7429 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7430 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7431 7432 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7433 7434 return (~data) & mask; 7435 } 7436 7437 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7438 struct amdgpu_cu_info *cu_info) 7439 { 7440 int i, j, k, counter, active_cu_number = 0; 7441 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7442 unsigned disable_masks[4 * 4]; 7443 7444 if (!adev || !cu_info) 7445 return -EINVAL; 7446 7447 /* 7448 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7449 */ 7450 if (adev->gfx.config.max_shader_engines * 7451 adev->gfx.config.max_sh_per_se > 16) 7452 return -EINVAL; 7453 7454 amdgpu_gfx_parse_disable_cu(disable_masks, 7455 adev->gfx.config.max_shader_engines, 7456 adev->gfx.config.max_sh_per_se); 7457 7458 mutex_lock(&adev->grbm_idx_mutex); 7459 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7460 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7461 mask = 1; 7462 ao_bitmap = 0; 7463 counter = 0; 7464 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 7465 gfx_v9_0_set_user_cu_inactive_bitmap( 7466 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7467 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7468 7469 /* 7470 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7471 * 4x4 size array, and it's usually suitable for Vega 7472 * ASICs which has 4*2 SE/SH layout. 7473 * But for Arcturus, SE/SH layout is changed to 8*1. 
7474 * To mostly reduce the impact, we make it compatible 7475 * with current bitmap array as below: 7476 * SE4,SH0 --> bitmap[0][1] 7477 * SE5,SH0 --> bitmap[1][1] 7478 * SE6,SH0 --> bitmap[2][1] 7479 * SE7,SH0 --> bitmap[3][1] 7480 */ 7481 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; 7482 7483 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7484 if (bitmap & mask) { 7485 if (counter < adev->gfx.config.max_cu_per_sh) 7486 ao_bitmap |= mask; 7487 counter ++; 7488 } 7489 mask <<= 1; 7490 } 7491 active_cu_number += counter; 7492 if (i < 2 && j < 2) 7493 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7494 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7495 } 7496 } 7497 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7498 mutex_unlock(&adev->grbm_idx_mutex); 7499 7500 cu_info->number = active_cu_number; 7501 cu_info->ao_cu_mask = ao_cu_mask; 7502 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7503 7504 return 0; 7505 } 7506 7507 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7508 { 7509 .type = AMD_IP_BLOCK_TYPE_GFX, 7510 .major = 9, 7511 .minor = 0, 7512 .rev = 0, 7513 .funcs = &gfx_v9_0_ip_funcs, 7514 }; 7515
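/*
 * Illustrative sketch only, kept out of the build with #if 0: how the
 * mask/shift pairs produced by SOC15_REG_FIELD() in gfx_v9_0_ras_fields[]
 * are applied to a raw EDC counter value inside gfx_v9_0_ras_error_count().
 * The helper name below is hypothetical and not part of the driver; a zero
 * DED mask corresponds to the "0, 0" table entries for sub-blocks that only
 * report SED-style counts.
 */
#if 0
static void gfx_v9_0_example_decode_edc(uint32_t value,
					uint32_t sec_mask, uint32_t sec_shift,
					uint32_t ded_mask, uint32_t ded_shift,
					uint32_t *sec_count, uint32_t *ded_count)
{
	/* correctable (SEC) errors reported by this sub-block */
	*sec_count += (value & sec_mask) >> sec_shift;

	/* uncorrectable (DED) errors; skipped when the table entry has no
	 * DED field
	 */
	if (ded_mask)
		*ded_count += (value & ded_mask) >> ded_shift;
}
#endif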
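/*
 * Worked example, illustrative only and not built: the wave-limit value
 * written by gfx_v9_0_emit_wave_limit(). Assuming mmSPI_WCL_PIPE_PERCENT_GFX
 * behaves as a 7-bit fraction of the full pipe (maximum 0x7f), the enable
 * value 0x1f corresponds to roughly 31 / 127 ~= 24%, i.e. the "around 25%
 * of GPU resources" mentioned in the comment of that function.
 */
#if 0
static uint32_t gfx_v9_0_example_wave_limit_percent(uint32_t val)
{
	/* percentage of the pipe granted to gfx waves, rounded down:
	 * 0x1f -> 24, 0x7f -> 100
	 */
	return (val * 100) / 0x7f;
}
#endif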