/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT                 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_1_ARCT                 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_2_ARCT                 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_3_ARCT                 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_4_ARCT                 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_5_ARCT                 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
	/* compute queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
};

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};
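/*
 * AMDGPU_RAS_SUB_BLOCK() builds one ras_gfx_subblocks[] entry: it stringifies
 * the sub-block name, records the matching TA_RAS_BLOCK__* id, and packs the
 * flags a-d into hw_supported_error_type and e-h into sw_supported_error_type,
 * each flag marking one RAS error class supported for that sub-block.
 */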
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);

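/*
 * Emit a PACKET3_SET_RESOURCES packet on the KIQ ring.  It hands the KIQ the
 * bitmap of compute queues it may manage and the GPU address of the cleaner
 * shader (passed in 256-byte units, hence the >> 8 below).
 */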
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

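/*
 * Emit a PACKET3_QUERY_STATUS packet on the KIQ ring for the queue selected
 * by its doorbell offset; the CP reports completion by writing the 64-bit
 * value @seq to the fence address @addr.
 */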
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}


static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
					uint32_t xcc_id, uint32_t vmid)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	unsigned i;

	/* enter safe mode */
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
		/* wait until the dequeue request takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout)
			dev_err(adev->dev, "fail to wait on hqd deactive\n");
	} else {
		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
	}

	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* exit safe mode */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

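/*
 * Program the per-ASIC "golden" register settings for the detected GC IP
 * version.  Renoir (9.3.0) returns early, and 9.4.1/9.4.2 are excluded from
 * the common gc_9_x settings programmed at the end.
 */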
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

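/*
 * Small CP packet helpers used throughout this file: WRITE_DATA writes @val
 * to register @reg (optionally with write confirmation), and WAIT_REG_MEM
 * polls a register or dword-aligned memory location until it equals @ref
 * under @mask.
 */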
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

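/*
 * Check the loaded ME/MEC/PFP firmware and feature versions and set
 * me_fw_write_wait/mec_fw_write_wait when the CP firmware is new enough to
 * handle the combined register write-then-wait operation; firmware that is
 * too old triggers a one-time warning asking the user to update.
 */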
adev->gfx.me_fw_write_wait = false; 1269 adev->gfx.mec_fw_write_wait = false; 1270 1271 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1272 ((adev->gfx.mec_fw_version < 0x000001a5) || 1273 (adev->gfx.mec_feature_version < 46) || 1274 (adev->gfx.pfp_fw_version < 0x000000b7) || 1275 (adev->gfx.pfp_feature_version < 46))) 1276 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1277 1278 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1279 case IP_VERSION(9, 0, 1): 1280 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1281 (adev->gfx.me_feature_version >= 42) && 1282 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1283 (adev->gfx.pfp_feature_version >= 42)) 1284 adev->gfx.me_fw_write_wait = true; 1285 1286 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1287 (adev->gfx.mec_feature_version >= 42)) 1288 adev->gfx.mec_fw_write_wait = true; 1289 break; 1290 case IP_VERSION(9, 2, 1): 1291 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1292 (adev->gfx.me_feature_version >= 44) && 1293 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1294 (adev->gfx.pfp_feature_version >= 44)) 1295 adev->gfx.me_fw_write_wait = true; 1296 1297 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1298 (adev->gfx.mec_feature_version >= 44)) 1299 adev->gfx.mec_fw_write_wait = true; 1300 break; 1301 case IP_VERSION(9, 4, 0): 1302 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1303 (adev->gfx.me_feature_version >= 44) && 1304 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1305 (adev->gfx.pfp_feature_version >= 44)) 1306 adev->gfx.me_fw_write_wait = true; 1307 1308 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1309 (adev->gfx.mec_feature_version >= 44)) 1310 adev->gfx.mec_fw_write_wait = true; 1311 break; 1312 case IP_VERSION(9, 1, 0): 1313 case IP_VERSION(9, 2, 2): 1314 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1315 (adev->gfx.me_feature_version >= 42) && 1316 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1317 (adev->gfx.pfp_feature_version >= 42)) 1318 adev->gfx.me_fw_write_wait = true; 1319 1320 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1321 (adev->gfx.mec_feature_version >= 42)) 1322 adev->gfx.mec_fw_write_wait = true; 1323 break; 1324 default: 1325 adev->gfx.me_fw_write_wait = true; 1326 adev->gfx.mec_fw_write_wait = true; 1327 break; 1328 } 1329 } 1330 1331 struct amdgpu_gfxoff_quirk { 1332 u16 chip_vendor; 1333 u16 chip_device; 1334 u16 subsys_vendor; 1335 u16 subsys_device; 1336 u8 revision; 1337 }; 1338 1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1340 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1341 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1342 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1343 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1344 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1345 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1346 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1347 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1348 { 0, 0, 0, 0, 0 }, 1349 }; 1350 1351 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1352 { 1353 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1354 1355 while (p && p->chip_device != 0) { 1356 if (pdev->vendor == p->chip_vendor && 1357 pdev->device == p->chip_device && 1358 pdev->subsystem_vendor == p->subsys_vendor && 1359 pdev->subsystem_device == p->subsys_device && 1360 pdev->revision == p->revision) { 1361 return true; 1362 } 1363 ++p; 1364 } 1365 return false; 1366 } 1367 1368 static bool is_raven_kicker(struct amdgpu_device *adev) 
1369 { 1370 if (adev->pm.fw_version >= 0x41e2b) 1371 return true; 1372 else 1373 return false; 1374 } 1375 1376 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1377 { 1378 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1379 (adev->gfx.me_fw_version >= 0x000000a5) && 1380 (adev->gfx.me_feature_version >= 52)) 1381 return true; 1382 else 1383 return false; 1384 } 1385 1386 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1387 { 1388 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1389 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1390 1391 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1392 case IP_VERSION(9, 0, 1): 1393 case IP_VERSION(9, 2, 1): 1394 case IP_VERSION(9, 4, 0): 1395 break; 1396 case IP_VERSION(9, 2, 2): 1397 case IP_VERSION(9, 1, 0): 1398 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1399 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1400 ((!is_raven_kicker(adev) && 1401 adev->gfx.rlc_fw_version < 531) || 1402 (adev->gfx.rlc_feature_version < 1) || 1403 !adev->gfx.rlc.is_rlc_v2_1)) 1404 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1405 1406 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1407 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1408 AMD_PG_SUPPORT_CP | 1409 AMD_PG_SUPPORT_RLC_SMU_HS; 1410 break; 1411 case IP_VERSION(9, 3, 0): 1412 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1413 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1414 AMD_PG_SUPPORT_CP | 1415 AMD_PG_SUPPORT_RLC_SMU_HS; 1416 break; 1417 default: 1418 break; 1419 } 1420 } 1421 1422 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1423 char *chip_name) 1424 { 1425 int err; 1426 1427 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 1428 "amdgpu/%s_pfp.bin", chip_name); 1429 if (err) 1430 goto out; 1431 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1432 1433 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 1434 "amdgpu/%s_me.bin", chip_name); 1435 if (err) 1436 goto out; 1437 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1438 1439 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 1440 "amdgpu/%s_ce.bin", chip_name); 1441 if (err) 1442 goto out; 1443 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1444 1445 out: 1446 if (err) { 1447 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1448 amdgpu_ucode_release(&adev->gfx.me_fw); 1449 amdgpu_ucode_release(&adev->gfx.ce_fw); 1450 } 1451 return err; 1452 } 1453 1454 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1455 char *chip_name) 1456 { 1457 int err; 1458 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1459 uint16_t version_major; 1460 uint16_t version_minor; 1461 uint32_t smu_version; 1462 1463 /* 1464 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1465 * instead of picasso_rlc.bin. 1466 * Judgment method: 1467 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1468 * or revision >= 0xD8 && revision <= 0xDF 1469 * otherwise is PCO FP5 1470 */ 1471 if (!strcmp(chip_name, "picasso") && 1472 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1473 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1474 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1475 "amdgpu/%s_rlc_am4.bin", chip_name); 1476 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1477 (smu_version >= 0x41e2b)) 1478 /** 1479 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 
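* The 0x41e2b cutoff matches is_raven_kicker(), so Raven boards whose SBIOS already carries the newer SMC get the kicker RLC firmware.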
1480 */ 1481 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1482 "amdgpu/%s_kicker_rlc.bin", chip_name); 1483 else 1484 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1485 "amdgpu/%s_rlc.bin", chip_name); 1486 if (err) 1487 goto out; 1488 1489 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1490 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1491 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1492 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1493 out: 1494 if (err) 1495 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1496 1497 return err; 1498 } 1499 1500 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1501 { 1502 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1503 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1504 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1505 return false; 1506 1507 return true; 1508 } 1509 1510 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1511 char *chip_name) 1512 { 1513 int err; 1514 1515 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1516 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1517 "amdgpu/%s_sjt_mec.bin", chip_name); 1518 else 1519 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1520 "amdgpu/%s_mec.bin", chip_name); 1521 if (err) 1522 goto out; 1523 1524 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1525 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1526 1527 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1528 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1529 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1530 "amdgpu/%s_sjt_mec2.bin", chip_name); 1531 else 1532 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1533 "amdgpu/%s_mec2.bin", chip_name); 1534 if (!err) { 1535 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1536 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1537 } else { 1538 err = 0; 1539 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1540 } 1541 } else { 1542 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1543 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1544 } 1545 1546 gfx_v9_0_check_if_need_gfxoff(adev); 1547 gfx_v9_0_check_fw_write_wait(adev); 1548 1549 out: 1550 if (err) 1551 amdgpu_ucode_release(&adev->gfx.mec_fw); 1552 return err; 1553 } 1554 1555 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1556 { 1557 char ucode_prefix[30]; 1558 int r; 1559 1560 DRM_DEBUG("\n"); 1561 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 1562 1563 /* No CPG in Arcturus */ 1564 if (adev->gfx.num_gfx_rings) { 1565 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1566 if (r) 1567 return r; 1568 } 1569 1570 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1571 if (r) 1572 return r; 1573 1574 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1575 if (r) 1576 return r; 1577 1578 return r; 1579 } 1580 1581 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1582 { 1583 u32 count = 0; 1584 const struct cs_section_def *sect = NULL; 1585 const struct cs_extent_def *ext = NULL; 1586 1587 /* begin clear state */ 1588 count += 2; 1589 /* context control state */ 1590 count += 3; 1591 1592 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1593 for (ext = sect->section; ext->extent != NULL; ++ext) { 1594 if (sect->id == SECT_CONTEXT) 1595 
count += 2 + ext->reg_count; 1596 else 1597 return 0; 1598 } 1599 } 1600 1601 /* end clear state */ 1602 count += 2; 1603 /* clear state */ 1604 count += 2; 1605 1606 return count; 1607 } 1608 1609 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1610 volatile u32 *buffer) 1611 { 1612 u32 count = 0, i; 1613 const struct cs_section_def *sect = NULL; 1614 const struct cs_extent_def *ext = NULL; 1615 1616 if (adev->gfx.rlc.cs_data == NULL) 1617 return; 1618 if (buffer == NULL) 1619 return; 1620 1621 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1622 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1623 1624 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1625 buffer[count++] = cpu_to_le32(0x80000000); 1626 buffer[count++] = cpu_to_le32(0x80000000); 1627 1628 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1629 for (ext = sect->section; ext->extent != NULL; ++ext) { 1630 if (sect->id == SECT_CONTEXT) { 1631 buffer[count++] = 1632 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1633 buffer[count++] = cpu_to_le32(ext->reg_index - 1634 PACKET3_SET_CONTEXT_REG_START); 1635 for (i = 0; i < ext->reg_count; i++) 1636 buffer[count++] = cpu_to_le32(ext->extent[i]); 1637 } else { 1638 return; 1639 } 1640 } 1641 } 1642 1643 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1644 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1645 1646 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1647 buffer[count++] = cpu_to_le32(0); 1648 } 1649 1650 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1651 { 1652 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1653 uint32_t pg_always_on_cu_num = 2; 1654 uint32_t always_on_cu_num; 1655 uint32_t i, j, k; 1656 uint32_t mask, cu_bitmap, counter; 1657 1658 if (adev->flags & AMD_IS_APU) 1659 always_on_cu_num = 4; 1660 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1661 always_on_cu_num = 8; 1662 else 1663 always_on_cu_num = 12; 1664 1665 mutex_lock(&adev->grbm_idx_mutex); 1666 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1667 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1668 mask = 1; 1669 cu_bitmap = 0; 1670 counter = 0; 1671 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1672 1673 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1674 if (cu_info->bitmap[0][i][j] & mask) { 1675 if (counter == pg_always_on_cu_num) 1676 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1677 if (counter < always_on_cu_num) 1678 cu_bitmap |= mask; 1679 else 1680 break; 1681 counter++; 1682 } 1683 mask <<= 1; 1684 } 1685 1686 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1687 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1688 } 1689 } 1690 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1691 mutex_unlock(&adev->grbm_idx_mutex); 1692 } 1693 1694 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1695 { 1696 uint32_t data; 1697 1698 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1699 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1700 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1701 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1702 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1703 1704 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1705 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1706 1707 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1708 
WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1709 1710 mutex_lock(&adev->grbm_idx_mutex); 1711 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1712 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1713 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1714 1715 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1716 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1717 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1718 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1719 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1720 1721 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1722 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1723 data &= 0x0000FFFF; 1724 data |= 0x00C00000; 1725 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1726 1727 /* 1728 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), 1729 * programmed in gfx_v9_0_init_always_on_cu_mask() 1730 */ 1731 1732 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1733 * but used for RLC_LB_CNTL configuration */ 1734 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1735 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1736 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1737 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1738 mutex_unlock(&adev->grbm_idx_mutex); 1739 1740 gfx_v9_0_init_always_on_cu_mask(adev); 1741 } 1742 1743 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) 1744 { 1745 uint32_t data; 1746 1747 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1748 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1749 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); 1750 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1751 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); 1752 1753 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1754 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1755 1756 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1757 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); 1758 1759 mutex_lock(&adev->grbm_idx_mutex); 1760 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ 1761 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1762 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); 1763 1764 /* set mmRLC_LB_PARAMS = 0x003F_1006 */ 1765 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); 1766 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); 1767 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); 1768 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); 1769 1770 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ 1771 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); 1772 data &= 0x0000FFFF; 1773 data |= 0x00C00000; 1774 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); 1775 1776 /* 1777 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), 1778 * programmed in gfx_v9_0_init_always_on_cu_mask() 1779 */ 1780 1781 /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, 1782 * but used for RLC_LB_CNTL configuration */ 1783 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; 1784 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); 1785 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); 1786 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); 1787 mutex_unlock(&adev->grbm_idx_mutex); 1788 1789 gfx_v9_0_init_always_on_cu_mask(adev); 1790 } 1791 1792 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) 1793 { 1794 
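/* Flip load balancing on or off; the thresholds and CU masks it relies on are programmed in gfx_v9_0_init_lbpw()/gfx_v9_4_init_lbpw() above. */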
WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); 1795 } 1796 1797 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) 1798 { 1799 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) 1800 return 5; 1801 else 1802 return 4; 1803 } 1804 1805 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 1806 { 1807 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 1808 1809 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 1810 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); 1811 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); 1812 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); 1813 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); 1814 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); 1815 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); 1816 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); 1817 adev->gfx.rlc.rlcg_reg_access_supported = true; 1818 } 1819 1820 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) 1821 { 1822 const struct cs_section_def *cs_data; 1823 int r; 1824 1825 adev->gfx.rlc.cs_data = gfx9_cs_data; 1826 1827 cs_data = adev->gfx.rlc.cs_data; 1828 1829 if (cs_data) { 1830 /* init clear state block */ 1831 r = amdgpu_gfx_rlc_init_csb(adev); 1832 if (r) 1833 return r; 1834 } 1835 1836 if (adev->flags & AMD_IS_APU) { 1837 /* TODO: double check the cp_table_size for RV */ 1838 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1839 r = amdgpu_gfx_rlc_init_cpt(adev); 1840 if (r) 1841 return r; 1842 } 1843 1844 return 0; 1845 } 1846 1847 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) 1848 { 1849 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1850 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 1851 } 1852 1853 static int gfx_v9_0_mec_init(struct amdgpu_device *adev) 1854 { 1855 int r; 1856 u32 *hpd; 1857 const __le32 *fw_data; 1858 unsigned fw_size; 1859 u32 *fw; 1860 size_t mec_hpd_size; 1861 1862 const struct gfx_firmware_header_v1_0 *mec_hdr; 1863 1864 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1865 1866 /* take ownership of the relevant compute queues */ 1867 amdgpu_gfx_compute_queue_acquire(adev); 1868 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; 1869 if (mec_hpd_size) { 1870 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1871 AMDGPU_GEM_DOMAIN_VRAM | 1872 AMDGPU_GEM_DOMAIN_GTT, 1873 &adev->gfx.mec.hpd_eop_obj, 1874 &adev->gfx.mec.hpd_eop_gpu_addr, 1875 (void **)&hpd); 1876 if (r) { 1877 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1878 gfx_v9_0_mec_fini(adev); 1879 return r; 1880 } 1881 1882 memset(hpd, 0, mec_hpd_size); 1883 1884 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1885 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1886 } 1887 1888 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1889 1890 fw_data = (const __le32 *) 1891 (adev->gfx.mec_fw->data + 1892 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 1893 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 1894 1895 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 1896 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 1897 &adev->gfx.mec.mec_fw_obj, 1898 &adev->gfx.mec.mec_fw_gpu_addr, 1899 (void **)&fw); 1900 if (r) { 1901 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); 1902 
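/* free the EOP buffer allocated above before returning the error */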
gfx_v9_0_mec_fini(adev); 1903 return r; 1904 } 1905 1906 memcpy(fw, fw_data, fw_size); 1907 1908 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1909 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1910 1911 return 0; 1912 } 1913 1914 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1915 { 1916 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1917 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1918 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1919 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1920 (SQ_IND_INDEX__FORCE_READ_MASK)); 1921 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1922 } 1923 1924 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1925 uint32_t wave, uint32_t thread, 1926 uint32_t regno, uint32_t num, uint32_t *out) 1927 { 1928 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1929 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1930 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1931 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1932 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1933 (SQ_IND_INDEX__FORCE_READ_MASK) | 1934 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1935 while (num--) 1936 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1937 } 1938 1939 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1940 { 1941 /* type 1 wave data */ 1942 dst[(*no_fields)++] = 1; 1943 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1944 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1945 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1946 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1947 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1948 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1949 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1950 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1951 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1952 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1953 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1954 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1955 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1956 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1957 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1958 } 1959 1960 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1961 uint32_t wave, uint32_t start, 1962 uint32_t size, uint32_t *dst) 1963 { 1964 wave_read_regs( 1965 adev, simd, wave, 0, 1966 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1967 } 1968 1969 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1970 uint32_t wave, uint32_t thread, 1971 uint32_t start, uint32_t size, 1972 uint32_t *dst) 1973 { 1974 wave_read_regs( 1975 adev, simd, wave, thread, 1976 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1977 } 1978 1979 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1980 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1981 { 1982 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1983 } 1984 1985 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 1986 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 1987 .select_se_sh = &gfx_v9_0_select_se_sh, 
1988 .read_wave_data = &gfx_v9_0_read_wave_data, 1989 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, 1990 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1991 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 1992 }; 1993 1994 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 1995 .ras_error_inject = &gfx_v9_0_ras_error_inject, 1996 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 1997 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 1998 }; 1999 2000 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 2001 .ras_block = { 2002 .hw_ops = &gfx_v9_0_ras_ops, 2003 }, 2004 }; 2005 2006 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2007 { 2008 u32 gb_addr_config; 2009 int err; 2010 2011 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2012 case IP_VERSION(9, 0, 1): 2013 adev->gfx.config.max_hw_contexts = 8; 2014 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2015 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2016 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2017 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2018 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2019 break; 2020 case IP_VERSION(9, 2, 1): 2021 adev->gfx.config.max_hw_contexts = 8; 2022 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2023 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2024 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2025 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2026 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2027 DRM_INFO("fix gfx.config for vega12\n"); 2028 break; 2029 case IP_VERSION(9, 4, 0): 2030 adev->gfx.ras = &gfx_v9_0_ras; 2031 adev->gfx.config.max_hw_contexts = 8; 2032 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2033 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2034 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2035 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2036 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2037 gb_addr_config &= ~0xf3e777ff; 2038 gb_addr_config |= 0x22014042; 2039 /* check vbios table if gpu info is not available */ 2040 err = amdgpu_atomfirmware_get_gfx_info(adev); 2041 if (err) 2042 return err; 2043 break; 2044 case IP_VERSION(9, 2, 2): 2045 case IP_VERSION(9, 1, 0): 2046 adev->gfx.config.max_hw_contexts = 8; 2047 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2048 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2049 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2050 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2051 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2052 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2053 else 2054 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2055 break; 2056 case IP_VERSION(9, 4, 1): 2057 adev->gfx.ras = &gfx_v9_4_ras; 2058 adev->gfx.config.max_hw_contexts = 8; 2059 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2060 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2061 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2062 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2063 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2064 gb_addr_config &= ~0xf3e777ff; 2065 gb_addr_config |= 0x22014042; 2066 break; 2067 case IP_VERSION(9, 3, 0): 2068 adev->gfx.config.max_hw_contexts = 8; 2069 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2070 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2071 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2072 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2073 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2074 gb_addr_config &= ~0xf3e777ff; 2075 gb_addr_config |= 0x22010042; 
2076 break; 2077 case IP_VERSION(9, 4, 2): 2078 adev->gfx.ras = &gfx_v9_4_2_ras; 2079 adev->gfx.config.max_hw_contexts = 8; 2080 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2081 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2082 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2083 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2084 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2085 gb_addr_config &= ~0xf3e777ff; 2086 gb_addr_config |= 0x22014042; 2087 /* check vbios table if gpu info is not available */ 2088 err = amdgpu_atomfirmware_get_gfx_info(adev); 2089 if (err) 2090 return err; 2091 break; 2092 default: 2093 BUG(); 2094 break; 2095 } 2096 2097 adev->gfx.config.gb_addr_config = gb_addr_config; 2098 2099 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2100 REG_GET_FIELD( 2101 adev->gfx.config.gb_addr_config, 2102 GB_ADDR_CONFIG, 2103 NUM_PIPES); 2104 2105 adev->gfx.config.max_tile_pipes = 2106 adev->gfx.config.gb_addr_config_fields.num_pipes; 2107 2108 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2109 REG_GET_FIELD( 2110 adev->gfx.config.gb_addr_config, 2111 GB_ADDR_CONFIG, 2112 NUM_BANKS); 2113 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2114 REG_GET_FIELD( 2115 adev->gfx.config.gb_addr_config, 2116 GB_ADDR_CONFIG, 2117 MAX_COMPRESSED_FRAGS); 2118 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2119 REG_GET_FIELD( 2120 adev->gfx.config.gb_addr_config, 2121 GB_ADDR_CONFIG, 2122 NUM_RB_PER_SE); 2123 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2124 REG_GET_FIELD( 2125 adev->gfx.config.gb_addr_config, 2126 GB_ADDR_CONFIG, 2127 NUM_SHADER_ENGINES); 2128 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2129 REG_GET_FIELD( 2130 adev->gfx.config.gb_addr_config, 2131 GB_ADDR_CONFIG, 2132 PIPE_INTERLEAVE_SIZE)); 2133 2134 return 0; 2135 } 2136 2137 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2138 int mec, int pipe, int queue) 2139 { 2140 unsigned irq_type; 2141 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2142 unsigned int hw_prio; 2143 2144 ring = &adev->gfx.compute_ring[ring_id]; 2145 2146 /* mec0 is me1 */ 2147 ring->me = mec + 1; 2148 ring->pipe = pipe; 2149 ring->queue = queue; 2150 2151 ring->ring_obj = NULL; 2152 ring->use_doorbell = true; 2153 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2154 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2155 + (ring_id * GFX9_MEC_HPD_SIZE); 2156 ring->vm_hub = AMDGPU_GFXHUB(0); 2157 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2158 2159 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2160 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2161 + ring->pipe; 2162 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2163 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2164 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2165 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2166 hw_prio, NULL); 2167 } 2168 2169 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2170 { 2171 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2172 uint32_t *ptr; 2173 uint32_t inst; 2174 2175 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2176 if (!ptr) { 2177 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2178 adev->gfx.ip_dump_core = NULL; 2179 } else { 2180 adev->gfx.ip_dump_core = ptr; 2181 } 2182 2183 /* Allocate memory for compute queue registers for all the instances */ 2184 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2185 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2186 adev->gfx.mec.num_queue_per_pipe; 2187 2188 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2189 if (!ptr) { 2190 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2191 adev->gfx.ip_dump_compute_queues = NULL; 2192 } else { 2193 adev->gfx.ip_dump_compute_queues = ptr; 2194 } 2195 } 2196 2197 static int gfx_v9_0_sw_init(void *handle) 2198 { 2199 int i, j, k, r, ring_id; 2200 int xcc_id = 0; 2201 struct amdgpu_ring *ring; 2202 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2203 unsigned int hw_prio; 2204 2205 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2206 case IP_VERSION(9, 0, 1): 2207 case IP_VERSION(9, 2, 1): 2208 case IP_VERSION(9, 4, 0): 2209 case IP_VERSION(9, 2, 2): 2210 case IP_VERSION(9, 1, 0): 2211 case IP_VERSION(9, 4, 1): 2212 case IP_VERSION(9, 3, 0): 2213 case IP_VERSION(9, 4, 2): 2214 adev->gfx.mec.num_mec = 2; 2215 break; 2216 default: 2217 adev->gfx.mec.num_mec = 1; 2218 break; 2219 } 2220 2221 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2222 default: 2223 adev->gfx.enable_cleaner_shader = false; 2224 break; 2225 } 2226 2227 adev->gfx.mec.num_pipe_per_mec = 4; 2228 adev->gfx.mec.num_queue_per_pipe = 8; 2229 2230 /* EOP Event */ 2231 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2232 if (r) 2233 return r; 2234 2235 /* Bad opcode Event */ 2236 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 2237 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, 2238 &adev->gfx.bad_op_irq); 2239 if (r) 2240 return r; 2241 2242 /* Privileged reg */ 2243 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2244 &adev->gfx.priv_reg_irq); 2245 if (r) 2246 return r; 2247 2248 /* Privileged inst */ 2249 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2250 &adev->gfx.priv_inst_irq); 2251 if (r) 2252 return r; 2253 2254 /* ECC error */ 2255 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2256 &adev->gfx.cp_ecc_error_irq); 2257 if (r) 2258 return r; 2259 2260 /* FUE error */ 2261 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 2262 &adev->gfx.cp_ecc_error_irq); 2263 if (r) 2264 return r; 2265 2266 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2267 2268 if (adev->gfx.rlc.funcs) { 2269 if (adev->gfx.rlc.funcs->init) { 2270 r = adev->gfx.rlc.funcs->init(adev); 2271 if (r) { 2272 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2273 return r; 2274 } 2275 } 2276 } 2277 2278 r = gfx_v9_0_mec_init(adev); 2279 if (r) { 2280 DRM_ERROR("Failed to init MEC BOs!\n"); 2281 return r; 2282 } 2283 2284 /* set up the gfx ring 
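* There is at most one hardware GFX ring on gfx v9 (none on compute-only parts); when mcbp (mid command buffer preemption) is enabled, the software rings created below are multiplexed onto it.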
*/ 2285 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2286 ring = &adev->gfx.gfx_ring[i]; 2287 ring->ring_obj = NULL; 2288 if (!i) 2289 sprintf(ring->name, "gfx"); 2290 else 2291 sprintf(ring->name, "gfx_%d", i); 2292 ring->use_doorbell = true; 2293 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2294 2295 /* disable scheduler on the real ring */ 2296 ring->no_scheduler = adev->gfx.mcbp; 2297 ring->vm_hub = AMDGPU_GFXHUB(0); 2298 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2299 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2300 AMDGPU_RING_PRIO_DEFAULT, NULL); 2301 if (r) 2302 return r; 2303 } 2304 2305 /* set up the software rings */ 2306 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2307 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2308 ring = &adev->gfx.sw_gfx_ring[i]; 2309 ring->ring_obj = NULL; 2310 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2311 ring->use_doorbell = true; 2312 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2313 ring->is_sw_ring = true; 2314 hw_prio = amdgpu_sw_ring_priority(i); 2315 ring->vm_hub = AMDGPU_GFXHUB(0); 2316 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2317 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2318 NULL); 2319 if (r) 2320 return r; 2321 ring->wptr = 0; 2322 } 2323 2324 /* init the muxer and add software rings */ 2325 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2326 GFX9_NUM_SW_GFX_RINGS); 2327 if (r) { 2328 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2329 return r; 2330 } 2331 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2332 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2333 &adev->gfx.sw_gfx_ring[i]); 2334 if (r) { 2335 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2336 return r; 2337 } 2338 } 2339 } 2340 2341 /* set up the compute queues - allocate horizontally across pipes */ 2342 ring_id = 0; 2343 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2344 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2345 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2346 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2347 k, j)) 2348 continue; 2349 2350 r = gfx_v9_0_compute_ring_init(adev, 2351 ring_id, 2352 i, k, j); 2353 if (r) 2354 return r; 2355 2356 ring_id++; 2357 } 2358 } 2359 } 2360 2361 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2362 if (r) { 2363 DRM_ERROR("Failed to init KIQ BOs!\n"); 2364 return r; 2365 } 2366 2367 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2368 if (r) 2369 return r; 2370 2371 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2372 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2373 if (r) 2374 return r; 2375 2376 adev->gfx.ce_ram_size = 0x8000; 2377 2378 r = gfx_v9_0_gpu_early_init(adev); 2379 if (r) 2380 return r; 2381 2382 if (amdgpu_gfx_ras_sw_init(adev)) { 2383 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2384 return -EINVAL; 2385 } 2386 2387 gfx_v9_0_alloc_ip_dump(adev); 2388 2389 r = amdgpu_gfx_sysfs_isolation_shader_init(adev); 2390 if (r) 2391 return r; 2392 2393 return 0; 2394 } 2395 2396 2397 static int gfx_v9_0_sw_fini(void *handle) 2398 { 2399 int i; 2400 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2401 2402 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2403 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2404 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2405 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2406 } 2407 2408 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2409 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2410 for (i = 0; i < 
adev->gfx.num_compute_rings; i++) 2411 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2412 2413 amdgpu_gfx_mqd_sw_fini(adev, 0); 2414 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2415 amdgpu_gfx_kiq_fini(adev, 0); 2416 2417 gfx_v9_0_mec_fini(adev); 2418 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2419 &adev->gfx.rlc.clear_state_gpu_addr, 2420 (void **)&adev->gfx.rlc.cs_ptr); 2421 if (adev->flags & AMD_IS_APU) { 2422 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2423 &adev->gfx.rlc.cp_table_gpu_addr, 2424 (void **)&adev->gfx.rlc.cp_table_ptr); 2425 } 2426 gfx_v9_0_free_microcode(adev); 2427 2428 amdgpu_gfx_sysfs_isolation_shader_fini(adev); 2429 2430 kfree(adev->gfx.ip_dump_core); 2431 kfree(adev->gfx.ip_dump_compute_queues); 2432 2433 return 0; 2434 } 2435 2436 2437 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2438 { 2439 /* TODO */ 2440 } 2441 2442 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2443 u32 instance, int xcc_id) 2444 { 2445 u32 data; 2446 2447 if (instance == 0xffffffff) 2448 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2449 else 2450 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2451 2452 if (se_num == 0xffffffff) 2453 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2454 else 2455 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2456 2457 if (sh_num == 0xffffffff) 2458 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2459 else 2460 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2461 2462 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2463 } 2464 2465 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2466 { 2467 u32 data, mask; 2468 2469 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2470 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2471 2472 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2473 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2474 2475 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2476 adev->gfx.config.max_sh_per_se); 2477 2478 return (~data) & mask; 2479 } 2480 2481 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2482 { 2483 int i, j; 2484 u32 data; 2485 u32 active_rbs = 0; 2486 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2487 adev->gfx.config.max_sh_per_se; 2488 2489 mutex_lock(&adev->grbm_idx_mutex); 2490 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2491 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2492 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2493 data = gfx_v9_0_get_rb_active_bitmap(adev); 2494 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 2495 rb_bitmap_width_per_sh); 2496 } 2497 } 2498 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2499 mutex_unlock(&adev->grbm_idx_mutex); 2500 2501 adev->gfx.config.backend_enable_mask = active_rbs; 2502 adev->gfx.config.num_rbs = hweight32(active_rbs); 2503 } 2504 2505 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2506 uint32_t first_vmid, 2507 uint32_t last_vmid) 2508 { 2509 uint32_t data; 2510 uint32_t trap_config_vmid_mask = 0; 2511 int i; 2512 2513 /* Calculate trap config vmid mask */ 2514 for (i = first_vmid; i < last_vmid; i++) 2515 trap_config_vmid_mask |= (1 << i); 2516 2517 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2518 VMID_SEL, trap_config_vmid_mask); 2519 data = REG_SET_FIELD(data, 
SPI_GDBG_TRAP_CONFIG, 2520 TRAP_EN, 1); 2521 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2522 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2523 2524 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2525 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2526 } 2527 2528 #define DEFAULT_SH_MEM_BASES (0x6000) 2529 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2530 { 2531 int i; 2532 uint32_t sh_mem_config; 2533 uint32_t sh_mem_bases; 2534 2535 /* 2536 * Configure apertures: 2537 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2538 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2539 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2540 */ 2541 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2542 2543 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2544 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2545 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2546 2547 mutex_lock(&adev->srbm_mutex); 2548 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2549 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2550 /* CP and shaders */ 2551 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2552 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2553 } 2554 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2555 mutex_unlock(&adev->srbm_mutex); 2556 2557 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2558 access. These should be enabled by FW for target VMIDs. */ 2559 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2560 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2561 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2562 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2563 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2564 } 2565 } 2566 2567 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2568 { 2569 int vmid; 2570 2571 /* 2572 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2573 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2574 * the driver can enable them for graphics. VMID0 should maintain 2575 * access so that HWS firmware can save/restore entries. 
2576 */ 2577 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2578 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2579 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2580 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2581 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2582 } 2583 } 2584 2585 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2586 { 2587 uint32_t tmp; 2588 2589 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2590 case IP_VERSION(9, 4, 1): 2591 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2592 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2593 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2594 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2595 break; 2596 default: 2597 break; 2598 } 2599 } 2600 2601 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2602 { 2603 u32 tmp; 2604 int i; 2605 2606 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2607 2608 gfx_v9_0_tiling_mode_table_init(adev); 2609 2610 if (adev->gfx.num_gfx_rings) 2611 gfx_v9_0_setup_rb(adev); 2612 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2613 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2614 2615 /* XXX SH_MEM regs */ 2616 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2617 mutex_lock(&adev->srbm_mutex); 2618 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2619 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2620 /* CP and shaders */ 2621 if (i == 0) { 2622 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2623 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2624 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2625 !!adev->gmc.noretry); 2626 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2627 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2628 } else { 2629 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2630 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2631 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2632 !!adev->gmc.noretry); 2633 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2634 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2635 (adev->gmc.private_aperture_start >> 48)); 2636 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2637 (adev->gmc.shared_aperture_start >> 48)); 2638 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2639 } 2640 } 2641 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2642 2643 mutex_unlock(&adev->srbm_mutex); 2644 2645 gfx_v9_0_init_compute_vmid(adev); 2646 gfx_v9_0_init_gds_vmid(adev); 2647 gfx_v9_0_init_sq_config(adev); 2648 } 2649 2650 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2651 { 2652 u32 i, j, k; 2653 u32 mask; 2654 2655 mutex_lock(&adev->grbm_idx_mutex); 2656 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2657 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2658 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2659 for (k = 0; k < adev->usec_timeout; k++) { 2660 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2661 break; 2662 udelay(1); 2663 } 2664 if (k == adev->usec_timeout) { 2665 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2666 0xffffffff, 0xffffffff, 0); 2667 mutex_unlock(&adev->grbm_idx_mutex); 2668 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2669 i, j); 2670 return; 2671 } 2672 } 2673 } 2674 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2675 mutex_unlock(&adev->grbm_idx_mutex); 2676 2677 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2678 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2679 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2680 
RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2681 for (k = 0; k < adev->usec_timeout; k++) { 2682 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2683 break; 2684 udelay(1); 2685 } 2686 } 2687 2688 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2689 bool enable) 2690 { 2691 u32 tmp; 2692 2693 /* These interrupts should be enabled to drive DS clock */ 2694 2695 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2696 2697 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2698 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2699 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2700 if (adev->gfx.num_gfx_rings) 2701 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2702 2703 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2704 } 2705 2706 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2707 { 2708 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2709 /* csib */ 2710 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2711 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2712 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2713 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2714 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2715 adev->gfx.rlc.clear_state_size); 2716 } 2717 2718 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2719 int indirect_offset, 2720 int list_size, 2721 int *unique_indirect_regs, 2722 int unique_indirect_reg_count, 2723 int *indirect_start_offsets, 2724 int *indirect_start_offsets_count, 2725 int max_start_offsets_count) 2726 { 2727 int idx; 2728 2729 for (; indirect_offset < list_size; indirect_offset++) { 2730 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2731 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2732 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2733 2734 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2735 indirect_offset += 2; 2736 2737 /* look for the matching indice */ 2738 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2739 if (unique_indirect_regs[idx] == 2740 register_list_format[indirect_offset] || 2741 !unique_indirect_regs[idx]) 2742 break; 2743 } 2744 2745 BUG_ON(idx >= unique_indirect_reg_count); 2746 2747 if (!unique_indirect_regs[idx]) 2748 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2749 2750 indirect_offset++; 2751 } 2752 } 2753 } 2754 2755 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2756 { 2757 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2758 int unique_indirect_reg_count = 0; 2759 2760 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2761 int indirect_start_offsets_count = 0; 2762 2763 int list_size = 0; 2764 int i = 0, j = 0; 2765 u32 tmp = 0; 2766 2767 u32 *register_list_format = 2768 kmemdup(adev->gfx.rlc.register_list_format, 2769 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2770 if (!register_list_format) 2771 return -ENOMEM; 2772 2773 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2774 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2775 gfx_v9_1_parse_ind_reg_list(register_list_format, 2776 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2777 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2778 unique_indirect_regs, 2779 
unique_indirect_reg_count, 2780 indirect_start_offsets, 2781 &indirect_start_offsets_count, 2782 ARRAY_SIZE(indirect_start_offsets)); 2783 2784 /* enable auto inc in case it is disabled */ 2785 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2786 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2787 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2788 2789 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2790 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2791 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2792 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2793 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2794 adev->gfx.rlc.register_restore[i]); 2795 2796 /* load indirect register */ 2797 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2798 adev->gfx.rlc.reg_list_format_start); 2799 2800 /* direct register portion */ 2801 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2802 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2803 register_list_format[i]); 2804 2805 /* indirect register portion */ 2806 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2807 if (register_list_format[i] == 0xFFFFFFFF) { 2808 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2809 continue; 2810 } 2811 2812 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2813 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2814 2815 for (j = 0; j < unique_indirect_reg_count; j++) { 2816 if (register_list_format[i] == unique_indirect_regs[j]) { 2817 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2818 break; 2819 } 2820 } 2821 2822 BUG_ON(j >= unique_indirect_reg_count); 2823 2824 i++; 2825 } 2826 2827 /* set save/restore list size */ 2828 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2829 list_size = list_size >> 1; 2830 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2831 adev->gfx.rlc.reg_restore_list_size); 2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2833 2834 /* write the starting offsets to RLC scratch ram */ 2835 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2836 adev->gfx.rlc.starting_offsets_start); 2837 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2838 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2839 indirect_start_offsets[i]); 2840 2841 /* load unique indirect regs*/ 2842 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2843 if (unique_indirect_regs[i] != 0) { 2844 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2845 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2846 unique_indirect_regs[i] & 0x3FFFF); 2847 2848 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2849 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2850 unique_indirect_regs[i] >> 20); 2851 } 2852 } 2853 2854 kfree(register_list_format); 2855 return 0; 2856 } 2857 2858 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2859 { 2860 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2861 } 2862 2863 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2864 bool enable) 2865 { 2866 uint32_t data = 0; 2867 uint32_t default_data = 0; 2868 2869 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2870 if (enable) { 2871 /* enable GFXIP control over CGPG */ 2872 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2873 if(default_data != data) 2874 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2875 
2876 /* update status */ 2877 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; 2878 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); 2879 if(default_data != data) 2880 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2881 } else { 2882 /* restore GFXIP control over GCPG */ 2883 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; 2884 if(default_data != data) 2885 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); 2886 } 2887 } 2888 2889 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) 2890 { 2891 uint32_t data = 0; 2892 2893 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2894 AMD_PG_SUPPORT_GFX_SMG | 2895 AMD_PG_SUPPORT_GFX_DMG)) { 2896 /* init IDLE_POLL_COUNT = 60 */ 2897 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); 2898 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 2899 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 2900 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); 2901 2902 /* init RLC PG Delay */ 2903 data = 0; 2904 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 2905 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 2906 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 2907 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 2908 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); 2909 2910 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); 2911 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 2912 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 2913 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); 2914 2915 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); 2916 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; 2917 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); 2918 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); 2919 2920 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); 2921 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 2922 2923 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ 2924 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 2925 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); 2926 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) 2927 pwr_10_0_gfxip_control_over_cgpg(adev, true); 2928 } 2929 } 2930 2931 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 2932 bool enable) 2933 { 2934 uint32_t data = 0; 2935 uint32_t default_data = 0; 2936 2937 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2938 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2939 SMU_CLK_SLOWDOWN_ON_PU_ENABLE, 2940 enable ? 1 : 0); 2941 if (default_data != data) 2942 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2943 } 2944 2945 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 2946 bool enable) 2947 { 2948 uint32_t data = 0; 2949 uint32_t default_data = 0; 2950 2951 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2952 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2953 SMU_CLK_SLOWDOWN_ON_PD_ENABLE, 2954 enable ? 
1 : 0); 2955 if(default_data != data) 2956 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2957 } 2958 2959 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2960 bool enable) 2961 { 2962 uint32_t data = 0; 2963 uint32_t default_data = 0; 2964 2965 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2966 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2967 CP_PG_DISABLE, 2968 enable ? 0 : 1); 2969 if(default_data != data) 2970 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2971 } 2972 2973 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 2974 bool enable) 2975 { 2976 uint32_t data, default_data; 2977 2978 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2979 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2980 GFX_POWER_GATING_ENABLE, 2981 enable ? 1 : 0); 2982 if(default_data != data) 2983 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2984 } 2985 2986 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 2987 bool enable) 2988 { 2989 uint32_t data, default_data; 2990 2991 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 2992 data = REG_SET_FIELD(data, RLC_PG_CNTL, 2993 GFX_PIPELINE_PG_ENABLE, 2994 enable ? 1 : 0); 2995 if(default_data != data) 2996 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2997 2998 if (!enable) 2999 /* read any GFX register to wake up GFX */ 3000 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3001 } 3002 3003 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3004 bool enable) 3005 { 3006 uint32_t data, default_data; 3007 3008 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3009 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3010 STATIC_PER_CU_PG_ENABLE, 3011 enable ? 1 : 0); 3012 if(default_data != data) 3013 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3014 } 3015 3016 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3017 bool enable) 3018 { 3019 uint32_t data, default_data; 3020 3021 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3022 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3023 DYN_PER_CU_PG_ENABLE, 3024 enable ? 1 : 0); 3025 if(default_data != data) 3026 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3027 } 3028 3029 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3030 { 3031 gfx_v9_0_init_csb(adev); 3032 3033 /* 3034 * Rlc save restore list is workable since v2_1. 3035 * And it's needed by gfxoff feature. 
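	 * The list itself is written into the RLC SRM ARAM and GPM scratch by
	 * gfx_v9_1_init_rlc_save_restore_list() before the save/restore
	 * machine is switched on below.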
3036 */ 3037 if (adev->gfx.rlc.is_rlc_v2_1) { 3038 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3039 IP_VERSION(9, 2, 1) || 3040 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3041 gfx_v9_1_init_rlc_save_restore_list(adev); 3042 gfx_v9_0_enable_save_restore_machine(adev); 3043 } 3044 3045 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3046 AMD_PG_SUPPORT_GFX_SMG | 3047 AMD_PG_SUPPORT_GFX_DMG | 3048 AMD_PG_SUPPORT_CP | 3049 AMD_PG_SUPPORT_GDS | 3050 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3051 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3052 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3053 gfx_v9_0_init_gfx_power_gating(adev); 3054 } 3055 } 3056 3057 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3058 { 3059 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3060 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3061 gfx_v9_0_wait_for_rlc_serdes(adev); 3062 } 3063 3064 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3065 { 3066 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3067 udelay(50); 3068 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3069 udelay(50); 3070 } 3071 3072 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3073 { 3074 #ifdef AMDGPU_RLC_DEBUG_RETRY 3075 u32 rlc_ucode_ver; 3076 #endif 3077 3078 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3079 udelay(50); 3080 3081 /* carrizo do enable cp interrupt after cp inited */ 3082 if (!(adev->flags & AMD_IS_APU)) { 3083 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3084 udelay(50); 3085 } 3086 3087 #ifdef AMDGPU_RLC_DEBUG_RETRY 3088 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3089 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3090 if(rlc_ucode_ver == 0x108) { 3091 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3092 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3093 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3094 * default is 0x9C4 to create a 100us interval */ 3095 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3096 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3097 * to disable the page fault retry interrupts, default is 3098 * 0x100 (256) */ 3099 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3100 } 3101 #endif 3102 } 3103 3104 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3105 { 3106 const struct rlc_firmware_header_v2_0 *hdr; 3107 const __le32 *fw_data; 3108 unsigned i, fw_size; 3109 3110 if (!adev->gfx.rlc_fw) 3111 return -EINVAL; 3112 3113 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3114 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3115 3116 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3117 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3118 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3119 3120 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3121 RLCG_UCODE_LOADING_START_ADDRESS); 3122 for (i = 0; i < fw_size; i++) 3123 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3124 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3125 3126 return 0; 3127 } 3128 3129 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3130 { 3131 int r; 3132 3133 if (amdgpu_sriov_vf(adev)) { 3134 gfx_v9_0_init_csb(adev); 3135 return 0; 3136 } 3137 3138 adev->gfx.rlc.funcs->stop(adev); 3139 3140 /* disable CG */ 3141 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3142 3143 gfx_v9_0_init_pg(adev); 3144 3145 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3146 /* legacy rlc firmware loading */ 3147 r = 
gfx_v9_0_rlc_load_microcode(adev); 3148 if (r) 3149 return r; 3150 } 3151 3152 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3153 case IP_VERSION(9, 2, 2): 3154 case IP_VERSION(9, 1, 0): 3155 gfx_v9_0_init_lbpw(adev); 3156 if (amdgpu_lbpw == 0) 3157 gfx_v9_0_enable_lbpw(adev, false); 3158 else 3159 gfx_v9_0_enable_lbpw(adev, true); 3160 break; 3161 case IP_VERSION(9, 4, 0): 3162 gfx_v9_4_init_lbpw(adev); 3163 if (amdgpu_lbpw > 0) 3164 gfx_v9_0_enable_lbpw(adev, true); 3165 else 3166 gfx_v9_0_enable_lbpw(adev, false); 3167 break; 3168 default: 3169 break; 3170 } 3171 3172 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3173 3174 adev->gfx.rlc.funcs->start(adev); 3175 3176 return 0; 3177 } 3178 3179 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3180 { 3181 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3182 3183 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3184 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3185 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3186 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3187 udelay(50); 3188 } 3189 3190 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3191 { 3192 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3193 const struct gfx_firmware_header_v1_0 *ce_hdr; 3194 const struct gfx_firmware_header_v1_0 *me_hdr; 3195 const __le32 *fw_data; 3196 unsigned i, fw_size; 3197 3198 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3199 return -EINVAL; 3200 3201 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3202 adev->gfx.pfp_fw->data; 3203 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3204 adev->gfx.ce_fw->data; 3205 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3206 adev->gfx.me_fw->data; 3207 3208 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3209 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3210 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3211 3212 gfx_v9_0_cp_gfx_enable(adev, false); 3213 3214 /* PFP */ 3215 fw_data = (const __le32 *) 3216 (adev->gfx.pfp_fw->data + 3217 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3218 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3219 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3220 for (i = 0; i < fw_size; i++) 3221 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3222 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3223 3224 /* CE */ 3225 fw_data = (const __le32 *) 3226 (adev->gfx.ce_fw->data + 3227 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3228 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3229 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3230 for (i = 0; i < fw_size; i++) 3231 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3232 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3233 3234 /* ME */ 3235 fw_data = (const __le32 *) 3236 (adev->gfx.me_fw->data + 3237 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3238 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3239 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3240 for (i = 0; i < fw_size; i++) 3241 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3242 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3243 3244 return 0; 3245 } 3246 3247 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3248 { 3249 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3250 const struct cs_section_def *sect = NULL; 3251 const struct cs_extent_def *ext = NULL; 3252 int r, i, tmp; 3253 
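	/* The sequence below programs the CP context limits, enables the gfx
	 * CP, and then submits the clear-state (CSB) preamble on the gfx ring:
	 * PREAMBLE_CNTL/CONTEXT_CONTROL packets, the SECT_CONTEXT extents from
	 * gfx9_cs_data, and the final CLEAR_STATE/SET_BASE setup.
	 */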
	/* init the CP */
	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	/* Only apply this quirk to the gfx9 APU series; the gfx10/gfx11 APUs
	 * have been confirmed not to need this update.
	 */
	if (adev->flags & AMD_IS_APU &&
	    adev->in_s3 && !adev->suspend_complete) {
		DRM_INFO("Will skip the CSB packet resubmit\n");
		return 0;
	}
	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
	       (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}

static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15(GC, 0,
mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3352 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3353 3354 mdelay(1); 3355 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3356 3357 rb_addr = ring->gpu_addr >> 8; 3358 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3359 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3360 3361 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3362 if (ring->use_doorbell) { 3363 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3364 DOORBELL_OFFSET, ring->doorbell_index); 3365 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3366 DOORBELL_EN, 1); 3367 } else { 3368 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3369 } 3370 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3371 3372 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3373 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3374 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3375 3376 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3377 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3378 3379 3380 /* start the ring */ 3381 gfx_v9_0_cp_gfx_start(adev); 3382 3383 return 0; 3384 } 3385 3386 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3387 { 3388 if (enable) { 3389 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3390 } else { 3391 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3392 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3393 adev->gfx.kiq[0].ring.sched.ready = false; 3394 } 3395 udelay(50); 3396 } 3397 3398 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3399 { 3400 const struct gfx_firmware_header_v1_0 *mec_hdr; 3401 const __le32 *fw_data; 3402 unsigned i; 3403 u32 tmp; 3404 3405 if (!adev->gfx.mec_fw) 3406 return -EINVAL; 3407 3408 gfx_v9_0_cp_compute_enable(adev, false); 3409 3410 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3411 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3412 3413 fw_data = (const __le32 *) 3414 (adev->gfx.mec_fw->data + 3415 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3416 tmp = 0; 3417 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3418 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3419 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3420 3421 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3422 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3423 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3424 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3425 3426 /* MEC1 */ 3427 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3428 mec_hdr->jt_offset); 3429 for (i = 0; i < mec_hdr->jt_size; i++) 3430 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3431 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3432 3433 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3434 adev->gfx.mec_fw_version); 3435 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
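	 * In this legacy load path only the MEC1 jump table
	 * (mmCP_MEC_ME1_UCODE_ADDR/DATA) is programmed above.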
*/ 3436 3437 return 0; 3438 } 3439 3440 /* KIQ functions */ 3441 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3442 { 3443 uint32_t tmp; 3444 struct amdgpu_device *adev = ring->adev; 3445 3446 /* tell RLC which is KIQ queue */ 3447 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3448 tmp &= 0xffffff00; 3449 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3450 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3451 tmp |= 0x80; 3452 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 3453 } 3454 3455 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3456 { 3457 struct amdgpu_device *adev = ring->adev; 3458 3459 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3460 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3461 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3462 mqd->cp_hqd_queue_priority = 3463 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3464 } 3465 } 3466 } 3467 3468 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3469 { 3470 struct amdgpu_device *adev = ring->adev; 3471 struct v9_mqd *mqd = ring->mqd_ptr; 3472 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3473 uint32_t tmp; 3474 3475 mqd->header = 0xC0310800; 3476 mqd->compute_pipelinestat_enable = 0x00000001; 3477 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3478 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3479 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3480 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3481 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3482 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3483 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3484 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3485 mqd->compute_misc_reserved = 0x00000003; 3486 3487 mqd->dynamic_cu_mask_addr_lo = 3488 lower_32_bits(ring->mqd_gpu_addr 3489 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3490 mqd->dynamic_cu_mask_addr_hi = 3491 upper_32_bits(ring->mqd_gpu_addr 3492 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3493 3494 eop_base_addr = ring->eop_gpu_addr >> 8; 3495 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3496 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3497 3498 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3499 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3500 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3501 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3502 3503 mqd->cp_hqd_eop_control = tmp; 3504 3505 /* enable doorbell? 
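	 * Sample the current doorbell control value here; the OFFSET, EN,
	 * SOURCE and HIT fields are then programmed below according to
	 * ring->use_doorbell.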
*/ 3506 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3507 3508 if (ring->use_doorbell) { 3509 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3510 DOORBELL_OFFSET, ring->doorbell_index); 3511 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3512 DOORBELL_EN, 1); 3513 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3514 DOORBELL_SOURCE, 0); 3515 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3516 DOORBELL_HIT, 0); 3517 } else { 3518 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3519 DOORBELL_EN, 0); 3520 } 3521 3522 mqd->cp_hqd_pq_doorbell_control = tmp; 3523 3524 /* disable the queue if it's active */ 3525 ring->wptr = 0; 3526 mqd->cp_hqd_dequeue_request = 0; 3527 mqd->cp_hqd_pq_rptr = 0; 3528 mqd->cp_hqd_pq_wptr_lo = 0; 3529 mqd->cp_hqd_pq_wptr_hi = 0; 3530 3531 /* set the pointer to the MQD */ 3532 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3533 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3534 3535 /* set MQD vmid to 0 */ 3536 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3537 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3538 mqd->cp_mqd_control = tmp; 3539 3540 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3541 hqd_gpu_addr = ring->gpu_addr >> 8; 3542 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3543 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3544 3545 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3546 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3547 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3548 (order_base_2(ring->ring_size / 4) - 1)); 3549 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3550 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3551 #ifdef __BIG_ENDIAN 3552 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3553 #endif 3554 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3555 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3556 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3557 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3558 mqd->cp_hqd_pq_control = tmp; 3559 3560 /* set the wb address whether it's enabled or not */ 3561 wb_gpu_addr = ring->rptr_gpu_addr; 3562 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3563 mqd->cp_hqd_pq_rptr_report_addr_hi = 3564 upper_32_bits(wb_gpu_addr) & 0xffff; 3565 3566 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3567 wb_gpu_addr = ring->wptr_gpu_addr; 3568 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3569 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3570 3571 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3572 ring->wptr = 0; 3573 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3574 3575 /* set the vmid for the queue */ 3576 mqd->cp_hqd_vmid = 0; 3577 3578 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3579 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3580 mqd->cp_hqd_persistent_state = tmp; 3581 3582 /* set MIN_IB_AVAIL_SIZE */ 3583 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3584 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3585 mqd->cp_hqd_ib_control = tmp; 3586 3587 /* set static priority for a queue/ring */ 3588 gfx_v9_0_mqd_set_priority(ring, mqd); 3589 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3590 3591 /* map_queues packet doesn't need activate the queue, 3592 * so only kiq need set this field. 
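	 * Compute queues brought up through the KIQ MAP_QUEUES packet are
	 * activated by the packet itself, so cp_hqd_active is left clear for
	 * them here.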
3593 */ 3594 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3595 mqd->cp_hqd_active = 1; 3596 3597 return 0; 3598 } 3599 3600 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3601 { 3602 struct amdgpu_device *adev = ring->adev; 3603 struct v9_mqd *mqd = ring->mqd_ptr; 3604 int j; 3605 3606 /* disable wptr polling */ 3607 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3608 3609 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3610 mqd->cp_hqd_eop_base_addr_lo); 3611 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3612 mqd->cp_hqd_eop_base_addr_hi); 3613 3614 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3615 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3616 mqd->cp_hqd_eop_control); 3617 3618 /* enable doorbell? */ 3619 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3620 mqd->cp_hqd_pq_doorbell_control); 3621 3622 /* disable the queue if it's active */ 3623 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3624 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3625 for (j = 0; j < adev->usec_timeout; j++) { 3626 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3627 break; 3628 udelay(1); 3629 } 3630 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3631 mqd->cp_hqd_dequeue_request); 3632 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3633 mqd->cp_hqd_pq_rptr); 3634 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3635 mqd->cp_hqd_pq_wptr_lo); 3636 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3637 mqd->cp_hqd_pq_wptr_hi); 3638 } 3639 3640 /* set the pointer to the MQD */ 3641 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3642 mqd->cp_mqd_base_addr_lo); 3643 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3644 mqd->cp_mqd_base_addr_hi); 3645 3646 /* set MQD vmid to 0 */ 3647 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3648 mqd->cp_mqd_control); 3649 3650 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3651 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3652 mqd->cp_hqd_pq_base_lo); 3653 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3654 mqd->cp_hqd_pq_base_hi); 3655 3656 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3657 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3658 mqd->cp_hqd_pq_control); 3659 3660 /* set the wb address whether it's enabled or not */ 3661 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3662 mqd->cp_hqd_pq_rptr_report_addr_lo); 3663 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3664 mqd->cp_hqd_pq_rptr_report_addr_hi); 3665 3666 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3667 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3668 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3669 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3670 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3671 3672 /* enable the doorbell if requested */ 3673 if (ring->use_doorbell) { 3674 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3675 (adev->doorbell_index.kiq * 2) << 2); 3676 /* If GC has entered CGPG, ringing doorbell > first page 3677 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3678 * workaround this issue. And this change has to align with firmware 3679 * update. 
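	 * Whether the enlarged range is applied is decided by
	 * check_if_enlarge_doorbell_range() below, presumably keyed off the
	 * installed CP firmware as noted above.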
		 */
		if (check_if_enlarge_doorbell_range(adev))
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
				     (adev->doorbell.size - 4));
		else
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
				     (adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
			 mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
			 mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
			 mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
			 mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
			 mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int j;

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}

		if (j == adev->usec_timeout) {
			DRM_DEBUG("KIQ dequeue request failed.\n");

			/* Manually disable the queue if the dequeue request times out */
			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
		}

		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

	return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	struct v9_mqd *tmp_mqd;

	gfx_v9_0_kiq_setting(ring);

	/* The GPU could be in a bad state during probe if the driver had to
	 * trigger a reset after loading the SMU; in that case the MQD has not
	 * been initialized and must be re-initialized here.
3764 * check mqd->cp_hqd_pq_control since this value should not be 0 3765 */ 3766 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 3767 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3768 /* for GPU_RESET case , reset MQD to a clean status */ 3769 if (adev->gfx.kiq[0].mqd_backup) 3770 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 3771 3772 /* reset ring buffer */ 3773 ring->wptr = 0; 3774 amdgpu_ring_clear_ring(ring); 3775 3776 mutex_lock(&adev->srbm_mutex); 3777 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3778 gfx_v9_0_kiq_init_register(ring); 3779 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3780 mutex_unlock(&adev->srbm_mutex); 3781 } else { 3782 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3783 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3784 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3785 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 3786 amdgpu_ring_clear_ring(ring); 3787 mutex_lock(&adev->srbm_mutex); 3788 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3789 gfx_v9_0_mqd_init(ring); 3790 gfx_v9_0_kiq_init_register(ring); 3791 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3792 mutex_unlock(&adev->srbm_mutex); 3793 3794 if (adev->gfx.kiq[0].mqd_backup) 3795 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 3796 } 3797 3798 return 0; 3799 } 3800 3801 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) 3802 { 3803 struct amdgpu_device *adev = ring->adev; 3804 struct v9_mqd *mqd = ring->mqd_ptr; 3805 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3806 struct v9_mqd *tmp_mqd; 3807 3808 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3809 * is not be initialized before 3810 */ 3811 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3812 3813 if (!restore && (!tmp_mqd->cp_hqd_pq_control || 3814 (!amdgpu_in_reset(adev) && !adev->in_suspend))) { 3815 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3816 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3817 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3818 mutex_lock(&adev->srbm_mutex); 3819 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3820 gfx_v9_0_mqd_init(ring); 3821 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3822 mutex_unlock(&adev->srbm_mutex); 3823 3824 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3825 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3826 } else { 3827 /* restore MQD to a clean status */ 3828 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3829 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3830 /* reset ring buffer */ 3831 ring->wptr = 0; 3832 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 3833 amdgpu_ring_clear_ring(ring); 3834 } 3835 3836 return 0; 3837 } 3838 3839 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3840 { 3841 struct amdgpu_ring *ring; 3842 int r; 3843 3844 ring = &adev->gfx.kiq[0].ring; 3845 3846 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3847 if (unlikely(r != 0)) 3848 return r; 3849 3850 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3851 if (unlikely(r != 0)) { 3852 amdgpu_bo_unreserve(ring->mqd_obj); 3853 return r; 3854 } 3855 3856 gfx_v9_0_kiq_init_queue(ring); 3857 amdgpu_bo_kunmap(ring->mqd_obj); 3858 ring->mqd_ptr = NULL; 3859 amdgpu_bo_unreserve(ring->mqd_obj); 3860 return 0; 3861 } 3862 3863 static int 
gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3864 { 3865 struct amdgpu_ring *ring = NULL; 3866 int r = 0, i; 3867 3868 gfx_v9_0_cp_compute_enable(adev, true); 3869 3870 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3871 ring = &adev->gfx.compute_ring[i]; 3872 3873 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3874 if (unlikely(r != 0)) 3875 goto done; 3876 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3877 if (!r) { 3878 r = gfx_v9_0_kcq_init_queue(ring, false); 3879 amdgpu_bo_kunmap(ring->mqd_obj); 3880 ring->mqd_ptr = NULL; 3881 } 3882 amdgpu_bo_unreserve(ring->mqd_obj); 3883 if (r) 3884 goto done; 3885 } 3886 3887 r = amdgpu_gfx_enable_kcq(adev, 0); 3888 done: 3889 return r; 3890 } 3891 3892 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3893 { 3894 int r, i; 3895 struct amdgpu_ring *ring; 3896 3897 if (!(adev->flags & AMD_IS_APU)) 3898 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3899 3900 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3901 if (adev->gfx.num_gfx_rings) { 3902 /* legacy firmware loading */ 3903 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3904 if (r) 3905 return r; 3906 } 3907 3908 r = gfx_v9_0_cp_compute_load_microcode(adev); 3909 if (r) 3910 return r; 3911 } 3912 3913 r = gfx_v9_0_kiq_resume(adev); 3914 if (r) 3915 return r; 3916 3917 if (adev->gfx.num_gfx_rings) { 3918 r = gfx_v9_0_cp_gfx_resume(adev); 3919 if (r) 3920 return r; 3921 } 3922 3923 r = gfx_v9_0_kcq_resume(adev); 3924 if (r) 3925 return r; 3926 3927 if (adev->gfx.num_gfx_rings) { 3928 ring = &adev->gfx.gfx_ring[0]; 3929 r = amdgpu_ring_test_helper(ring); 3930 if (r) 3931 return r; 3932 } 3933 3934 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3935 ring = &adev->gfx.compute_ring[i]; 3936 amdgpu_ring_test_helper(ring); 3937 } 3938 3939 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3940 3941 return 0; 3942 } 3943 3944 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3945 { 3946 u32 tmp; 3947 3948 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 3949 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 3950 return; 3951 3952 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3953 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3954 adev->df.hash_status.hash_64k); 3955 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3956 adev->df.hash_status.hash_2m); 3957 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3958 adev->df.hash_status.hash_1g); 3959 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3960 } 3961 3962 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3963 { 3964 if (adev->gfx.num_gfx_rings) 3965 gfx_v9_0_cp_gfx_enable(adev, enable); 3966 gfx_v9_0_cp_compute_enable(adev, enable); 3967 } 3968 3969 static int gfx_v9_0_hw_init(void *handle) 3970 { 3971 int r; 3972 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3973 3974 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 3975 adev->gfx.cleaner_shader_ptr); 3976 3977 if (!amdgpu_sriov_vf(adev)) 3978 gfx_v9_0_init_golden_registers(adev); 3979 3980 gfx_v9_0_constants_init(adev); 3981 3982 gfx_v9_0_init_tcp_config(adev); 3983 3984 r = adev->gfx.rlc.funcs->resume(adev); 3985 if (r) 3986 return r; 3987 3988 r = gfx_v9_0_cp_resume(adev); 3989 if (r) 3990 return r; 3991 3992 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 3993 gfx_v9_4_2_set_power_brake_sequence(adev); 3994 3995 return r; 3996 } 3997 3998 static int gfx_v9_0_hw_fini(void *handle) 3999 { 4000 struct amdgpu_device *adev = (struct 
amdgpu_device *)handle; 4001 4002 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4003 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4004 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4005 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4006 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4007 4008 /* DF freeze and kcq disable will fail */ 4009 if (!amdgpu_ras_intr_triggered()) 4010 /* disable KCQ to avoid CPC touch memory not valid anymore */ 4011 amdgpu_gfx_disable_kcq(adev, 0); 4012 4013 if (amdgpu_sriov_vf(adev)) { 4014 gfx_v9_0_cp_gfx_enable(adev, false); 4015 /* must disable polling for SRIOV when hw finished, otherwise 4016 * CPC engine may still keep fetching WB address which is already 4017 * invalid after sw finished and trigger DMAR reading error in 4018 * hypervisor side. 4019 */ 4020 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4021 return 0; 4022 } 4023 4024 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4025 * otherwise KIQ is hanging when binding back 4026 */ 4027 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4028 mutex_lock(&adev->srbm_mutex); 4029 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 4030 adev->gfx.kiq[0].ring.pipe, 4031 adev->gfx.kiq[0].ring.queue, 0, 0); 4032 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 4033 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 4034 mutex_unlock(&adev->srbm_mutex); 4035 } 4036 4037 gfx_v9_0_cp_enable(adev, false); 4038 4039 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 4040 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 4041 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 4042 dev_dbg(adev->dev, "Skipping RLC halt\n"); 4043 return 0; 4044 } 4045 4046 adev->gfx.rlc.funcs->stop(adev); 4047 return 0; 4048 } 4049 4050 static int gfx_v9_0_suspend(void *handle) 4051 { 4052 return gfx_v9_0_hw_fini(handle); 4053 } 4054 4055 static int gfx_v9_0_resume(void *handle) 4056 { 4057 return gfx_v9_0_hw_init(handle); 4058 } 4059 4060 static bool gfx_v9_0_is_idle(void *handle) 4061 { 4062 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4063 4064 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4065 GRBM_STATUS, GUI_ACTIVE)) 4066 return false; 4067 else 4068 return true; 4069 } 4070 4071 static int gfx_v9_0_wait_for_idle(void *handle) 4072 { 4073 unsigned i; 4074 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4075 4076 for (i = 0; i < adev->usec_timeout; i++) { 4077 if (gfx_v9_0_is_idle(handle)) 4078 return 0; 4079 udelay(1); 4080 } 4081 return -ETIMEDOUT; 4082 } 4083 4084 static int gfx_v9_0_soft_reset(void *handle) 4085 { 4086 u32 grbm_soft_reset = 0; 4087 u32 tmp; 4088 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4089 4090 /* GRBM_STATUS */ 4091 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4092 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4093 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4094 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4095 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4096 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4097 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4098 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4099 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4100 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4101 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4102 } 4103 4104 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4105 grbm_soft_reset = 
REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);


	if (grbm_soft_reset) {
		/* stop the rlc */
		adev->gfx.rlc.funcs->stop(adev);

		if (adev->gfx.num_gfx_rings)
			/* Disable GFX parsing/prefetching */
			gfx_v9_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v9_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset) {
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}

static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0;
	uint64_t value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 9 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 16) |	/* count sel */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* Don't keep waiting when a GPU reset is in progress: blocking here
	 * can stall the gpu_recover() routine, e.g. when this KIQ register
	 * read is triggered from TTM and ttm_bo_lock_delayed_workqueue()
	 * would then never return.
4189 * 4190 * also don't wait anymore for IRQ context 4191 * */ 4192 if (r < 1 && (amdgpu_in_reset(adev))) 4193 goto failed_kiq_read; 4194 4195 might_sleep(); 4196 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4197 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4198 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4199 } 4200 4201 if (cnt > MAX_KIQ_REG_TRY) 4202 goto failed_kiq_read; 4203 4204 mb(); 4205 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4206 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4207 amdgpu_device_wb_free(adev, reg_val_offs); 4208 return value; 4209 4210 failed_undo: 4211 amdgpu_ring_undo(ring); 4212 failed_unlock: 4213 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4214 failed_kiq_read: 4215 if (reg_val_offs) 4216 amdgpu_device_wb_free(adev, reg_val_offs); 4217 pr_err("failed to read gpu clock\n"); 4218 return ~0; 4219 } 4220 4221 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4222 { 4223 uint64_t clock, clock_lo, clock_hi, hi_check; 4224 4225 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4226 case IP_VERSION(9, 3, 0): 4227 preempt_disable(); 4228 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4229 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4230 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4231 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4232 * roughly every 42 seconds. 4233 */ 4234 if (hi_check != clock_hi) { 4235 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4236 clock_hi = hi_check; 4237 } 4238 preempt_enable(); 4239 clock = clock_lo | (clock_hi << 32ULL); 4240 break; 4241 default: 4242 amdgpu_gfx_off_ctrl(adev, false); 4243 mutex_lock(&adev->gfx.gpu_clock_mutex); 4244 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4245 IP_VERSION(9, 0, 1) && 4246 amdgpu_sriov_runtime(adev)) { 4247 clock = gfx_v9_0_kiq_read_clock(adev); 4248 } else { 4249 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4250 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4251 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4252 } 4253 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4254 amdgpu_gfx_off_ctrl(adev, true); 4255 break; 4256 } 4257 return clock; 4258 } 4259 4260 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4261 uint32_t vmid, 4262 uint32_t gds_base, uint32_t gds_size, 4263 uint32_t gws_base, uint32_t gws_size, 4264 uint32_t oa_base, uint32_t oa_size) 4265 { 4266 struct amdgpu_device *adev = ring->adev; 4267 4268 /* GDS Base */ 4269 gfx_v9_0_write_data_to_reg(ring, 0, false, 4270 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4271 gds_base); 4272 4273 /* GDS Size */ 4274 gfx_v9_0_write_data_to_reg(ring, 0, false, 4275 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4276 gds_size); 4277 4278 /* GWS */ 4279 gfx_v9_0_write_data_to_reg(ring, 0, false, 4280 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4281 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4282 4283 /* OA */ 4284 gfx_v9_0_write_data_to_reg(ring, 0, false, 4285 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4286 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4287 } 4288 4289 static const u32 vgpr_init_compute_shader[] = 4290 { 4291 0xb07c0000, 0xbe8000ff, 4292 0x000000f8, 0xbf110800, 4293 0x7e000280, 0x7e020280, 4294 0x7e040280, 0x7e060280, 4295 0x7e080280, 0x7e0a0280, 4296 0x7e0c0280, 0x7e0e0280, 4297 0x80808800, 0xbe803200, 4298 0xbf84fff5, 0xbf9c0000, 4299 0xd28c0001, 
0x0001007f, 4300 0xd28d0001, 0x0002027e, 4301 0x10020288, 0xb8810904, 4302 0xb7814000, 0xd1196a01, 4303 0x00000301, 0xbe800087, 4304 0xbefc00c1, 0xd89c4000, 4305 0x00020201, 0xd89cc080, 4306 0x00040401, 0x320202ff, 4307 0x00000800, 0x80808100, 4308 0xbf84fff8, 0x7e020280, 4309 0xbf810000, 0x00000000, 4310 }; 4311 4312 static const u32 sgpr_init_compute_shader[] = 4313 { 4314 0xb07c0000, 0xbe8000ff, 4315 0x0000005f, 0xbee50080, 4316 0xbe812c65, 0xbe822c65, 4317 0xbe832c65, 0xbe842c65, 4318 0xbe852c65, 0xb77c0005, 4319 0x80808500, 0xbf84fff8, 4320 0xbe800080, 0xbf810000, 4321 }; 4322 4323 static const u32 vgpr_init_compute_shader_arcturus[] = { 4324 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4325 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4326 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4327 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4328 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4329 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4330 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4331 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4332 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4333 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4334 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4335 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4336 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4337 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4338 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4339 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4340 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4341 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4342 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4343 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4344 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4345 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4346 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4347 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4348 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4349 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4350 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4351 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4352 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4353 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4354 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4355 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4356 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4357 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4358 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4359 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4360 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4361 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4362 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4363 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4364 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4365 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4366 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4367 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4368 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4369 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4370 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4371 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4372 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4373 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4374 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4375 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4376 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4377 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4378 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4379 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4380 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4381 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4382 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4383 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4384 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4385 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4386 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4387 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4388 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4389 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4390 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4391 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4392 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4393 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4394 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4395 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4396 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4397 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4398 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4399 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4400 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4401 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4402 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4403 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4404 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4405 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4406 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4407 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4408 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4409 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4410 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4411 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4412 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4413 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4414 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4415 0xbf84fff8, 0xbf810000, 4416 }; 4417 4418 /* When below register arrays changed, please update gpr_reg_size, 4419 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4420 to cover all gfx9 ASICs */ 4421 static const struct soc15_reg_entry vgpr_init_regs[] = { 4422 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4423 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4424 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4425 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4426 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4427 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4428 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4429 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4430 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4431 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4432 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4433 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4434 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4435 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4436 }; 4437 4438 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4439 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4440 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4441 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4442 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4443 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4444 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4445 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4446 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4447 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4448 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4453 }; 4454 4455 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4463 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4464 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4465 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4470 }; 4471 4472 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4487 }; 4488 4489 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4490 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4491 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4492 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4493 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4494 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4495 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4496 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4497 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4498 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4499 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4500 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4501 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4502 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4503 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4504 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4505 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4506 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4507 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4508 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4509 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4510 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4511 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4512 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4513 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4514 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4515 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4516 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4517 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4518 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4519 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4520 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4521 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4522 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4523 }; 4524 4525 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4526 { 4527 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4528 int i, r; 4529 4530 /* only support when RAS is enabled */ 4531 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4532 return 0; 4533 4534 r = amdgpu_ring_alloc(ring, 7); 4535 if (r) { 4536 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4537 ring->name, r); 4538 return r; 4539 } 4540 4541 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4542 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4543 4544 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4545 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4546 PACKET3_DMA_DATA_DST_SEL(1) | 4547 PACKET3_DMA_DATA_SRC_SEL(2) | 4548 PACKET3_DMA_DATA_ENGINE(0))); 4549 amdgpu_ring_write(ring, 0); 4550 amdgpu_ring_write(ring, 0); 4551 amdgpu_ring_write(ring, 0); 4552 amdgpu_ring_write(ring, 0); 4553 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4554 adev->gds.gds_size); 4555 4556 amdgpu_ring_commit(ring); 4557 4558 for (i = 0; i < adev->usec_timeout; i++) { 4559 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4560 break; 4561 udelay(1); 4562 } 4563 4564 if (i >= adev->usec_timeout) 4565 r = -ETIMEDOUT; 4566 4567 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4568 4569 return r; 4570 } 4571 4572 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4573 { 4574 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4575 struct amdgpu_ib ib; 4576 struct dma_fence *f = NULL; 4577 int r, i; 4578 unsigned total_size, vgpr_offset, sgpr_offset; 4579 u64 gpu_addr; 4580 4581 int compute_dim_x = adev->gfx.config.max_shader_engines * 4582 adev->gfx.config.max_cu_per_sh * 4583 adev->gfx.config.max_sh_per_se; 4584 int sgpr_work_group_size = 5; 4585 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4586 int vgpr_init_shader_size; 4587 const u32 *vgpr_init_shader_ptr; 4588 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4589 4590 /* only support when RAS is enabled */ 4591 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4592 return 0; 4593 4594 /* bail if the compute ring is not ready */ 4595 if (!ring->sched.ready) 4596 return 0; 4597 4598 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4599 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4600 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4601 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4602 } else { 4603 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4604 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4605 vgpr_init_regs_ptr = vgpr_init_regs; 4606 } 4607 4608 total_size = 4609 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4610 total_size += 4611 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4612 total_size += 4613 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4614 total_size = ALIGN(total_size, 256); 4615 vgpr_offset = total_size; 4616 total_size += ALIGN(vgpr_init_shader_size, 256); 4617 sgpr_offset = total_size; 4618 total_size += sizeof(sgpr_init_compute_shader); 4619 4620 /* allocate an indirect buffer to put the commands in */ 4621 memset(&ib, 0, sizeof(ib)); 4622 r = amdgpu_ib_get(adev, NULL, total_size, 4623 AMDGPU_IB_POOL_DIRECT, &ib); 4624 if (r) { 4625 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4626 return r; 4627 } 4628 4629 /* load the compute 
shaders */ 4630 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4631 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4632 4633 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4634 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4635 4636 /* init the ib length to 0 */ 4637 ib.length_dw = 0; 4638 4639 /* VGPR */ 4640 /* write the register state for the compute dispatch */ 4641 for (i = 0; i < gpr_reg_size; i++) { 4642 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4643 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4644 - PACKET3_SET_SH_REG_START; 4645 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4646 } 4647 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4648 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4649 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4650 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4651 - PACKET3_SET_SH_REG_START; 4652 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4653 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4654 4655 /* write dispatch packet */ 4656 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4657 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4658 ib.ptr[ib.length_dw++] = 1; /* y */ 4659 ib.ptr[ib.length_dw++] = 1; /* z */ 4660 ib.ptr[ib.length_dw++] = 4661 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4662 4663 /* write CS partial flush packet */ 4664 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4665 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4666 4667 /* SGPR1 */ 4668 /* write the register state for the compute dispatch */ 4669 for (i = 0; i < gpr_reg_size; i++) { 4670 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4671 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4672 - PACKET3_SET_SH_REG_START; 4673 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4674 } 4675 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4676 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4677 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4678 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4679 - PACKET3_SET_SH_REG_START; 4680 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4681 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4682 4683 /* write dispatch packet */ 4684 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4685 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4686 ib.ptr[ib.length_dw++] = 1; /* y */ 4687 ib.ptr[ib.length_dw++] = 1; /* z */ 4688 ib.ptr[ib.length_dw++] = 4689 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4690 4691 /* write CS partial flush packet */ 4692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4693 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4694 4695 /* SGPR2 */ 4696 /* write the register state for the compute dispatch */ 4697 for (i = 0; i < gpr_reg_size; i++) { 4698 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4699 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4700 - PACKET3_SET_SH_REG_START; 4701 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4702 } 4703 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4704 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4705 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4706 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO)
4707 - PACKET3_SET_SH_REG_START;
4708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4710
4711 /* write dispatch packet */
4712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4713 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4714 ib.ptr[ib.length_dw++] = 1; /* y */
4715 ib.ptr[ib.length_dw++] = 1; /* z */
4716 ib.ptr[ib.length_dw++] =
4717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4718
4719 /* write CS partial flush packet */
4720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4722
4723 /* schedule the ib on the ring */
4724 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4725 if (r) {
4726 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4727 goto fail;
4728 }
4729
4730 /* wait for the GPU to finish processing the IB */
4731 r = dma_fence_wait(f, false);
4732 if (r) {
4733 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4734 goto fail;
4735 }
4736
4737 fail:
4738 amdgpu_ib_free(adev, &ib, NULL);
4739 dma_fence_put(f);
4740
4741 return r;
4742 }
4743
4744 static int gfx_v9_0_early_init(void *handle)
4745 {
4746 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4747
4748 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4749
4750 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4751 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4752 adev->gfx.num_gfx_rings = 0;
4753 else
4754 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4755 adev->gfx.xcc_mask = 1;
4756 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4757 AMDGPU_MAX_COMPUTE_RINGS);
4758 gfx_v9_0_set_kiq_pm4_funcs(adev);
4759 gfx_v9_0_set_ring_funcs(adev);
4760 gfx_v9_0_set_irq_funcs(adev);
4761 gfx_v9_0_set_gds_init(adev);
4762 gfx_v9_0_set_rlc_funcs(adev);
4763
4764 /* init rlcg reg access ctrl */
4765 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4766
4767 return gfx_v9_0_init_microcode(adev);
4768 }
4769
4770 static int gfx_v9_0_ecc_late_init(void *handle)
4771 {
4772 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4773 int r;
4774
4775 /*
4776 * Temporary workaround: on several cards the CP firmware fails to
4777 * update the read pointer when CPDMA writes the clearing operation
4778 * to GDS during the suspend/resume sequence, so limit this
4779 * operation to the cold boot sequence.
4780 */ 4781 if ((!adev->in_suspend) && 4782 (adev->gds.gds_size)) { 4783 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4784 if (r) 4785 return r; 4786 } 4787 4788 /* requires IBs so do in late init after IB pool is initialized */ 4789 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4790 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4791 else 4792 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4793 4794 if (r) 4795 return r; 4796 4797 if (adev->gfx.ras && 4798 adev->gfx.ras->enable_watchdog_timer) 4799 adev->gfx.ras->enable_watchdog_timer(adev); 4800 4801 return 0; 4802 } 4803 4804 static int gfx_v9_0_late_init(void *handle) 4805 { 4806 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4807 int r; 4808 4809 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4810 if (r) 4811 return r; 4812 4813 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4814 if (r) 4815 return r; 4816 4817 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 4818 if (r) 4819 return r; 4820 4821 r = gfx_v9_0_ecc_late_init(handle); 4822 if (r) 4823 return r; 4824 4825 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4826 gfx_v9_4_2_debug_trap_config_init(adev, 4827 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4828 else 4829 gfx_v9_0_debug_trap_config_init(adev, 4830 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4831 4832 return 0; 4833 } 4834 4835 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4836 { 4837 uint32_t rlc_setting; 4838 4839 /* if RLC is not enabled, do nothing */ 4840 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4841 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4842 return false; 4843 4844 return true; 4845 } 4846 4847 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4848 { 4849 uint32_t data; 4850 unsigned i; 4851 4852 data = RLC_SAFE_MODE__CMD_MASK; 4853 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4854 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4855 4856 /* wait for RLC_SAFE_MODE */ 4857 for (i = 0; i < adev->usec_timeout; i++) { 4858 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4859 break; 4860 udelay(1); 4861 } 4862 } 4863 4864 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4865 { 4866 uint32_t data; 4867 4868 data = RLC_SAFE_MODE__CMD_MASK; 4869 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4870 } 4871 4872 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4873 bool enable) 4874 { 4875 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4876 4877 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4878 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4879 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4880 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4881 } else { 4882 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4883 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4884 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4885 } 4886 4887 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4888 } 4889 4890 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4891 bool enable) 4892 { 4893 /* TODO: double check if we need to perform under safe mode */ 4894 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4895 4896 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4897 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4898 else 4899 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4900 4901 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4902 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4903 else 4904 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4905 4906 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4907 } 4908 4909 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4910 bool enable) 4911 { 4912 uint32_t data, def; 4913 4914 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4915 4916 /* It is disabled by HW by default */ 4917 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4918 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4919 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4920 4921 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4922 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4923 4924 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4925 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4926 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4927 4928 /* only for Vega10 & Raven1 */ 4929 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4930 4931 if (def != data) 4932 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4933 4934 /* MGLS is a global flag to control all MGLS in GFX */ 4935 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4936 /* 2 - RLC memory Light sleep */ 4937 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4938 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4939 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4940 if (def != data) 4941 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4942 } 4943 /* 3 - CP memory Light sleep */ 4944 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4945 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4946 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4947 if (def != data) 4948 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4949 } 4950 } 4951 } else { 4952 /* 1 - MGCG_OVERRIDE */ 4953 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4954 4955 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4956 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4957 4958 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4959 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4960 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4961 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4962 4963 if (def != data) 4964 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4965 4966 /* 2 - disable MGLS in RLC */ 4967 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4968 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4969 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4970 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4971 } 4972 4973 /* 3 - disable MGLS in CP */ 4974 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4975 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4976 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4977 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4978 } 4979 } 4980 4981 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4982 } 4983 4984 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4985 bool enable) 4986 { 4987 uint32_t data, def; 4988 4989 if (!adev->gfx.num_gfx_rings) 4990 return; 4991 4992 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4993 4994 /* Enable 3D CGCG/CGLS */ 4995 if (enable) { 4996 /* write cmd to clear cgcg/cgls ov */ 4997 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4998 /* unset CGCG override */ 4999 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5000 /* update CGCG and CGLS override bits */ 5001 if (def != data) 5002 WREG32_SOC15(GC, 0, 
mmRLC_CGTT_MGCG_OVERRIDE, data); 5003 5004 /* enable 3Dcgcg FSM(0x0000363f) */ 5005 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5006 5007 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5008 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5009 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5010 else 5011 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5012 5013 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5014 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5015 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5016 if (def != data) 5017 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5018 5019 /* set IDLE_POLL_COUNT(0x00900100) */ 5020 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5021 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5022 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5023 if (def != data) 5024 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5025 } else { 5026 /* Disable CGCG/CGLS */ 5027 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5028 /* disable cgcg, cgls should be disabled */ 5029 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5030 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5031 /* disable cgcg and cgls in FSM */ 5032 if (def != data) 5033 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5034 } 5035 5036 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5037 } 5038 5039 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5040 bool enable) 5041 { 5042 uint32_t def, data; 5043 5044 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5045 5046 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5047 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5048 /* unset CGCG override */ 5049 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5050 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5051 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5052 else 5053 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5054 /* update CGCG and CGLS override bits */ 5055 if (def != data) 5056 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5057 5058 /* enable cgcg FSM(0x0000363F) */ 5059 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5060 5061 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 5062 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5063 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5064 else 5065 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5066 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5067 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5068 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5069 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5070 if (def != data) 5071 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5072 5073 /* set IDLE_POLL_COUNT(0x00900100) */ 5074 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5075 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5076 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5077 if (def != data) 5078 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5079 } else { 5080 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5081 /* reset CGCG/CGLS bits */ 5082 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5083 /* disable cgcg and cgls in FSM */ 5084 if (def != data) 5085 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5086 } 5087 5088 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5089 } 5090 5091 static int gfx_v9_0_update_gfx_clock_gating(struct 
amdgpu_device *adev, 5092 bool enable) 5093 { 5094 if (enable) { 5095 /* CGCG/CGLS should be enabled after MGCG/MGLS 5096 * === MGCG + MGLS === 5097 */ 5098 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5099 /* === CGCG /CGLS for GFX 3D Only === */ 5100 gfx_v9_0_update_3d_clock_gating(adev, enable); 5101 /* === CGCG + CGLS === */ 5102 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5103 } else { 5104 /* CGCG/CGLS should be disabled before MGCG/MGLS 5105 * === CGCG + CGLS === 5106 */ 5107 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5108 /* === CGCG /CGLS for GFX 3D Only === */ 5109 gfx_v9_0_update_3d_clock_gating(adev, enable); 5110 /* === MGCG + MGLS === */ 5111 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5112 } 5113 return 0; 5114 } 5115 5116 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5117 unsigned int vmid) 5118 { 5119 u32 reg, data; 5120 5121 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5122 if (amdgpu_sriov_is_pp_one_vf(adev)) 5123 data = RREG32_NO_KIQ(reg); 5124 else 5125 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5126 5127 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5128 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5129 5130 if (amdgpu_sriov_is_pp_one_vf(adev)) 5131 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5132 else 5133 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5134 } 5135 5136 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5137 { 5138 amdgpu_gfx_off_ctrl(adev, false); 5139 5140 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5141 5142 amdgpu_gfx_off_ctrl(adev, true); 5143 } 5144 5145 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5146 uint32_t offset, 5147 struct soc15_reg_rlcg *entries, int arr_size) 5148 { 5149 int i; 5150 uint32_t reg; 5151 5152 if (!entries) 5153 return false; 5154 5155 for (i = 0; i < arr_size; i++) { 5156 const struct soc15_reg_rlcg *entry; 5157 5158 entry = &entries[i]; 5159 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5160 if (offset == reg) 5161 return true; 5162 } 5163 5164 return false; 5165 } 5166 5167 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5168 { 5169 return gfx_v9_0_check_rlcg_range(adev, offset, 5170 (void *)rlcg_access_gc_9_0, 5171 ARRAY_SIZE(rlcg_access_gc_9_0)); 5172 } 5173 5174 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5175 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5176 .set_safe_mode = gfx_v9_0_set_safe_mode, 5177 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5178 .init = gfx_v9_0_rlc_init, 5179 .get_csb_size = gfx_v9_0_get_csb_size, 5180 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5181 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5182 .resume = gfx_v9_0_rlc_resume, 5183 .stop = gfx_v9_0_rlc_stop, 5184 .reset = gfx_v9_0_rlc_reset, 5185 .start = gfx_v9_0_rlc_start, 5186 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5187 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5188 }; 5189 5190 static int gfx_v9_0_set_powergating_state(void *handle, 5191 enum amd_powergating_state state) 5192 { 5193 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5194 bool enable = (state == AMD_PG_STATE_GATE); 5195 5196 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5197 case IP_VERSION(9, 2, 2): 5198 case IP_VERSION(9, 1, 0): 5199 case IP_VERSION(9, 3, 0): 5200 if (!enable) 5201 amdgpu_gfx_off_ctrl(adev, false); 5202 5203 if 
(adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5204 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5205 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5206 } else { 5207 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5208 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5209 } 5210 5211 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5212 gfx_v9_0_enable_cp_power_gating(adev, true); 5213 else 5214 gfx_v9_0_enable_cp_power_gating(adev, false); 5215 5216 /* update gfx cgpg state */ 5217 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5218 5219 /* update mgcg state */ 5220 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5221 5222 if (enable) 5223 amdgpu_gfx_off_ctrl(adev, true); 5224 break; 5225 case IP_VERSION(9, 2, 1): 5226 amdgpu_gfx_off_ctrl(adev, enable); 5227 break; 5228 default: 5229 break; 5230 } 5231 5232 return 0; 5233 } 5234 5235 static int gfx_v9_0_set_clockgating_state(void *handle, 5236 enum amd_clockgating_state state) 5237 { 5238 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5239 5240 if (amdgpu_sriov_vf(adev)) 5241 return 0; 5242 5243 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5244 case IP_VERSION(9, 0, 1): 5245 case IP_VERSION(9, 2, 1): 5246 case IP_VERSION(9, 4, 0): 5247 case IP_VERSION(9, 2, 2): 5248 case IP_VERSION(9, 1, 0): 5249 case IP_VERSION(9, 4, 1): 5250 case IP_VERSION(9, 3, 0): 5251 case IP_VERSION(9, 4, 2): 5252 gfx_v9_0_update_gfx_clock_gating(adev, 5253 state == AMD_CG_STATE_GATE); 5254 break; 5255 default: 5256 break; 5257 } 5258 return 0; 5259 } 5260 5261 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) 5262 { 5263 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5264 int data; 5265 5266 if (amdgpu_sriov_vf(adev)) 5267 *flags = 0; 5268 5269 /* AMD_CG_SUPPORT_GFX_MGCG */ 5270 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5271 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5272 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5273 5274 /* AMD_CG_SUPPORT_GFX_CGCG */ 5275 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5276 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5277 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5278 5279 /* AMD_CG_SUPPORT_GFX_CGLS */ 5280 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5281 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5282 5283 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5284 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5285 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5286 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5287 5288 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5289 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5290 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5291 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5292 5293 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5294 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5295 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5296 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5297 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5298 5299 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5300 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5301 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5302 } 5303 } 5304 5305 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5306 { 5307 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5308 } 5309 5310 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5311 { 5312 struct amdgpu_device *adev = ring->adev; 5313 u64 wptr; 5314 5315 /* XXX check if swapping is 
necessary on BE */ 5316 if (ring->use_doorbell) { 5317 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5318 } else { 5319 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 5320 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5321 } 5322 5323 return wptr; 5324 } 5325 5326 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5327 { 5328 struct amdgpu_device *adev = ring->adev; 5329 5330 if (ring->use_doorbell) { 5331 /* XXX check if swapping is necessary on BE */ 5332 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5333 WDOORBELL64(ring->doorbell_index, ring->wptr); 5334 } else { 5335 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5336 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5337 } 5338 } 5339 5340 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5341 { 5342 struct amdgpu_device *adev = ring->adev; 5343 u32 ref_and_mask, reg_mem_engine; 5344 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5345 5346 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5347 switch (ring->me) { 5348 case 1: 5349 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5350 break; 5351 case 2: 5352 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5353 break; 5354 default: 5355 return; 5356 } 5357 reg_mem_engine = 0; 5358 } else { 5359 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5360 reg_mem_engine = 1; /* pfp */ 5361 } 5362 5363 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5364 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5365 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5366 ref_and_mask, ref_and_mask, 0x20); 5367 } 5368 5369 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5370 struct amdgpu_job *job, 5371 struct amdgpu_ib *ib, 5372 uint32_t flags) 5373 { 5374 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5375 u32 header, control = 0; 5376 5377 if (ib->flags & AMDGPU_IB_FLAG_CE) 5378 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5379 else 5380 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5381 5382 control |= ib->length_dw | (vmid << 24); 5383 5384 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5385 control |= INDIRECT_BUFFER_PRE_ENB(1); 5386 5387 if (flags & AMDGPU_IB_PREEMPTED) 5388 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5389 5390 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5391 gfx_v9_0_ring_emit_de_meta(ring, 5392 (!amdgpu_sriov_vf(ring->adev) && 5393 flags & AMDGPU_IB_PREEMPTED) ? 
5394 true : false, 5395 job->gds_size > 0 && job->gds_base != 0); 5396 } 5397 5398 amdgpu_ring_write(ring, header); 5399 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5400 amdgpu_ring_write(ring, 5401 #ifdef __BIG_ENDIAN 5402 (2 << 0) | 5403 #endif 5404 lower_32_bits(ib->gpu_addr)); 5405 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5406 amdgpu_ring_ib_on_emit_cntl(ring); 5407 amdgpu_ring_write(ring, control); 5408 } 5409 5410 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5411 unsigned offset) 5412 { 5413 u32 control = ring->ring[offset]; 5414 5415 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5416 ring->ring[offset] = control; 5417 } 5418 5419 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5420 unsigned offset) 5421 { 5422 struct amdgpu_device *adev = ring->adev; 5423 void *ce_payload_cpu_addr; 5424 uint64_t payload_offset, payload_size; 5425 5426 payload_size = sizeof(struct v9_ce_ib_state); 5427 5428 if (ring->is_mes_queue) { 5429 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5430 gfx[0].gfx_meta_data) + 5431 offsetof(struct v9_gfx_meta_data, ce_payload); 5432 ce_payload_cpu_addr = 5433 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5434 } else { 5435 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5436 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5437 } 5438 5439 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5440 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5441 } else { 5442 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5443 (ring->buf_mask + 1 - offset) << 2); 5444 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5445 memcpy((void *)&ring->ring[0], 5446 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5447 payload_size); 5448 } 5449 } 5450 5451 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5452 unsigned offset) 5453 { 5454 struct amdgpu_device *adev = ring->adev; 5455 void *de_payload_cpu_addr; 5456 uint64_t payload_offset, payload_size; 5457 5458 payload_size = sizeof(struct v9_de_ib_state); 5459 5460 if (ring->is_mes_queue) { 5461 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5462 gfx[0].gfx_meta_data) + 5463 offsetof(struct v9_gfx_meta_data, de_payload); 5464 de_payload_cpu_addr = 5465 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); 5466 } else { 5467 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5468 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5469 } 5470 5471 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5472 IB_COMPLETION_STATUS_PREEMPTED; 5473 5474 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5475 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5476 } else { 5477 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5478 (ring->buf_mask + 1 - offset) << 2); 5479 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5480 memcpy((void *)&ring->ring[0], 5481 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5482 payload_size); 5483 } 5484 } 5485 5486 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5487 struct amdgpu_job *job, 5488 struct amdgpu_ib *ib, 5489 uint32_t flags) 5490 { 5491 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5492 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5493 5494 /* Currently, there is a high possibility to get wave ID mismatch 5495 * between ME and GDS, leading to a hw deadlock, because ME generates 5496 * 
different wave IDs than the GDS expects. This situation happens 5497 * randomly when at least 5 compute pipes use GDS ordered append. 5498 * The wave IDs generated by ME are also wrong after suspend/resume. 5499 * Those are probably bugs somewhere else in the kernel driver. 5500 * 5501 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5502 * GDS to 0 for this ring (me/pipe). 5503 */ 5504 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5505 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5506 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5507 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5508 } 5509 5510 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5511 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5512 amdgpu_ring_write(ring, 5513 #ifdef __BIG_ENDIAN 5514 (2 << 0) | 5515 #endif 5516 lower_32_bits(ib->gpu_addr)); 5517 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5518 amdgpu_ring_write(ring, control); 5519 } 5520 5521 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5522 u64 seq, unsigned flags) 5523 { 5524 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5525 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5526 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5527 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5528 uint32_t dw2 = 0; 5529 5530 /* RELEASE_MEM - flush caches, send int */ 5531 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5532 5533 if (writeback) { 5534 dw2 = EOP_TC_NC_ACTION_EN; 5535 } else { 5536 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5537 EOP_TC_MD_ACTION_EN; 5538 } 5539 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5540 EVENT_INDEX(5); 5541 if (exec) 5542 dw2 |= EOP_EXEC; 5543 5544 amdgpu_ring_write(ring, dw2); 5545 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 5546 5547 /* 5548 * the address should be Qword aligned if 64bit write, Dword 5549 * aligned if only send 32bit data low (discard data high) 5550 */ 5551 if (write64bit) 5552 BUG_ON(addr & 0x7); 5553 else 5554 BUG_ON(addr & 0x3); 5555 amdgpu_ring_write(ring, lower_32_bits(addr)); 5556 amdgpu_ring_write(ring, upper_32_bits(addr)); 5557 amdgpu_ring_write(ring, lower_32_bits(seq)); 5558 amdgpu_ring_write(ring, upper_32_bits(seq)); 5559 amdgpu_ring_write(ring, 0); 5560 } 5561 5562 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5563 { 5564 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5565 uint32_t seq = ring->fence_drv.sync_seq; 5566 uint64_t addr = ring->fence_drv.gpu_addr; 5567 5568 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5569 lower_32_bits(addr), upper_32_bits(addr), 5570 seq, 0xffffffff, 4); 5571 } 5572 5573 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5574 unsigned vmid, uint64_t pd_addr) 5575 { 5576 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5577 5578 /* compute doesn't have PFP */ 5579 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5580 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5581 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5582 amdgpu_ring_write(ring, 0x0); 5583 } 5584 } 5585 5586 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5587 { 5588 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5589 } 5590 5591 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5592 { 5593 u64 wptr; 5594 5595 /* XXX check if swapping is necessary on BE */ 5596 if (ring->use_doorbell) 5597 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5598 else 5599 BUG(); 5600 return wptr; 5601 } 5602 5603 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5604 { 5605 struct amdgpu_device *adev = ring->adev; 5606 5607 /* XXX check if swapping is necessary on BE */ 5608 if (ring->use_doorbell) { 5609 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5610 WDOORBELL64(ring->doorbell_index, ring->wptr); 5611 } else{ 5612 BUG(); /* only DOORBELL method supported on gfx9 now */ 5613 } 5614 } 5615 5616 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5617 u64 seq, unsigned int flags) 5618 { 5619 struct amdgpu_device *adev = ring->adev; 5620 5621 /* we only allocate 32bit for each seq wb address */ 5622 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5623 5624 /* write fence seq to the "addr" */ 5625 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5626 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5627 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5628 amdgpu_ring_write(ring, lower_32_bits(addr)); 5629 amdgpu_ring_write(ring, upper_32_bits(addr)); 5630 amdgpu_ring_write(ring, lower_32_bits(seq)); 5631 5632 if (flags & AMDGPU_FENCE_FLAG_INT) { 5633 /* set register to trigger INT */ 5634 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5635 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5636 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5637 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5638 amdgpu_ring_write(ring, 0); 5639 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5640 } 5641 } 5642 5643 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5644 { 5645 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5646 amdgpu_ring_write(ring, 0); 5647 } 5648 5649 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5650 { 5651 struct 
amdgpu_device *adev = ring->adev; 5652 struct v9_ce_ib_state ce_payload = {0}; 5653 uint64_t offset, ce_payload_gpu_addr; 5654 void *ce_payload_cpu_addr; 5655 int cnt; 5656 5657 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5658 5659 if (ring->is_mes_queue) { 5660 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5661 gfx[0].gfx_meta_data) + 5662 offsetof(struct v9_gfx_meta_data, ce_payload); 5663 ce_payload_gpu_addr = 5664 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 5665 ce_payload_cpu_addr = 5666 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 5667 } else { 5668 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5669 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5670 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5671 } 5672 5673 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5674 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5675 WRITE_DATA_DST_SEL(8) | 5676 WR_CONFIRM) | 5677 WRITE_DATA_CACHE_POLICY(0)); 5678 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5679 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5680 5681 amdgpu_ring_ib_on_emit_ce(ring); 5682 5683 if (resume) 5684 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5685 sizeof(ce_payload) >> 2); 5686 else 5687 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5688 sizeof(ce_payload) >> 2); 5689 } 5690 5691 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5692 { 5693 int i, r = 0; 5694 struct amdgpu_device *adev = ring->adev; 5695 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5696 struct amdgpu_ring *kiq_ring = &kiq->ring; 5697 unsigned long flags; 5698 5699 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5700 return -EINVAL; 5701 5702 spin_lock_irqsave(&kiq->ring_lock, flags); 5703 5704 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5705 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5706 return -ENOMEM; 5707 } 5708 5709 /* assert preemption condition */ 5710 amdgpu_ring_set_preempt_cond_exec(ring, false); 5711 5712 ring->trail_seq += 1; 5713 amdgpu_ring_alloc(ring, 13); 5714 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5715 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5716 5717 /* assert IB preemption, emit the trailing fence */ 5718 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5719 ring->trail_fence_gpu_addr, 5720 ring->trail_seq); 5721 5722 amdgpu_ring_commit(kiq_ring); 5723 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5724 5725 /* poll the trailing fence */ 5726 for (i = 0; i < adev->usec_timeout; i++) { 5727 if (ring->trail_seq == 5728 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5729 break; 5730 udelay(1); 5731 } 5732 5733 if (i >= adev->usec_timeout) { 5734 r = -EINVAL; 5735 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5736 } 5737 5738 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5739 amdgpu_ring_emit_wreg(ring, 5740 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5741 0x0); 5742 amdgpu_ring_commit(ring); 5743 5744 /* deassert preemption condition */ 5745 amdgpu_ring_set_preempt_cond_exec(ring, true); 5746 return r; 5747 } 5748 5749 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5750 { 5751 struct amdgpu_device *adev = ring->adev; 5752 struct v9_de_ib_state de_payload = {0}; 5753 uint64_t offset, gds_addr, de_payload_gpu_addr; 5754 void *de_payload_cpu_addr; 5755 int cnt; 5756 5757 if (ring->is_mes_queue) { 5758 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 5759 gfx[0].gfx_meta_data) + 5760 
offsetof(struct v9_gfx_meta_data, de_payload);
5761 de_payload_gpu_addr =
5762 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5763 de_payload_cpu_addr =
5764 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5765
5766 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5767 gfx[0].gds_backup) +
5768 offsetof(struct v9_gfx_meta_data, de_payload);
5769 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5770 } else {
5771 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5772 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5773 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5774
5775 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5776 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5777 PAGE_SIZE);
5778 }
5779
5780 if (usegds) {
5781 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5782 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5783 }
5784
5785 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5786 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5787 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5788 WRITE_DATA_DST_SEL(8) |
5789 WR_CONFIRM) |
5790 WRITE_DATA_CACHE_POLICY(0));
5791 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5792 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5793
5794 amdgpu_ring_ib_on_emit_de(ring);
5795 if (resume)
5796 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5797 sizeof(de_payload) >> 2);
5798 else
5799 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5800 sizeof(de_payload) >> 2);
5801 }
5802
5803 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5804 bool secure)
5805 {
5806 uint32_t v = secure ? FRAME_TMZ : 0;
5807
5808 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5809 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5810 }
5811
5812 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5813 {
5814 uint32_t dw2 = 0;
5815
5816 gfx_v9_0_ring_emit_ce_meta(ring,
5817 (!amdgpu_sriov_vf(ring->adev) &&
5818 flags & AMDGPU_IB_PREEMPTED) ? true : false);
5819
5820 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5821 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5822 /* set load_global_config & load_global_uconfig */
5823 dw2 |= 0x8001;
5824 /* set load_cs_sh_regs */
5825 dw2 |= 0x01000000;
5826 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5827 dw2 |= 0x10002;
5828
5829 /* set load_ce_ram if a preamble is presented */
5830 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5831 dw2 |= 0x10000000;
5832 } else {
5833 /* still load_ce_ram if the preamble is presented for the first
5834 * time, even though no context switch happens.
5835 */ 5836 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5837 dw2 |= 0x10000000; 5838 } 5839 5840 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5841 amdgpu_ring_write(ring, dw2); 5842 amdgpu_ring_write(ring, 0); 5843 } 5844 5845 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5846 uint64_t addr) 5847 { 5848 unsigned ret; 5849 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5850 amdgpu_ring_write(ring, lower_32_bits(addr)); 5851 amdgpu_ring_write(ring, upper_32_bits(addr)); 5852 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5853 amdgpu_ring_write(ring, 0); 5854 ret = ring->wptr & ring->buf_mask; 5855 /* patch dummy value later */ 5856 amdgpu_ring_write(ring, 0); 5857 return ret; 5858 } 5859 5860 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5861 uint32_t reg_val_offs) 5862 { 5863 struct amdgpu_device *adev = ring->adev; 5864 5865 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5866 amdgpu_ring_write(ring, 0 | /* src: register*/ 5867 (5 << 8) | /* dst: memory */ 5868 (1 << 20)); /* write confirm */ 5869 amdgpu_ring_write(ring, reg); 5870 amdgpu_ring_write(ring, 0); 5871 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5872 reg_val_offs * 4)); 5873 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5874 reg_val_offs * 4)); 5875 } 5876 5877 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5878 uint32_t val) 5879 { 5880 uint32_t cmd = 0; 5881 5882 switch (ring->funcs->type) { 5883 case AMDGPU_RING_TYPE_GFX: 5884 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5885 break; 5886 case AMDGPU_RING_TYPE_KIQ: 5887 cmd = (1 << 16); /* no inc addr */ 5888 break; 5889 default: 5890 cmd = WR_CONFIRM; 5891 break; 5892 } 5893 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5894 amdgpu_ring_write(ring, cmd); 5895 amdgpu_ring_write(ring, reg); 5896 amdgpu_ring_write(ring, 0); 5897 amdgpu_ring_write(ring, val); 5898 } 5899 5900 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5901 uint32_t val, uint32_t mask) 5902 { 5903 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5904 } 5905 5906 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5907 uint32_t reg0, uint32_t reg1, 5908 uint32_t ref, uint32_t mask) 5909 { 5910 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5911 struct amdgpu_device *adev = ring->adev; 5912 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5913 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5914 5915 if (fw_version_ok) 5916 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5917 ref, mask, 0x20); 5918 else 5919 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5920 ref, mask); 5921 } 5922 5923 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5924 { 5925 struct amdgpu_device *adev = ring->adev; 5926 uint32_t value = 0; 5927 5928 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5929 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5930 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5931 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5932 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5933 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5934 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5935 } 5936 5937 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5938 enum amdgpu_interrupt_state state) 5939 { 5940 switch (state) { 5941 case AMDGPU_IRQ_STATE_DISABLE: 5942 case AMDGPU_IRQ_STATE_ENABLE: 5943 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5944 TIME_STAMP_INT_ENABLE, 5945 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5946 break; 5947 default: 5948 break; 5949 } 5950 } 5951 5952 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5953 int me, int pipe, 5954 enum amdgpu_interrupt_state state) 5955 { 5956 u32 mec_int_cntl, mec_int_cntl_reg; 5957 5958 /* 5959 * amdgpu controls only the first MEC. That's why this function only 5960 * handles the setting of interrupts for this specific MEC. All other 5961 * pipes' interrupts are set by amdkfd. 5962 */ 5963 5964 if (me == 1) { 5965 switch (pipe) { 5966 case 0: 5967 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5968 break; 5969 case 1: 5970 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5971 break; 5972 case 2: 5973 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5974 break; 5975 case 3: 5976 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5977 break; 5978 default: 5979 DRM_DEBUG("invalid pipe %d\n", pipe); 5980 return; 5981 } 5982 } else { 5983 DRM_DEBUG("invalid me %d\n", me); 5984 return; 5985 } 5986 5987 switch (state) { 5988 case AMDGPU_IRQ_STATE_DISABLE: 5989 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 5990 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5991 TIME_STAMP_INT_ENABLE, 0); 5992 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5993 break; 5994 case AMDGPU_IRQ_STATE_ENABLE: 5995 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5996 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5997 TIME_STAMP_INT_ENABLE, 1); 5998 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5999 break; 6000 default: 6001 break; 6002 } 6003 } 6004 6005 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, 6006 int me, int pipe) 6007 { 6008 /* 6009 * amdgpu controls only the first MEC. That's why this function only 6010 * handles the setting of interrupts for this specific MEC. All other 6011 * pipes' interrupts are set by amdkfd. 
6012 */ 6013 if (me != 1) 6014 return 0; 6015 6016 switch (pipe) { 6017 case 0: 6018 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 6019 case 1: 6020 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 6021 case 2: 6022 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 6023 case 3: 6024 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 6025 default: 6026 return 0; 6027 } 6028 } 6029 6030 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6031 struct amdgpu_irq_src *source, 6032 unsigned type, 6033 enum amdgpu_interrupt_state state) 6034 { 6035 u32 cp_int_cntl_reg, cp_int_cntl; 6036 int i, j; 6037 6038 switch (state) { 6039 case AMDGPU_IRQ_STATE_DISABLE: 6040 case AMDGPU_IRQ_STATE_ENABLE: 6041 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6042 PRIV_REG_INT_ENABLE, 6043 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6044 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6045 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6046 /* MECs start at 1 */ 6047 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6048 6049 if (cp_int_cntl_reg) { 6050 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6051 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6052 PRIV_REG_INT_ENABLE, 6053 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6054 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6055 } 6056 } 6057 } 6058 break; 6059 default: 6060 break; 6061 } 6062 6063 return 0; 6064 } 6065 6066 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6067 struct amdgpu_irq_src *source, 6068 unsigned type, 6069 enum amdgpu_interrupt_state state) 6070 { 6071 u32 cp_int_cntl_reg, cp_int_cntl; 6072 int i, j; 6073 6074 switch (state) { 6075 case AMDGPU_IRQ_STATE_DISABLE: 6076 case AMDGPU_IRQ_STATE_ENABLE: 6077 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6078 OPCODE_ERROR_INT_ENABLE, 6079 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6080 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6081 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6082 /* MECs start at 1 */ 6083 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6084 6085 if (cp_int_cntl_reg) { 6086 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6087 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6088 OPCODE_ERROR_INT_ENABLE, 6089 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6090 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6091 } 6092 } 6093 } 6094 break; 6095 default: 6096 break; 6097 } 6098 6099 return 0; 6100 } 6101 6102 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6103 struct amdgpu_irq_src *source, 6104 unsigned type, 6105 enum amdgpu_interrupt_state state) 6106 { 6107 switch (state) { 6108 case AMDGPU_IRQ_STATE_DISABLE: 6109 case AMDGPU_IRQ_STATE_ENABLE: 6110 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6111 PRIV_INSTR_INT_ENABLE, 6112 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6113 break; 6114 default: 6115 break; 6116 } 6117 6118 return 0; 6119 } 6120 6121 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 6122 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6123 CP_ECC_ERROR_INT_ENABLE, 1) 6124 6125 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 6126 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6127 CP_ECC_ERROR_INT_ENABLE, 0) 6128 6129 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 6130 struct amdgpu_irq_src *source, 6131 unsigned type, 6132 enum amdgpu_interrupt_state state) 6133 { 6134 switch (state) { 6135 case AMDGPU_IRQ_STATE_DISABLE: 6136 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6137 CP_ECC_ERROR_INT_ENABLE, 0); 6138 DISABLE_ECC_ON_ME_PIPE(1, 0); 6139 DISABLE_ECC_ON_ME_PIPE(1, 1); 6140 DISABLE_ECC_ON_ME_PIPE(1, 2); 6141 DISABLE_ECC_ON_ME_PIPE(1, 3); 6142 break; 6143 6144 case AMDGPU_IRQ_STATE_ENABLE: 6145 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6146 CP_ECC_ERROR_INT_ENABLE, 1); 6147 ENABLE_ECC_ON_ME_PIPE(1, 0); 6148 ENABLE_ECC_ON_ME_PIPE(1, 1); 6149 ENABLE_ECC_ON_ME_PIPE(1, 2); 6150 ENABLE_ECC_ON_ME_PIPE(1, 3); 6151 break; 6152 default: 6153 break; 6154 } 6155 6156 return 0; 6157 } 6158 6159 6160 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6161 struct amdgpu_irq_src *src, 6162 unsigned type, 6163 enum amdgpu_interrupt_state state) 6164 { 6165 switch (type) { 6166 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6167 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6168 break; 6169 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6170 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6171 break; 6172 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6173 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6174 break; 6175 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6176 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6177 break; 6178 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6179 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6180 break; 6181 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6182 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6183 break; 6184 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6185 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6186 break; 6187 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6188 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6189 break; 6190 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6191 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6192 break; 6193 default: 6194 break; 6195 } 6196 return 0; 6197 } 6198 6199 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6200 struct amdgpu_irq_src *source, 6201 struct amdgpu_iv_entry *entry) 6202 { 6203 int i; 6204 u8 me_id, pipe_id, queue_id; 6205 struct amdgpu_ring *ring; 6206 6207 DRM_DEBUG("IH: CP EOP\n"); 6208 me_id = (entry->ring_id & 0x0c) >> 2; 6209 pipe_id = (entry->ring_id & 0x03) >> 0; 6210 queue_id = (entry->ring_id & 0x70) >> 4; 6211 6212 switch (me_id) { 6213 case 0: 6214 if (adev->gfx.num_gfx_rings) { 6215 if (!adev->gfx.mcbp) { 6216 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6217 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6218 /* Fence signals are handled on the software rings*/ 6219 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6220 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6221 } 6222 } 6223 break; 6224 case 1: 6225 case 2: 6226 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6227 ring = &adev->gfx.compute_ring[i]; 6228 /* Per-queue interrupt is supported for MEC starting 
from VI. 6229 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6230 */ 6231 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6232 amdgpu_fence_process(ring); 6233 } 6234 break; 6235 } 6236 return 0; 6237 } 6238 6239 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6240 struct amdgpu_iv_entry *entry) 6241 { 6242 u8 me_id, pipe_id, queue_id; 6243 struct amdgpu_ring *ring; 6244 int i; 6245 6246 me_id = (entry->ring_id & 0x0c) >> 2; 6247 pipe_id = (entry->ring_id & 0x03) >> 0; 6248 queue_id = (entry->ring_id & 0x70) >> 4; 6249 6250 switch (me_id) { 6251 case 0: 6252 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6253 break; 6254 case 1: 6255 case 2: 6256 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6257 ring = &adev->gfx.compute_ring[i]; 6258 if (ring->me == me_id && ring->pipe == pipe_id && 6259 ring->queue == queue_id) 6260 drm_sched_fault(&ring->sched); 6261 } 6262 break; 6263 } 6264 } 6265 6266 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6267 struct amdgpu_irq_src *source, 6268 struct amdgpu_iv_entry *entry) 6269 { 6270 DRM_ERROR("Illegal register access in command stream\n"); 6271 gfx_v9_0_fault(adev, entry); 6272 return 0; 6273 } 6274 6275 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, 6276 struct amdgpu_irq_src *source, 6277 struct amdgpu_iv_entry *entry) 6278 { 6279 DRM_ERROR("Illegal opcode in command stream\n"); 6280 gfx_v9_0_fault(adev, entry); 6281 return 0; 6282 } 6283 6284 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6285 struct amdgpu_irq_src *source, 6286 struct amdgpu_iv_entry *entry) 6287 { 6288 DRM_ERROR("Illegal instruction in command stream\n"); 6289 gfx_v9_0_fault(adev, entry); 6290 return 0; 6291 } 6292 6293 6294 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6295 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6296 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6297 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6298 }, 6299 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6300 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6301 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6302 }, 6303 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6304 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6305 0, 0 6306 }, 6307 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6308 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6309 0, 0 6310 }, 6311 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6312 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6313 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6314 }, 6315 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6316 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6317 0, 0 6318 }, 6319 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6320 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6321 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6322 }, 6323 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6324 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6325 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6326 }, 6327 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6328 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6329 0, 0 6330 }, 6331 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6332 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6333 0, 0 6334 }, 6335 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6336 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6337 0, 0 6338 }, 6339 { "GDS_MEM", 
SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6340 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6341 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6342 }, 6343 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6344 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6345 0, 0 6346 }, 6347 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6348 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6349 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6350 }, 6351 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6352 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6353 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6354 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6355 }, 6356 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6357 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6358 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6359 0, 0 6360 }, 6361 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6362 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6363 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6364 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6365 }, 6366 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6367 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6368 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6369 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6370 }, 6371 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6372 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6373 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6374 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6375 }, 6376 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6377 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6378 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6379 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6380 }, 6381 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6382 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6383 0, 0 6384 }, 6385 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6386 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6387 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6388 }, 6389 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6390 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6391 0, 0 6392 }, 6393 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6394 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6395 0, 0 6396 }, 6397 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6398 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6399 0, 0 6400 }, 6401 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6402 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6403 0, 0 6404 }, 6405 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6406 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6407 0, 0 6408 }, 6409 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6410 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6411 0, 0 6412 }, 6413 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6414 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6415 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6416 }, 6417 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6418 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6419 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6420 }, 6421 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6422 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6423 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6424 }, 6425 { "TCC_LOW_RATE_TAG", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6426 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6427 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6428 }, 6429 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6430 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6431 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6432 }, 6433 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6434 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6435 0, 0 6436 }, 6437 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6438 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6439 0, 0 6440 }, 6441 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6442 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6443 0, 0 6444 }, 6445 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6446 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6447 0, 0 6448 }, 6449 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6450 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6451 0, 0 6452 }, 6453 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6454 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6455 0, 0 6456 }, 6457 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6458 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6459 0, 0 6460 }, 6461 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6462 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6463 0, 0 6464 }, 6465 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6466 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6467 0, 0 6468 }, 6469 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6470 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6471 0, 0 6472 }, 6473 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6474 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6475 0, 0 6476 }, 6477 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6478 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6479 0, 0 6480 }, 6481 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6482 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6483 0, 0 6484 }, 6485 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6486 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6487 0, 0 6488 }, 6489 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6490 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6491 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6492 }, 6493 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6494 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6495 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6496 }, 6497 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6498 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6499 0, 0 6500 }, 6501 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6502 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6503 0, 0 6504 }, 6505 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6506 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6507 0, 0 6508 }, 6509 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6510 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6511 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6512 }, 6513 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6514 
SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6515 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6516 }, 6517 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6518 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6519 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6520 }, 6521 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6522 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6523 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6524 }, 6525 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6526 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6527 0, 0 6528 }, 6529 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6530 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6531 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6532 }, 6533 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6534 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6535 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6536 }, 6537 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6538 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6539 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6540 }, 6541 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6542 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6543 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6544 }, 6545 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6546 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6547 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6548 }, 6549 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6550 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6551 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6552 }, 6553 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6554 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6555 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6556 }, 6557 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6558 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6559 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6560 }, 6561 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6562 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6563 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6564 }, 6565 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6566 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6567 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6568 }, 6569 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6570 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6571 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6572 }, 6573 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6574 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6575 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6576 }, 6577 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6578 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6579 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6580 }, 6581 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6582 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6583 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6584 }, 6585 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6586 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6587 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6588 }, 6589 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6590 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6591 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6592 }, 6593 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6594 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6595 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6596 }, 6597 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6598 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6599 0, 0 6600 }, 6601 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6602 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6603 0, 0 6604 }, 6605 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6606 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6607 0, 0 6608 }, 6609 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6610 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6611 0, 0 6612 }, 6613 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6614 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6615 0, 0 6616 }, 6617 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6618 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6619 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6620 }, 6621 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6622 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6623 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6624 }, 6625 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6626 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6627 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6628 }, 6629 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6630 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6631 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6632 }, 6633 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6634 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6635 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6636 }, 6637 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6638 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6639 0, 0 6640 }, 6641 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6642 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6643 0, 0 6644 }, 6645 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6646 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6647 0, 0 6648 }, 6649 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6650 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6651 0, 0 6652 }, 6653 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6654 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6655 0, 0 6656 }, 6657 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6658 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6659 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6660 }, 6661 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6662 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6663 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6664 }, 6665 { "EA_DRAMWR_DATAMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6666 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6667 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6668 }, 6669 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6670 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6671 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6672 }, 6673 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6674 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6675 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6676 }, 6677 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6678 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6679 0, 0 6680 }, 6681 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6682 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6683 0, 0 6684 }, 6685 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6686 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6687 0, 0 6688 }, 6689 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6690 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6691 0, 0 6692 }, 6693 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6694 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6695 0, 0 6696 }, 6697 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6698 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6699 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6700 }, 6701 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6702 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6703 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6704 }, 6705 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6706 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6707 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6708 }, 6709 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6710 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6711 0, 0 6712 }, 6713 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6714 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6715 0, 0 6716 }, 6717 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6718 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6719 0, 0 6720 }, 6721 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6722 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6723 0, 0 6724 }, 6725 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6726 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6727 0, 0 6728 }, 6729 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6730 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6731 0, 0 6732 } 6733 }; 6734 6735 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6736 void *inject_if, uint32_t instance_mask) 6737 { 6738 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6739 int ret; 6740 struct ta_ras_trigger_error_input block_info = { 0 }; 6741 6742 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6743 return -EINVAL; 6744 6745 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6746 return -EINVAL; 6747 6748 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6749 return -EPERM; 6750 6751 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6752 info->head.type)) { 6753 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6754 ras_gfx_subblocks[info->head.sub_block_index].name, 6755 info->head.type); 6756 return -EPERM; 6757 } 6758 6759 if 
(!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6760 info->head.type)) { 6761 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6762 ras_gfx_subblocks[info->head.sub_block_index].name, 6763 info->head.type); 6764 return -EPERM; 6765 } 6766 6767 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6768 block_info.sub_block_index = 6769 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6770 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6771 block_info.address = info->address; 6772 block_info.value = info->value; 6773 6774 mutex_lock(&adev->grbm_idx_mutex); 6775 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); 6776 mutex_unlock(&adev->grbm_idx_mutex); 6777 6778 return ret; 6779 } 6780 6781 static const char * const vml2_mems[] = { 6782 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6783 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6784 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6785 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6786 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6787 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6788 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6789 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6790 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6791 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6792 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6793 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6794 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6795 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6796 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6797 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6798 }; 6799 6800 static const char * const vml2_walker_mems[] = { 6801 "UTC_VML2_CACHE_PDE0_MEM0", 6802 "UTC_VML2_CACHE_PDE0_MEM1", 6803 "UTC_VML2_CACHE_PDE1_MEM0", 6804 "UTC_VML2_CACHE_PDE1_MEM1", 6805 "UTC_VML2_CACHE_PDE2_MEM0", 6806 "UTC_VML2_CACHE_PDE2_MEM1", 6807 "UTC_VML2_RDIF_LOG_FIFO", 6808 }; 6809 6810 static const char * const atc_l2_cache_2m_mems[] = { 6811 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6812 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6813 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6814 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6815 }; 6816 6817 static const char *atc_l2_cache_4k_mems[] = { 6818 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6819 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6820 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6821 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6822 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6823 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6824 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6825 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6826 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6827 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6828 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6829 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6830 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6831 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6832 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6833 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6834 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6835 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6836 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6837 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6838 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6839 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6840 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6841 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6842 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6843 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6844 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6845 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6846 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6847 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6848 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6849 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6850 }; 6851 6852 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6853 struct 
ras_err_data *err_data) 6854 { 6855 uint32_t i, data; 6856 uint32_t sec_count, ded_count; 6857 6858 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6859 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6860 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6861 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6862 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6863 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6864 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6865 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6866 6867 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6868 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6869 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6870 6871 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6872 if (sec_count) { 6873 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6874 "SEC %d\n", i, vml2_mems[i], sec_count); 6875 err_data->ce_count += sec_count; 6876 } 6877 6878 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6879 if (ded_count) { 6880 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6881 "DED %d\n", i, vml2_mems[i], ded_count); 6882 err_data->ue_count += ded_count; 6883 } 6884 } 6885 6886 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6887 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6888 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6889 6890 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6891 SEC_COUNT); 6892 if (sec_count) { 6893 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6894 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6895 err_data->ce_count += sec_count; 6896 } 6897 6898 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6899 DED_COUNT); 6900 if (ded_count) { 6901 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6902 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6903 err_data->ue_count += ded_count; 6904 } 6905 } 6906 6907 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6908 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6909 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6910 6911 sec_count = (data & 0x00006000L) >> 0xd; 6912 if (sec_count) { 6913 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6914 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6915 sec_count); 6916 err_data->ce_count += sec_count; 6917 } 6918 } 6919 6920 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6921 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6922 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6923 6924 sec_count = (data & 0x00006000L) >> 0xd; 6925 if (sec_count) { 6926 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6927 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6928 sec_count); 6929 err_data->ce_count += sec_count; 6930 } 6931 6932 ded_count = (data & 0x00018000L) >> 0xf; 6933 if (ded_count) { 6934 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6935 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6936 ded_count); 6937 err_data->ue_count += ded_count; 6938 } 6939 } 6940 6941 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6942 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6943 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6944 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6945 6946 return 0; 6947 } 6948 6949 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6950 const struct soc15_reg_entry *reg, 6951 uint32_t se_id, uint32_t inst_id, uint32_t value, 6952 uint32_t *sec_count, uint32_t *ded_count) 6953 { 6954 uint32_t i; 6955 uint32_t sec_cnt, ded_cnt; 6956 6957 for (i 
= 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6958 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6959 gfx_v9_0_ras_fields[i].seg != reg->seg || 6960 gfx_v9_0_ras_fields[i].inst != reg->inst) 6961 continue; 6962 6963 sec_cnt = (value & 6964 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6965 gfx_v9_0_ras_fields[i].sec_count_shift; 6966 if (sec_cnt) { 6967 dev_info(adev->dev, "GFX SubBlock %s, " 6968 "Instance[%d][%d], SEC %d\n", 6969 gfx_v9_0_ras_fields[i].name, 6970 se_id, inst_id, 6971 sec_cnt); 6972 *sec_count += sec_cnt; 6973 } 6974 6975 ded_cnt = (value & 6976 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6977 gfx_v9_0_ras_fields[i].ded_count_shift; 6978 if (ded_cnt) { 6979 dev_info(adev->dev, "GFX SubBlock %s, " 6980 "Instance[%d][%d], DED %d\n", 6981 gfx_v9_0_ras_fields[i].name, 6982 se_id, inst_id, 6983 ded_cnt); 6984 *ded_count += ded_cnt; 6985 } 6986 } 6987 6988 return 0; 6989 } 6990 6991 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6992 { 6993 int i, j, k; 6994 6995 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6996 return; 6997 6998 /* read back registers to clear the counters */ 6999 mutex_lock(&adev->grbm_idx_mutex); 7000 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7001 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7002 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7003 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 7004 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7005 } 7006 } 7007 } 7008 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 7009 mutex_unlock(&adev->grbm_idx_mutex); 7010 7011 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7012 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 7013 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7014 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 7015 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7016 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 7017 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7018 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 7019 7020 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 7021 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 7022 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 7023 } 7024 7025 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 7026 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 7027 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 7028 } 7029 7030 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 7031 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 7032 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 7033 } 7034 7035 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 7036 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 7037 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 7038 } 7039 7040 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7041 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7042 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7043 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7044 } 7045 7046 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 7047 void *ras_error_status) 7048 { 7049 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 7050 uint32_t sec_count = 0, ded_count = 0; 7051 uint32_t i, j, k; 7052 uint32_t reg_value; 7053 7054 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7055 return; 7056 7057 err_data->ue_count = 0; 7058 err_data->ce_count = 0; 7059 7060 mutex_lock(&adev->grbm_idx_mutex); 7061 7062 for (i = 0; i < 
ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);
}

static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int cp_coher_cntl =
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
		PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff);	/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* POLL_INTERVAL */
}

static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	uint32_t wcl_cs_reg;

	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves.  Setting 5 bits makes sure gfx only
	 * gets around 25% of the GPU resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first MEC (CS pipes 0-3).
	 */
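	/* Rough arithmetic behind the ~25% figure above: 0x1f out of the
	 * 7-bit full scale 0x7f is 31/127, i.e. roughly 24%.  The loop below
	 * applies a similar limit to the MEC1 compute pipes that are not
	 * owned by this ring, so that other compute queues cannot starve a
	 * high priority job.
	 */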
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
	}
}

static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	int i;

	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by the remaining NOPs one at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	for (i = 1; i < num_nop; i++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}

static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;
	u32 tmp;
	int r;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, 5)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
	gfx_v9_0_ring_emit_wreg(kiq_ring,
				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r)
		return r;

	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
		return -ENOMEM;
	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
	gfx_v9_0_ring_emit_reg_wait(ring,
				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
	gfx_v9_0_ring_emit_wreg(ring,
				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);

	return amdgpu_ring_test_ring(ring);
}

static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
			      unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;
	int i, r;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
				   0, 0);
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r)
		return r;

	/* make sure dequeue is complete */
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	if (r) {
		dev_err(adev->dev, "failed to wait for hqd deactivation\n");
		return r;
	}
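
	/* At this point the queue is unmapped and the HQD is idle; below, the
	 * MQD is re-initialized in place and the KIQ is asked to map the
	 * queue again before the ring is re-tested.
	 */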
	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0)) {
		dev_err(adev->dev, "failed to reserve mqd_obj\n");
		return r;
	}
	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
	if (!r) {
		r = gfx_v9_0_kcq_init_queue(ring, true);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r) {
		dev_err(adev->dev, "failed to unreserve mqd_obj\n");
		return r;
	}
	spin_lock_irqsave(&kiq->ring_lock, flags);
	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
	if (r) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}
	kiq->pmf->kiq_map_queues(kiq_ring, ring);
	amdgpu_ring_commit(kiq_ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("failed to remap queue\n");
		return r;
	}
	return amdgpu_ring_test_ring(ring);
}

static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);

	if (!adev->gfx.ip_dump_core)
		return;

	for (i = 0; i < reg_count; i++)
		drm_printf(p, "%-50s \t 0x%08x\n",
			   gc_reg_list_9[i].reg_name,
			   adev->gfx.ip_dump_core[i]);

	/* print compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.mec.num_mec,
		   adev->gfx.mec.num_pipe_per_mec,
		   adev->gfx.mec.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_cp_reg_list_9[reg].reg_name,
						   adev->gfx.ip_dump_compute_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}
}

static void gfx_v9_ip_dump(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);

	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
		return;

	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < reg_count; i++)
		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				/* ME0 is for GFX so start from 1 for CP */
				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);

				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_compute_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_cp_reg_list_9[reg]));
				}
				index += reg_count;
			}
		}
	}
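
	/* Done with the per-queue SRBM banking: the code below switches back
	 * to the default ME0/pipe0/queue0 selection, drops srbm_mutex and
	 * re-allows GFXOFF.
	 */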
soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7388 mutex_unlock(&adev->srbm_mutex); 7389 amdgpu_gfx_off_ctrl(adev, true); 7390 7391 } 7392 7393 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7394 { 7395 /* Emit the cleaner shader */ 7396 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7397 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7398 } 7399 7400 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7401 .name = "gfx_v9_0", 7402 .early_init = gfx_v9_0_early_init, 7403 .late_init = gfx_v9_0_late_init, 7404 .sw_init = gfx_v9_0_sw_init, 7405 .sw_fini = gfx_v9_0_sw_fini, 7406 .hw_init = gfx_v9_0_hw_init, 7407 .hw_fini = gfx_v9_0_hw_fini, 7408 .suspend = gfx_v9_0_suspend, 7409 .resume = gfx_v9_0_resume, 7410 .is_idle = gfx_v9_0_is_idle, 7411 .wait_for_idle = gfx_v9_0_wait_for_idle, 7412 .soft_reset = gfx_v9_0_soft_reset, 7413 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7414 .set_powergating_state = gfx_v9_0_set_powergating_state, 7415 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7416 .dump_ip_state = gfx_v9_ip_dump, 7417 .print_ip_state = gfx_v9_ip_print, 7418 }; 7419 7420 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7421 .type = AMDGPU_RING_TYPE_GFX, 7422 .align_mask = 0xff, 7423 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7424 .support_64bit_ptrs = true, 7425 .secure_submission_supported = true, 7426 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7427 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7428 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7429 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7430 5 + /* COND_EXEC */ 7431 7 + /* PIPELINE_SYNC */ 7432 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7433 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7434 2 + /* VM_FLUSH */ 7435 8 + /* FENCE for VM_FLUSH */ 7436 20 + /* GDS switch */ 7437 4 + /* double SWITCH_BUFFER, 7438 the first COND_EXEC jump to the place just 7439 prior to this double SWITCH_BUFFER */ 7440 5 + /* COND_EXEC */ 7441 7 + /* HDP_flush */ 7442 4 + /* VGT_flush */ 7443 14 + /* CE_META */ 7444 31 + /* DE_META */ 7445 3 + /* CNTX_CTRL */ 7446 5 + /* HDP_INVL */ 7447 8 + 8 + /* FENCE x2 */ 7448 2 + /* SWITCH_BUFFER */ 7449 7 + /* gfx_v9_0_emit_mem_sync */ 7450 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7451 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7452 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7453 .emit_fence = gfx_v9_0_ring_emit_fence, 7454 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7455 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7456 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7457 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7458 .test_ring = gfx_v9_0_ring_test_ring, 7459 .insert_nop = gfx_v9_ring_insert_nop, 7460 .pad_ib = amdgpu_ring_generic_pad_ib, 7461 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7462 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7463 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7464 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7465 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7466 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7467 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7468 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7469 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7470 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7471 .reset = gfx_v9_0_reset_kgq, 7472 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7473 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7474 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7475 }; 7476 7477 static const struct 
amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7478 .type = AMDGPU_RING_TYPE_GFX, 7479 .align_mask = 0xff, 7480 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7481 .support_64bit_ptrs = true, 7482 .secure_submission_supported = true, 7483 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7484 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7485 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7486 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7487 5 + /* COND_EXEC */ 7488 7 + /* PIPELINE_SYNC */ 7489 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7490 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7491 2 + /* VM_FLUSH */ 7492 8 + /* FENCE for VM_FLUSH */ 7493 20 + /* GDS switch */ 7494 4 + /* double SWITCH_BUFFER, 7495 * the first COND_EXEC jump to the place just 7496 * prior to this double SWITCH_BUFFER 7497 */ 7498 5 + /* COND_EXEC */ 7499 7 + /* HDP_flush */ 7500 4 + /* VGT_flush */ 7501 14 + /* CE_META */ 7502 31 + /* DE_META */ 7503 3 + /* CNTX_CTRL */ 7504 5 + /* HDP_INVL */ 7505 8 + 8 + /* FENCE x2 */ 7506 2 + /* SWITCH_BUFFER */ 7507 7 + /* gfx_v9_0_emit_mem_sync */ 7508 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7509 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7510 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7511 .emit_fence = gfx_v9_0_ring_emit_fence, 7512 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7513 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7514 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7515 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7516 .test_ring = gfx_v9_0_ring_test_ring, 7517 .test_ib = gfx_v9_0_ring_test_ib, 7518 .insert_nop = gfx_v9_ring_insert_nop, 7519 .pad_ib = amdgpu_ring_generic_pad_ib, 7520 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7521 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7522 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7523 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7524 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7525 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7526 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7527 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7528 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7529 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7530 .patch_de = gfx_v9_0_ring_patch_de_meta, 7531 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7532 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7533 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7534 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7535 }; 7536 7537 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7538 .type = AMDGPU_RING_TYPE_COMPUTE, 7539 .align_mask = 0xff, 7540 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7541 .support_64bit_ptrs = true, 7542 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7543 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7544 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7545 .emit_frame_size = 7546 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7547 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7548 5 + /* hdp invalidate */ 7549 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7550 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7551 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7552 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7553 7 + /* gfx_v9_0_emit_mem_sync */ 7554 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7555 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7556 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7557 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7558 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7559 .emit_fence = gfx_v9_0_ring_emit_fence, 7560 
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7561 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7562 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7563 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7564 .test_ring = gfx_v9_0_ring_test_ring, 7565 .test_ib = gfx_v9_0_ring_test_ib, 7566 .insert_nop = gfx_v9_ring_insert_nop, 7567 .pad_ib = amdgpu_ring_generic_pad_ib, 7568 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7569 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7570 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7571 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7572 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7573 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7574 .reset = gfx_v9_0_reset_kcq, 7575 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7576 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7577 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7578 }; 7579 7580 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7581 .type = AMDGPU_RING_TYPE_KIQ, 7582 .align_mask = 0xff, 7583 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7584 .support_64bit_ptrs = true, 7585 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7586 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7587 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7588 .emit_frame_size = 7589 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7590 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7591 5 + /* hdp invalidate */ 7592 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7593 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7594 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7595 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7596 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7597 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7598 .test_ring = gfx_v9_0_ring_test_ring, 7599 .insert_nop = amdgpu_ring_insert_nop, 7600 .pad_ib = amdgpu_ring_generic_pad_ib, 7601 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7602 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7603 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7604 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7605 }; 7606 7607 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7608 { 7609 int i; 7610 7611 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7612 7613 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7614 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7615 7616 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 7617 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 7618 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 7619 } 7620 7621 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7622 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7623 } 7624 7625 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7626 .set = gfx_v9_0_set_eop_interrupt_state, 7627 .process = gfx_v9_0_eop_irq, 7628 }; 7629 7630 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7631 .set = gfx_v9_0_set_priv_reg_fault_state, 7632 .process = gfx_v9_0_priv_reg_irq, 7633 }; 7634 7635 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { 7636 .set = gfx_v9_0_set_bad_op_fault_state, 7637 .process = gfx_v9_0_bad_op_irq, 7638 }; 7639 7640 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7641 .set = gfx_v9_0_set_priv_inst_fault_state, 7642 .process = gfx_v9_0_priv_inst_irq, 7643 }; 7644 7645 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7646 .set = gfx_v9_0_set_cp_ecc_error_state, 7647 .process = amdgpu_gfx_cp_ecc_error_irq, 
};


static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* aldebaran removed all the GDS internal memory;
		 * only GWS opcodes such as barrier and semaphore
		 * are supported in the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case IP_VERSION(9, 4, 2):
		/* deprecated for Aldebaran, no usage at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
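
	/* data now holds the union of fused-off (CC_GC_SHADER_ARRAY_CONFIG)
	 * and user-disabled (GC_USER_SHADER_ARRAY_CONFIG) CUs for the
	 * currently selected SE/SH; the active bitmap returned below is its
	 * complement, clipped to max_cu_per_sh bits.
	 */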

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the bitmap array size 4*4, and it can cover all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
			 * 4x4 array, which is usually suitable for Vega ASICs with
			 * their 4*2 SE/SH layout.
			 * But for Arcturus, the SE/SH layout changed to 8*1.
			 * To mostly reduce the impact, we make it compatible
			 * with the current bitmap array as below:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};