/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_0_cleaner_shader.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS	1
#define GFX9_NUM_SW_GFX_RINGS	2
#define GFX9_MEC_HPD_SIZE	4096
#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET	0x00000000L

#define mmGCEA_PROBE_MAP			0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX		0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

#define mmTCP_CHAN_STEER_0_ARCT				0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_1_ARCT				0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_2_ARCT				0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_3_ARCT				0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_4_ARCT				0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX		0
#define mmTCP_CHAN_STEER_5_ARCT				0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX		0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir			0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX	1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir			0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX	1

static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
	/* packet headers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
	/* compute queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP)
};

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END =
		TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
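/*
 * Note on the table below: AMDGPU_RAS_SUB_BLOCK() builds one
 * ras_gfx_subblocks[] entry, mapping the driver's AMDGPU_RAS_BLOCK__*
 * index to its TA sub-block id and packing the error-type flags
 * (arguments a-d into hw_supported_error_type bits 0-3, e-h into
 * sw_supported_error_type).
 */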
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
		#subblock, \
		TA_RAS_BLOCK__##subblock, \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

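/*
 * Golden register settings: per-ASIC (mask, value) pairs programmed on
 * top of the hardware defaults via soc15_program_register_sequence()
 * during init; a common list is applied in addition to the ASIC-specific
 * overrides (see gfx_v9_0_init_golden_registers()).
 */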
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);

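/*
 * Emit a SET_RESOURCES packet on the KIQ ring: hand the scheduler the
 * mask of compute queues it may use and the 256-byte-aligned MC address
 * of the cleaner shader.
 */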
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			  lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			  upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			  /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			  /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  /* num_queues: must be 1 */
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      enum amdgpu_unmap_queues_action action,
				      u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring,
				      u64 addr,
				      u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					 uint16_t pasid, uint32_t flush_type,
					 bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
					uint32_t xcc_id, uint32_t vmid)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	unsigned i;

	/* enter safe mode */
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
		/* wait till dequeue takes effect */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout)
			dev_err(adev->dev, "fail to wait on hqd deactive\n");
	} else {
		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
	}

	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* exit safe mode */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}

static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need the common golden settings */
	case IP_VERSION(9, 4, 2):
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

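/*
 * Set adev->gfx.{me,mec}_fw_write_wait from the CP firmware versions and
 * warn once if the firmware is too old; the flags gate use of the
 * firmware's combined write-then-wait register operation elsewhere in
 * the driver.
 */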
adev->gfx.me_fw_write_wait = false; 1296 adev->gfx.mec_fw_write_wait = false; 1297 1298 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && 1299 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) && 1300 ((adev->gfx.mec_fw_version < 0x000001a5) || 1301 (adev->gfx.mec_feature_version < 46) || 1302 (adev->gfx.pfp_fw_version < 0x000000b7) || 1303 (adev->gfx.pfp_feature_version < 46))) 1304 DRM_WARN_ONCE("CP firmware version too old, please update!"); 1305 1306 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1307 case IP_VERSION(9, 0, 1): 1308 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1309 (adev->gfx.me_feature_version >= 42) && 1310 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1311 (adev->gfx.pfp_feature_version >= 42)) 1312 adev->gfx.me_fw_write_wait = true; 1313 1314 if ((adev->gfx.mec_fw_version >= 0x00000193) && 1315 (adev->gfx.mec_feature_version >= 42)) 1316 adev->gfx.mec_fw_write_wait = true; 1317 break; 1318 case IP_VERSION(9, 2, 1): 1319 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1320 (adev->gfx.me_feature_version >= 44) && 1321 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1322 (adev->gfx.pfp_feature_version >= 44)) 1323 adev->gfx.me_fw_write_wait = true; 1324 1325 if ((adev->gfx.mec_fw_version >= 0x00000196) && 1326 (adev->gfx.mec_feature_version >= 44)) 1327 adev->gfx.mec_fw_write_wait = true; 1328 break; 1329 case IP_VERSION(9, 4, 0): 1330 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1331 (adev->gfx.me_feature_version >= 44) && 1332 (adev->gfx.pfp_fw_version >= 0x000000b2) && 1333 (adev->gfx.pfp_feature_version >= 44)) 1334 adev->gfx.me_fw_write_wait = true; 1335 1336 if ((adev->gfx.mec_fw_version >= 0x00000197) && 1337 (adev->gfx.mec_feature_version >= 44)) 1338 adev->gfx.mec_fw_write_wait = true; 1339 break; 1340 case IP_VERSION(9, 1, 0): 1341 case IP_VERSION(9, 2, 2): 1342 if ((adev->gfx.me_fw_version >= 0x0000009c) && 1343 (adev->gfx.me_feature_version >= 42) && 1344 (adev->gfx.pfp_fw_version >= 0x000000b1) && 1345 (adev->gfx.pfp_feature_version >= 42)) 1346 adev->gfx.me_fw_write_wait = true; 1347 1348 if ((adev->gfx.mec_fw_version >= 0x00000192) && 1349 (adev->gfx.mec_feature_version >= 42)) 1350 adev->gfx.mec_fw_write_wait = true; 1351 break; 1352 default: 1353 adev->gfx.me_fw_write_wait = true; 1354 adev->gfx.mec_fw_write_wait = true; 1355 break; 1356 } 1357 } 1358 1359 struct amdgpu_gfxoff_quirk { 1360 u16 chip_vendor; 1361 u16 chip_device; 1362 u16 subsys_vendor; 1363 u16 subsys_device; 1364 u8 revision; 1365 }; 1366 1367 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { 1368 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */ 1369 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 }, 1370 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */ 1371 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, 1372 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ 1373 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, 1374 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ 1375 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, 1376 /* https://bbs.openkylin.top/t/topic/171497 */ 1377 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 }, 1378 /* HP 705G4 DM with R5 2400G */ 1379 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 }, 1380 { 0, 0, 0, 0, 0 }, 1381 }; 1382 1383 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev) 1384 { 1385 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list; 1386 1387 while (p && p->chip_device != 0) { 1388 if (pdev->vendor == p->chip_vendor && 1389 pdev->device == p->chip_device && 1390 
pdev->subsystem_vendor == p->subsys_vendor && 1391 pdev->subsystem_device == p->subsys_device && 1392 pdev->revision == p->revision) { 1393 return true; 1394 } 1395 ++p; 1396 } 1397 return false; 1398 } 1399 1400 static bool is_raven_kicker(struct amdgpu_device *adev) 1401 { 1402 if (adev->pm.fw_version >= 0x41e2b) 1403 return true; 1404 else 1405 return false; 1406 } 1407 1408 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) 1409 { 1410 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && 1411 (adev->gfx.me_fw_version >= 0x000000a5) && 1412 (adev->gfx.me_feature_version >= 52)) 1413 return true; 1414 else 1415 return false; 1416 } 1417 1418 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 1419 { 1420 if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) 1421 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1422 1423 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1424 case IP_VERSION(9, 0, 1): 1425 case IP_VERSION(9, 2, 1): 1426 case IP_VERSION(9, 4, 0): 1427 break; 1428 case IP_VERSION(9, 2, 2): 1429 case IP_VERSION(9, 1, 0): 1430 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) || 1431 (adev->apu_flags & AMD_APU_IS_PICASSO)) && 1432 ((!is_raven_kicker(adev) && 1433 adev->gfx.rlc_fw_version < 531) || 1434 (adev->gfx.rlc_feature_version < 1) || 1435 !adev->gfx.rlc.is_rlc_v2_1)) 1436 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 1437 1438 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1439 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1440 AMD_PG_SUPPORT_CP | 1441 AMD_PG_SUPPORT_RLC_SMU_HS; 1442 break; 1443 case IP_VERSION(9, 3, 0): 1444 if (adev->pm.pp_feature & PP_GFXOFF_MASK) 1445 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | 1446 AMD_PG_SUPPORT_CP | 1447 AMD_PG_SUPPORT_RLC_SMU_HS; 1448 break; 1449 default: 1450 break; 1451 } 1452 } 1453 1454 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, 1455 char *chip_name) 1456 { 1457 int err; 1458 1459 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 1460 AMDGPU_UCODE_REQUIRED, 1461 "amdgpu/%s_pfp.bin", chip_name); 1462 if (err) 1463 goto out; 1464 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 1465 1466 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 1467 AMDGPU_UCODE_REQUIRED, 1468 "amdgpu/%s_me.bin", chip_name); 1469 if (err) 1470 goto out; 1471 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 1472 1473 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, 1474 AMDGPU_UCODE_REQUIRED, 1475 "amdgpu/%s_ce.bin", chip_name); 1476 if (err) 1477 goto out; 1478 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); 1479 1480 out: 1481 if (err) { 1482 amdgpu_ucode_release(&adev->gfx.pfp_fw); 1483 amdgpu_ucode_release(&adev->gfx.me_fw); 1484 amdgpu_ucode_release(&adev->gfx.ce_fw); 1485 } 1486 return err; 1487 } 1488 1489 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, 1490 char *chip_name) 1491 { 1492 int err; 1493 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1494 uint16_t version_major; 1495 uint16_t version_minor; 1496 uint32_t smu_version; 1497 1498 /* 1499 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 1500 * instead of picasso_rlc.bin. 
1501 * Judgment method: 1502 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 1503 * or revision >= 0xD8 && revision <= 0xDF 1504 * otherwise is PCO FP5 1505 */ 1506 if (!strcmp(chip_name, "picasso") && 1507 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 1508 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 1509 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1510 AMDGPU_UCODE_REQUIRED, 1511 "amdgpu/%s_rlc_am4.bin", chip_name); 1512 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 1513 (smu_version >= 0x41e2b)) 1514 /** 1515 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 1516 */ 1517 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1518 AMDGPU_UCODE_REQUIRED, 1519 "amdgpu/%s_kicker_rlc.bin", chip_name); 1520 else 1521 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 1522 AMDGPU_UCODE_REQUIRED, 1523 "amdgpu/%s_rlc.bin", chip_name); 1524 if (err) 1525 goto out; 1526 1527 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1528 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1529 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1530 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 1531 out: 1532 if (err) 1533 amdgpu_ucode_release(&adev->gfx.rlc_fw); 1534 1535 return err; 1536 } 1537 1538 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) 1539 { 1540 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || 1541 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || 1542 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) 1543 return false; 1544 1545 return true; 1546 } 1547 1548 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, 1549 char *chip_name) 1550 { 1551 int err; 1552 1553 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1554 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1555 AMDGPU_UCODE_REQUIRED, 1556 "amdgpu/%s_sjt_mec.bin", chip_name); 1557 else 1558 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 1559 AMDGPU_UCODE_REQUIRED, 1560 "amdgpu/%s_mec.bin", chip_name); 1561 if (err) 1562 goto out; 1563 1564 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 1565 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 1566 1567 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { 1568 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) 1569 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1570 AMDGPU_UCODE_REQUIRED, 1571 "amdgpu/%s_sjt_mec2.bin", chip_name); 1572 else 1573 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, 1574 AMDGPU_UCODE_REQUIRED, 1575 "amdgpu/%s_mec2.bin", chip_name); 1576 if (!err) { 1577 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); 1578 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); 1579 } else { 1580 err = 0; 1581 amdgpu_ucode_release(&adev->gfx.mec2_fw); 1582 } 1583 } else { 1584 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; 1585 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; 1586 } 1587 1588 gfx_v9_0_check_if_need_gfxoff(adev); 1589 gfx_v9_0_check_fw_write_wait(adev); 1590 1591 out: 1592 if (err) 1593 amdgpu_ucode_release(&adev->gfx.mec_fw); 1594 return err; 1595 } 1596 1597 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 1598 { 1599 char ucode_prefix[30]; 1600 int r; 1601 1602 DRM_DEBUG("\n"); 1603 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, 
ucode_prefix, sizeof(ucode_prefix)); 1604 1605 /* No CPG in Arcturus */ 1606 if (adev->gfx.num_gfx_rings) { 1607 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix); 1608 if (r) 1609 return r; 1610 } 1611 1612 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix); 1613 if (r) 1614 return r; 1615 1616 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix); 1617 if (r) 1618 return r; 1619 1620 return r; 1621 } 1622 1623 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) 1624 { 1625 u32 count = 0; 1626 const struct cs_section_def *sect = NULL; 1627 const struct cs_extent_def *ext = NULL; 1628 1629 /* begin clear state */ 1630 count += 2; 1631 /* context control state */ 1632 count += 3; 1633 1634 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 1635 for (ext = sect->section; ext->extent != NULL; ++ext) { 1636 if (sect->id == SECT_CONTEXT) 1637 count += 2 + ext->reg_count; 1638 else 1639 return 0; 1640 } 1641 } 1642 1643 /* end clear state */ 1644 count += 2; 1645 /* clear state */ 1646 count += 2; 1647 1648 return count; 1649 } 1650 1651 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, 1652 volatile u32 *buffer) 1653 { 1654 u32 count = 0; 1655 1656 if (adev->gfx.rlc.cs_data == NULL) 1657 return; 1658 if (buffer == NULL) 1659 return; 1660 1661 count = amdgpu_gfx_csb_preamble_start(buffer); 1662 count = amdgpu_gfx_csb_data_parser(adev, buffer, count); 1663 amdgpu_gfx_csb_preamble_end(buffer, count); 1664 } 1665 1666 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) 1667 { 1668 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 1669 uint32_t pg_always_on_cu_num = 2; 1670 uint32_t always_on_cu_num; 1671 uint32_t i, j, k; 1672 uint32_t mask, cu_bitmap, counter; 1673 1674 if (adev->flags & AMD_IS_APU) 1675 always_on_cu_num = 4; 1676 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) 1677 always_on_cu_num = 8; 1678 else 1679 always_on_cu_num = 12; 1680 1681 mutex_lock(&adev->grbm_idx_mutex); 1682 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1683 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1684 mask = 1; 1685 cu_bitmap = 0; 1686 counter = 0; 1687 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 1688 1689 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 1690 if (cu_info->bitmap[0][i][j] & mask) { 1691 if (counter == pg_always_on_cu_num) 1692 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); 1693 if (counter < always_on_cu_num) 1694 cu_bitmap |= mask; 1695 else 1696 break; 1697 counter++; 1698 } 1699 mask <<= 1; 1700 } 1701 1702 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); 1703 cu_info->ao_cu_bitmap[i][j] = cu_bitmap; 1704 } 1705 } 1706 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 1707 mutex_unlock(&adev->grbm_idx_mutex); 1708 } 1709 1710 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) 1711 { 1712 uint32_t data; 1713 1714 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ 1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); 1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); 1717 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); 1718 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); 1719 1720 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ 1721 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); 1722 1723 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ 1724 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); 1725 1726 mutex_lock(&adev->grbm_idx_mutex); 1727 /* set mmRLC_LB_INIT_CU_MASK thru 
broadcast mode to enable all SE/SH */
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
		return 5;
	else
		return 4;
}

static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->flags & AMD_IS_APU) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	return 0;
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v9_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data,
fw_size); 1923 1924 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1925 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1926 1927 return 0; 1928 } 1929 1930 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 1931 { 1932 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1933 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1934 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1935 (address << SQ_IND_INDEX__INDEX__SHIFT) | 1936 (SQ_IND_INDEX__FORCE_READ_MASK)); 1937 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1938 } 1939 1940 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 1941 uint32_t wave, uint32_t thread, 1942 uint32_t regno, uint32_t num, uint32_t *out) 1943 { 1944 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX, 1945 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1946 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 1947 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1948 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 1949 (SQ_IND_INDEX__FORCE_READ_MASK) | 1950 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1951 while (num--) 1952 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); 1953 } 1954 1955 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1956 { 1957 /* type 1 wave data */ 1958 dst[(*no_fields)++] = 1; 1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 1965 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 1966 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 1967 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 1968 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 1969 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 1970 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 1971 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 1972 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 1973 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 1974 } 1975 1976 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1977 uint32_t wave, uint32_t start, 1978 uint32_t size, uint32_t *dst) 1979 { 1980 wave_read_regs( 1981 adev, simd, wave, 0, 1982 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 1983 } 1984 1985 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1986 uint32_t wave, uint32_t thread, 1987 uint32_t start, uint32_t size, 1988 uint32_t *dst) 1989 { 1990 wave_read_regs( 1991 adev, simd, wave, thread, 1992 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1993 } 1994 1995 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, 1996 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1997 { 1998 soc15_grbm_select(adev, me, pipe, q, vm, 0); 1999 } 2000 2001 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { 2002 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, 2003 .select_se_sh = &gfx_v9_0_select_se_sh, 2004 .read_wave_data = &gfx_v9_0_read_wave_data, 2005 .read_wave_sgprs = 
&gfx_v9_0_read_wave_sgprs, 2006 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 2007 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, 2008 }; 2009 2010 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { 2011 .ras_error_inject = &gfx_v9_0_ras_error_inject, 2012 .query_ras_error_count = &gfx_v9_0_query_ras_error_count, 2013 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, 2014 }; 2015 2016 static struct amdgpu_gfx_ras gfx_v9_0_ras = { 2017 .ras_block = { 2018 .hw_ops = &gfx_v9_0_ras_ops, 2019 }, 2020 }; 2021 2022 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 2023 { 2024 u32 gb_addr_config; 2025 int err; 2026 2027 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2028 case IP_VERSION(9, 0, 1): 2029 adev->gfx.config.max_hw_contexts = 8; 2030 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2031 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2032 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2033 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2034 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 2035 break; 2036 case IP_VERSION(9, 2, 1): 2037 adev->gfx.config.max_hw_contexts = 8; 2038 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2039 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2040 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2041 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2042 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 2043 DRM_INFO("fix gfx.config for vega12\n"); 2044 break; 2045 case IP_VERSION(9, 4, 0): 2046 adev->gfx.ras = &gfx_v9_0_ras; 2047 adev->gfx.config.max_hw_contexts = 8; 2048 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2049 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2050 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2051 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2052 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2053 gb_addr_config &= ~0xf3e777ff; 2054 gb_addr_config |= 0x22014042; 2055 /* check vbios table if gpu info is not available */ 2056 err = amdgpu_atomfirmware_get_gfx_info(adev); 2057 if (err) 2058 return err; 2059 break; 2060 case IP_VERSION(9, 2, 2): 2061 case IP_VERSION(9, 1, 0): 2062 adev->gfx.config.max_hw_contexts = 8; 2063 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2064 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2065 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2066 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2067 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 2068 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 2069 else 2070 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 2071 break; 2072 case IP_VERSION(9, 4, 1): 2073 adev->gfx.ras = &gfx_v9_4_ras; 2074 adev->gfx.config.max_hw_contexts = 8; 2075 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2076 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2077 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2078 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2079 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2080 gb_addr_config &= ~0xf3e777ff; 2081 gb_addr_config |= 0x22014042; 2082 break; 2083 case IP_VERSION(9, 3, 0): 2084 adev->gfx.config.max_hw_contexts = 8; 2085 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2086 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2087 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 2088 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2089 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2090 gb_addr_config &= ~0xf3e777ff; 2091 gb_addr_config |= 0x22010042; 2092 break; 2093 case IP_VERSION(9, 4, 2): 2094 adev->gfx.ras = 
&gfx_v9_4_2_ras; 2095 adev->gfx.config.max_hw_contexts = 8; 2096 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 2097 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 2098 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 2099 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 2100 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 2101 gb_addr_config &= ~0xf3e777ff; 2102 gb_addr_config |= 0x22014042; 2103 /* check vbios table if gpu info is not available */ 2104 err = amdgpu_atomfirmware_get_gfx_info(adev); 2105 if (err) 2106 return err; 2107 break; 2108 default: 2109 BUG(); 2110 break; 2111 } 2112 2113 adev->gfx.config.gb_addr_config = gb_addr_config; 2114 2115 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2116 REG_GET_FIELD( 2117 adev->gfx.config.gb_addr_config, 2118 GB_ADDR_CONFIG, 2119 NUM_PIPES); 2120 2121 adev->gfx.config.max_tile_pipes = 2122 adev->gfx.config.gb_addr_config_fields.num_pipes; 2123 2124 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 2125 REG_GET_FIELD( 2126 adev->gfx.config.gb_addr_config, 2127 GB_ADDR_CONFIG, 2128 NUM_BANKS); 2129 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2130 REG_GET_FIELD( 2131 adev->gfx.config.gb_addr_config, 2132 GB_ADDR_CONFIG, 2133 MAX_COMPRESSED_FRAGS); 2134 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2135 REG_GET_FIELD( 2136 adev->gfx.config.gb_addr_config, 2137 GB_ADDR_CONFIG, 2138 NUM_RB_PER_SE); 2139 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2140 REG_GET_FIELD( 2141 adev->gfx.config.gb_addr_config, 2142 GB_ADDR_CONFIG, 2143 NUM_SHADER_ENGINES); 2144 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2145 REG_GET_FIELD( 2146 adev->gfx.config.gb_addr_config, 2147 GB_ADDR_CONFIG, 2148 PIPE_INTERLEAVE_SIZE)); 2149 2150 return 0; 2151 } 2152 2153 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 2154 int mec, int pipe, int queue) 2155 { 2156 unsigned irq_type; 2157 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 2158 unsigned int hw_prio; 2159 2160 ring = &adev->gfx.compute_ring[ring_id]; 2161 2162 /* mec0 is me1 */ 2163 ring->me = mec + 1; 2164 ring->pipe = pipe; 2165 ring->queue = queue; 2166 2167 ring->ring_obj = NULL; 2168 ring->use_doorbell = true; 2169 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 2170 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2171 + (ring_id * GFX9_MEC_HPD_SIZE); 2172 ring->vm_hub = AMDGPU_GFXHUB(0); 2173 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2174 2175 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2176 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2177 + ring->pipe; 2178 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
2179 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 2180 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2181 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 2182 hw_prio, NULL); 2183 } 2184 2185 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) 2186 { 2187 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 2188 uint32_t *ptr; 2189 uint32_t inst; 2190 2191 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 2192 if (!ptr) { 2193 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 2194 adev->gfx.ip_dump_core = NULL; 2195 } else { 2196 adev->gfx.ip_dump_core = ptr; 2197 } 2198 2199 /* Allocate memory for compute queue registers for all the instances */ 2200 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 2201 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 2202 adev->gfx.mec.num_queue_per_pipe; 2203 2204 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 2205 if (!ptr) { 2206 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 2207 adev->gfx.ip_dump_compute_queues = NULL; 2208 } else { 2209 adev->gfx.ip_dump_compute_queues = ptr; 2210 } 2211 } 2212 2213 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) 2214 { 2215 int i, j, k, r, ring_id; 2216 int xcc_id = 0; 2217 struct amdgpu_ring *ring; 2218 struct amdgpu_device *adev = ip_block->adev; 2219 unsigned int hw_prio; 2220 2221 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2222 case IP_VERSION(9, 0, 1): 2223 case IP_VERSION(9, 2, 1): 2224 case IP_VERSION(9, 4, 0): 2225 case IP_VERSION(9, 2, 2): 2226 case IP_VERSION(9, 1, 0): 2227 case IP_VERSION(9, 4, 1): 2228 case IP_VERSION(9, 3, 0): 2229 case IP_VERSION(9, 4, 2): 2230 adev->gfx.mec.num_mec = 2; 2231 break; 2232 default: 2233 adev->gfx.mec.num_mec = 1; 2234 break; 2235 } 2236 2237 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2238 case IP_VERSION(9, 4, 2): 2239 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; 2240 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); 2241 if (adev->gfx.mec_fw_version >= 88) { 2242 adev->gfx.enable_cleaner_shader = true; 2243 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 2244 if (r) { 2245 adev->gfx.enable_cleaner_shader = false; 2246 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 2247 } 2248 } 2249 break; 2250 default: 2251 adev->gfx.enable_cleaner_shader = false; 2252 break; 2253 } 2254 2255 adev->gfx.mec.num_pipe_per_mec = 4; 2256 adev->gfx.mec.num_queue_per_pipe = 8; 2257 2258 /* EOP Event */ 2259 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 2260 if (r) 2261 return r; 2262 2263 /* Bad opcode Event */ 2264 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 2265 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR, 2266 &adev->gfx.bad_op_irq); 2267 if (r) 2268 return r; 2269 2270 /* Privileged reg */ 2271 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 2272 &adev->gfx.priv_reg_irq); 2273 if (r) 2274 return r; 2275 2276 /* Privileged inst */ 2277 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 2278 &adev->gfx.priv_inst_irq); 2279 if (r) 2280 return r; 2281 2282 /* ECC error */ 2283 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 2284 &adev->gfx.cp_ecc_error_irq); 2285 if (r) 2286 return r; 2287 2288 /* FUE error */ 2289 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 
2290 &adev->gfx.cp_ecc_error_irq); 2291 if (r) 2292 return r; 2293 2294 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2295 2296 if (adev->gfx.rlc.funcs) { 2297 if (adev->gfx.rlc.funcs->init) { 2298 r = adev->gfx.rlc.funcs->init(adev); 2299 if (r) { 2300 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 2301 return r; 2302 } 2303 } 2304 } 2305 2306 r = gfx_v9_0_mec_init(adev); 2307 if (r) { 2308 DRM_ERROR("Failed to init MEC BOs!\n"); 2309 return r; 2310 } 2311 2312 /* set up the gfx ring */ 2313 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2314 ring = &adev->gfx.gfx_ring[i]; 2315 ring->ring_obj = NULL; 2316 if (!i) 2317 sprintf(ring->name, "gfx"); 2318 else 2319 sprintf(ring->name, "gfx_%d", i); 2320 ring->use_doorbell = true; 2321 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2322 2323 /* disable scheduler on the real ring */ 2324 ring->no_scheduler = adev->gfx.mcbp; 2325 ring->vm_hub = AMDGPU_GFXHUB(0); 2326 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2327 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 2328 AMDGPU_RING_PRIO_DEFAULT, NULL); 2329 if (r) 2330 return r; 2331 } 2332 2333 /* set up the software rings */ 2334 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2335 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2336 ring = &adev->gfx.sw_gfx_ring[i]; 2337 ring->ring_obj = NULL; 2338 sprintf(ring->name, amdgpu_sw_ring_name(i)); 2339 ring->use_doorbell = true; 2340 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 2341 ring->is_sw_ring = true; 2342 hw_prio = amdgpu_sw_ring_priority(i); 2343 ring->vm_hub = AMDGPU_GFXHUB(0); 2344 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2345 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, 2346 NULL); 2347 if (r) 2348 return r; 2349 ring->wptr = 0; 2350 } 2351 2352 /* init the muxer and add software rings */ 2353 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], 2354 GFX9_NUM_SW_GFX_RINGS); 2355 if (r) { 2356 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); 2357 return r; 2358 } 2359 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { 2360 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, 2361 &adev->gfx.sw_gfx_ring[i]); 2362 if (r) { 2363 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); 2364 return r; 2365 } 2366 } 2367 } 2368 2369 /* set up the compute queues - allocate horizontally across pipes */ 2370 ring_id = 0; 2371 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2372 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2373 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2374 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 2375 k, j)) 2376 continue; 2377 2378 r = gfx_v9_0_compute_ring_init(adev, 2379 ring_id, 2380 i, k, j); 2381 if (r) 2382 return r; 2383 2384 ring_id++; 2385 } 2386 } 2387 } 2388 2389 /* TODO: Add queue reset mask when FW fully supports it */ 2390 adev->gfx.gfx_supported_reset = 2391 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 2392 adev->gfx.compute_supported_reset = 2393 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 2394 2395 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); 2396 if (r) { 2397 DRM_ERROR("Failed to init KIQ BOs!\n"); 2398 return r; 2399 } 2400 2401 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2402 if (r) 2403 return r; 2404 2405 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 2406 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); 2407 if (r) 2408 return r; 2409 2410 adev->gfx.ce_ram_size = 0x8000; 2411 2412 r = gfx_v9_0_gpu_early_init(adev); 2413 if (r) 2414 
return r; 2415 2416 if (amdgpu_gfx_ras_sw_init(adev)) { 2417 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 2418 return -EINVAL; 2419 } 2420 2421 gfx_v9_0_alloc_ip_dump(adev); 2422 2423 r = amdgpu_gfx_sysfs_init(adev); 2424 if (r) 2425 return r; 2426 2427 return 0; 2428 } 2429 2430 2431 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) 2432 { 2433 int i; 2434 struct amdgpu_device *adev = ip_block->adev; 2435 2436 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 2437 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 2438 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); 2439 amdgpu_ring_mux_fini(&adev->gfx.muxer); 2440 } 2441 2442 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2443 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2444 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2445 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2446 2447 amdgpu_gfx_mqd_sw_fini(adev, 0); 2448 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2449 amdgpu_gfx_kiq_fini(adev, 0); 2450 2451 amdgpu_gfx_cleaner_shader_sw_fini(adev); 2452 2453 gfx_v9_0_mec_fini(adev); 2454 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2455 &adev->gfx.rlc.clear_state_gpu_addr, 2456 (void **)&adev->gfx.rlc.cs_ptr); 2457 if (adev->flags & AMD_IS_APU) { 2458 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2459 &adev->gfx.rlc.cp_table_gpu_addr, 2460 (void **)&adev->gfx.rlc.cp_table_ptr); 2461 } 2462 gfx_v9_0_free_microcode(adev); 2463 2464 amdgpu_gfx_sysfs_fini(adev); 2465 2466 kfree(adev->gfx.ip_dump_core); 2467 kfree(adev->gfx.ip_dump_compute_queues); 2468 2469 return 0; 2470 } 2471 2472 2473 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 2474 { 2475 /* TODO */ 2476 } 2477 2478 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, 2479 u32 instance, int xcc_id) 2480 { 2481 u32 data; 2482 2483 if (instance == 0xffffffff) 2484 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 2485 else 2486 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 2487 2488 if (se_num == 0xffffffff) 2489 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 2490 else 2491 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2492 2493 if (sh_num == 0xffffffff) 2494 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 2495 else 2496 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 2497 2498 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 2499 } 2500 2501 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2502 { 2503 u32 data, mask; 2504 2505 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 2506 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 2507 2508 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 2509 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 2510 2511 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 2512 adev->gfx.config.max_sh_per_se); 2513 2514 return (~data) & mask; 2515 } 2516 2517 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 2518 { 2519 int i, j; 2520 u32 data; 2521 u32 active_rbs = 0; 2522 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 2523 adev->gfx.config.max_sh_per_se; 2524 2525 mutex_lock(&adev->grbm_idx_mutex); 2526 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2527 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2528 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2529 data = gfx_v9_0_get_rb_active_bitmap(adev); 2530 active_rbs |= data << ((i * 
adev->gfx.config.max_sh_per_se + j) * 2531 rb_bitmap_width_per_sh); 2532 } 2533 } 2534 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2535 mutex_unlock(&adev->grbm_idx_mutex); 2536 2537 adev->gfx.config.backend_enable_mask = active_rbs; 2538 adev->gfx.config.num_rbs = hweight32(active_rbs); 2539 } 2540 2541 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, 2542 uint32_t first_vmid, 2543 uint32_t last_vmid) 2544 { 2545 uint32_t data; 2546 uint32_t trap_config_vmid_mask = 0; 2547 int i; 2548 2549 /* Calculate trap config vmid mask */ 2550 for (i = first_vmid; i < last_vmid; i++) 2551 trap_config_vmid_mask |= (1 << i); 2552 2553 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, 2554 VMID_SEL, trap_config_vmid_mask); 2555 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 2556 TRAP_EN, 1); 2557 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 2558 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 2559 2560 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 2561 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 2562 } 2563 2564 #define DEFAULT_SH_MEM_BASES (0x6000) 2565 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 2566 { 2567 int i; 2568 uint32_t sh_mem_config; 2569 uint32_t sh_mem_bases; 2570 2571 /* 2572 * Configure apertures: 2573 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2574 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2575 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2576 */ 2577 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 2578 2579 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 2580 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 2581 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 2582 2583 mutex_lock(&adev->srbm_mutex); 2584 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2585 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2586 /* CP and shaders */ 2587 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 2588 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 2589 } 2590 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2591 mutex_unlock(&adev->srbm_mutex); 2592 2593 /* Initialize all compute VMIDs to have no GDS, GWS, or OA 2594 access. These should be enabled by FW for target VMIDs. */ 2595 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); 2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); 2598 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0); 2599 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0); 2600 } 2601 } 2602 2603 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) 2604 { 2605 int vmid; 2606 2607 /* 2608 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2609 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2610 * the driver can enable them for graphics. VMID0 should maintain 2611 * access so that HWS firmware can save/restore entries. 
2612 */ 2613 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { 2614 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); 2615 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); 2616 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); 2617 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); 2618 } 2619 } 2620 2621 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) 2622 { 2623 uint32_t tmp; 2624 2625 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 2626 case IP_VERSION(9, 4, 1): 2627 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); 2628 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, 2629 !READ_ONCE(adev->barrier_has_auto_waitcnt)); 2630 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); 2631 break; 2632 default: 2633 break; 2634 } 2635 } 2636 2637 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 2638 { 2639 u32 tmp; 2640 int i; 2641 2642 if (!amdgpu_sriov_vf(adev) || 2643 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) { 2644 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2645 } 2646 2647 gfx_v9_0_tiling_mode_table_init(adev); 2648 2649 if (adev->gfx.num_gfx_rings) 2650 gfx_v9_0_setup_rb(adev); 2651 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 2652 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 2653 2654 /* XXX SH_MEM regs */ 2655 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2656 mutex_lock(&adev->srbm_mutex); 2657 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2658 soc15_grbm_select(adev, 0, 0, 0, i, 0); 2659 /* CP and shaders */ 2660 if (i == 0) { 2661 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2662 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2663 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2664 !!adev->gmc.noretry); 2665 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2666 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 2667 } else { 2668 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 2669 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 2670 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE, 2671 !!adev->gmc.noretry); 2672 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 2673 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2674 (adev->gmc.private_aperture_start >> 48)); 2675 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2676 (adev->gmc.shared_aperture_start >> 48)); 2677 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 2678 } 2679 } 2680 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 2681 2682 mutex_unlock(&adev->srbm_mutex); 2683 2684 gfx_v9_0_init_compute_vmid(adev); 2685 gfx_v9_0_init_gds_vmid(adev); 2686 gfx_v9_0_init_sq_config(adev); 2687 } 2688 2689 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 2690 { 2691 u32 i, j, k; 2692 u32 mask; 2693 2694 mutex_lock(&adev->grbm_idx_mutex); 2695 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 2696 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 2697 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 2698 for (k = 0; k < adev->usec_timeout; k++) { 2699 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 2700 break; 2701 udelay(1); 2702 } 2703 if (k == adev->usec_timeout) { 2704 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 2705 0xffffffff, 0xffffffff, 0); 2706 mutex_unlock(&adev->grbm_idx_mutex); 2707 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2708 i, j); 2709 return; 2710 } 2711 } 2712 } 2713 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 2714 mutex_unlock(&adev->grbm_idx_mutex); 2715 2716 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2717 
RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2718 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2719 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2720 for (k = 0; k < adev->usec_timeout; k++) { 2721 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2722 break; 2723 udelay(1); 2724 } 2725 } 2726 2727 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2728 bool enable) 2729 { 2730 u32 tmp; 2731 2732 /* These interrupts should be enabled to drive DS clock */ 2733 2734 tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2735 2736 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2737 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2738 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2739 if (adev->gfx.num_gfx_rings) 2740 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2741 2742 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2743 } 2744 2745 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2746 { 2747 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2748 /* csib */ 2749 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2750 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2751 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2752 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2753 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2754 adev->gfx.rlc.clear_state_size); 2755 } 2756 2757 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2758 int indirect_offset, 2759 int list_size, 2760 int *unique_indirect_regs, 2761 int unique_indirect_reg_count, 2762 int *indirect_start_offsets, 2763 int *indirect_start_offsets_count, 2764 int max_start_offsets_count) 2765 { 2766 int idx; 2767 2768 for (; indirect_offset < list_size; indirect_offset++) { 2769 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2770 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2771 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2772 2773 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2774 indirect_offset += 2; 2775 2776 /* look for the matching indice */ 2777 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2778 if (unique_indirect_regs[idx] == 2779 register_list_format[indirect_offset] || 2780 !unique_indirect_regs[idx]) 2781 break; 2782 } 2783 2784 BUG_ON(idx >= unique_indirect_reg_count); 2785 2786 if (!unique_indirect_regs[idx]) 2787 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2788 2789 indirect_offset++; 2790 } 2791 } 2792 } 2793 2794 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2795 { 2796 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2797 int unique_indirect_reg_count = 0; 2798 2799 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2800 int indirect_start_offsets_count = 0; 2801 2802 int list_size = 0; 2803 int i = 0, j = 0; 2804 u32 tmp = 0; 2805 2806 u32 *register_list_format = 2807 kmemdup(adev->gfx.rlc.register_list_format, 2808 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2809 if (!register_list_format) 2810 return -ENOMEM; 2811 2812 /* setup unique_indirect_regs array and indirect_start_offsets array */ 2813 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2814 gfx_v9_1_parse_ind_reg_list(register_list_format, 2815 
adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2816 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2817 unique_indirect_regs, 2818 unique_indirect_reg_count, 2819 indirect_start_offsets, 2820 &indirect_start_offsets_count, 2821 ARRAY_SIZE(indirect_start_offsets)); 2822 2823 /* enable auto inc in case it is disabled */ 2824 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2825 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2826 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2827 2828 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2829 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2830 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2831 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2833 adev->gfx.rlc.register_restore[i]); 2834 2835 /* load indirect register */ 2836 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2837 adev->gfx.rlc.reg_list_format_start); 2838 2839 /* direct register portion */ 2840 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2841 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2842 register_list_format[i]); 2843 2844 /* indirect register portion */ 2845 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2846 if (register_list_format[i] == 0xFFFFFFFF) { 2847 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2848 continue; 2849 } 2850 2851 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2852 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2853 2854 for (j = 0; j < unique_indirect_reg_count; j++) { 2855 if (register_list_format[i] == unique_indirect_regs[j]) { 2856 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2857 break; 2858 } 2859 } 2860 2861 BUG_ON(j >= unique_indirect_reg_count); 2862 2863 i++; 2864 } 2865 2866 /* set save/restore list size */ 2867 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2868 list_size = list_size >> 1; 2869 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2870 adev->gfx.rlc.reg_restore_list_size); 2871 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2872 2873 /* write the starting offsets to RLC scratch ram */ 2874 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2875 adev->gfx.rlc.starting_offsets_start); 2876 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2877 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2878 indirect_start_offsets[i]); 2879 2880 /* load unique indirect regs*/ 2881 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2882 if (unique_indirect_regs[i] != 0) { 2883 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2884 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2885 unique_indirect_regs[i] & 0x3FFFF); 2886 2887 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2888 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2889 unique_indirect_regs[i] >> 20); 2890 } 2891 } 2892 2893 kfree(register_list_format); 2894 return 0; 2895 } 2896 2897 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2898 { 2899 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2900 } 2901 2902 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2903 bool enable) 2904 { 2905 uint32_t data = 0; 2906 uint32_t default_data = 0; 2907 2908 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); 2909 if (enable) { 2910 /* enable GFXIP control over CGPG */ 2911 data |= 
PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);

		/* update status */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if (default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}

static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 60 */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
			pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}

static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
			     enable ? 1 : 0);
	if (default_data != data)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	data = REG_SET_FIELD(data, RLC_PG_CNTL,
			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
			     enable ?
1 : 0); 2994 if(default_data != data) 2995 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 2996 } 2997 2998 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, 2999 bool enable) 3000 { 3001 uint32_t data = 0; 3002 uint32_t default_data = 0; 3003 3004 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3005 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3006 CP_PG_DISABLE, 3007 enable ? 0 : 1); 3008 if(default_data != data) 3009 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3010 } 3011 3012 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 3013 bool enable) 3014 { 3015 uint32_t data, default_data; 3016 3017 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3018 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3019 GFX_POWER_GATING_ENABLE, 3020 enable ? 1 : 0); 3021 if(default_data != data) 3022 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3023 } 3024 3025 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, 3026 bool enable) 3027 { 3028 uint32_t data, default_data; 3029 3030 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3031 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3032 GFX_PIPELINE_PG_ENABLE, 3033 enable ? 1 : 0); 3034 if(default_data != data) 3035 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3036 3037 if (!enable) 3038 /* read any GFX register to wake up GFX */ 3039 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); 3040 } 3041 3042 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 3043 bool enable) 3044 { 3045 uint32_t data, default_data; 3046 3047 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3048 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3049 STATIC_PER_CU_PG_ENABLE, 3050 enable ? 1 : 0); 3051 if(default_data != data) 3052 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3053 } 3054 3055 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 3056 bool enable) 3057 { 3058 uint32_t data, default_data; 3059 3060 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); 3061 data = REG_SET_FIELD(data, RLC_PG_CNTL, 3062 DYN_PER_CU_PG_ENABLE, 3063 enable ? 1 : 0); 3064 if(default_data != data) 3065 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); 3066 } 3067 3068 static void gfx_v9_0_init_pg(struct amdgpu_device *adev) 3069 { 3070 gfx_v9_0_init_csb(adev); 3071 3072 /* 3073 * Rlc save restore list is workable since v2_1. 3074 * And it's needed by gfxoff feature. 
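 * Only GC 9.2.1 and Raven2 have the driver write the list out below;
 * the other parts just enable the save/restore machine.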
3075 */ 3076 if (adev->gfx.rlc.is_rlc_v2_1) { 3077 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3078 IP_VERSION(9, 2, 1) || 3079 (adev->apu_flags & AMD_APU_IS_RAVEN2)) 3080 gfx_v9_1_init_rlc_save_restore_list(adev); 3081 gfx_v9_0_enable_save_restore_machine(adev); 3082 } 3083 3084 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3085 AMD_PG_SUPPORT_GFX_SMG | 3086 AMD_PG_SUPPORT_GFX_DMG | 3087 AMD_PG_SUPPORT_CP | 3088 AMD_PG_SUPPORT_GDS | 3089 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3090 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, 3091 adev->gfx.rlc.cp_table_gpu_addr >> 8); 3092 gfx_v9_0_init_gfx_power_gating(adev); 3093 } 3094 } 3095 3096 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 3097 { 3098 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 3099 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3100 gfx_v9_0_wait_for_rlc_serdes(adev); 3101 } 3102 3103 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 3104 { 3105 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3106 udelay(50); 3107 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3108 udelay(50); 3109 } 3110 3111 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 3112 { 3113 #ifdef AMDGPU_RLC_DEBUG_RETRY 3114 u32 rlc_ucode_ver; 3115 #endif 3116 3117 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 3118 udelay(50); 3119 3120 /* carrizo do enable cp interrupt after cp inited */ 3121 if (!(adev->flags & AMD_IS_APU)) { 3122 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3123 udelay(50); 3124 } 3125 3126 #ifdef AMDGPU_RLC_DEBUG_RETRY 3127 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 3128 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 3129 if(rlc_ucode_ver == 0x108) { 3130 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 3131 rlc_ucode_ver, adev->gfx.rlc_fw_version); 3132 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 3133 * default is 0x9C4 to create a 100us interval */ 3134 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 3135 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 3136 * to disable the page fault retry interrupts, default is 3137 * 0x100 (256) */ 3138 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 3139 } 3140 #endif 3141 } 3142 3143 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 3144 { 3145 const struct rlc_firmware_header_v2_0 *hdr; 3146 const __le32 *fw_data; 3147 unsigned i, fw_size; 3148 3149 if (!adev->gfx.rlc_fw) 3150 return -EINVAL; 3151 3152 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3153 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3154 3155 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3156 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3157 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3158 3159 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 3160 RLCG_UCODE_LOADING_START_ADDRESS); 3161 for (i = 0; i < fw_size; i++) 3162 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3163 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3164 3165 return 0; 3166 } 3167 3168 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 3169 { 3170 int r; 3171 3172 if (amdgpu_sriov_vf(adev)) { 3173 gfx_v9_0_init_csb(adev); 3174 return 0; 3175 } 3176 3177 adev->gfx.rlc.funcs->stop(adev); 3178 3179 /* disable CG */ 3180 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 3181 3182 gfx_v9_0_init_pg(adev); 3183 3184 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3185 /* legacy rlc firmware loading */ 3186 r = 
gfx_v9_0_rlc_load_microcode(adev); 3187 if (r) 3188 return r; 3189 } 3190 3191 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3192 case IP_VERSION(9, 2, 2): 3193 case IP_VERSION(9, 1, 0): 3194 gfx_v9_0_init_lbpw(adev); 3195 if (amdgpu_lbpw == 0) 3196 gfx_v9_0_enable_lbpw(adev, false); 3197 else 3198 gfx_v9_0_enable_lbpw(adev, true); 3199 break; 3200 case IP_VERSION(9, 4, 0): 3201 gfx_v9_4_init_lbpw(adev); 3202 if (amdgpu_lbpw > 0) 3203 gfx_v9_0_enable_lbpw(adev, true); 3204 else 3205 gfx_v9_0_enable_lbpw(adev, false); 3206 break; 3207 default: 3208 break; 3209 } 3210 3211 gfx_v9_0_update_spm_vmid_internal(adev, 0xf); 3212 3213 adev->gfx.rlc.funcs->start(adev); 3214 3215 return 0; 3216 } 3217 3218 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3219 { 3220 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 3221 3222 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1); 3223 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1); 3224 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1); 3225 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1); 3226 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1); 3227 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1); 3228 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1); 3229 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1); 3230 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1); 3231 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3232 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3233 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 3234 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 3235 udelay(50); 3236 } 3237 3238 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3239 { 3240 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3241 const struct gfx_firmware_header_v1_0 *ce_hdr; 3242 const struct gfx_firmware_header_v1_0 *me_hdr; 3243 const __le32 *fw_data; 3244 unsigned i, fw_size; 3245 3246 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3247 return -EINVAL; 3248 3249 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3250 adev->gfx.pfp_fw->data; 3251 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3252 adev->gfx.ce_fw->data; 3253 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3254 adev->gfx.me_fw->data; 3255 3256 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3257 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3258 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3259 3260 gfx_v9_0_cp_gfx_enable(adev, false); 3261 3262 /* PFP */ 3263 fw_data = (const __le32 *) 3264 (adev->gfx.pfp_fw->data + 3265 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3266 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3267 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 3268 for (i = 0; i < fw_size; i++) 3269 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3270 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3271 3272 /* CE */ 3273 fw_data = (const __le32 *) 3274 (adev->gfx.ce_fw->data + 3275 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3276 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3277 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 3278 for (i = 0; i < fw_size; i++) 3279 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3280 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3281 3282 
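/* ME below follows the same sequence as PFP and CE above: reset the
 * write address to 0, stream the ucode dwords through the data register,
 * then write the firmware version back to the address register.
 */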
/* ME */ 3283 fw_data = (const __le32 *) 3284 (adev->gfx.me_fw->data + 3285 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3286 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3287 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 3288 for (i = 0; i < fw_size; i++) 3289 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3290 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3291 3292 return 0; 3293 } 3294 3295 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 3296 { 3297 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3298 const struct cs_section_def *sect = NULL; 3299 const struct cs_extent_def *ext = NULL; 3300 int r, i, tmp; 3301 3302 /* init the CP */ 3303 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3304 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 3305 3306 gfx_v9_0_cp_gfx_enable(adev, true); 3307 3308 /* Now only limit the quirk on the APU gfx9 series and already 3309 * confirmed that the APU gfx10/gfx11 needn't such update. 3310 */ 3311 if (adev->flags & AMD_IS_APU && 3312 adev->in_s3 && !pm_resume_via_firmware()) { 3313 DRM_INFO("Will skip the CSB packet resubmit\n"); 3314 return 0; 3315 } 3316 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 3317 if (r) { 3318 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3319 return r; 3320 } 3321 3322 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3323 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3324 3325 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3326 amdgpu_ring_write(ring, 0x80000000); 3327 amdgpu_ring_write(ring, 0x80000000); 3328 3329 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 3330 for (ext = sect->section; ext->extent != NULL; ++ext) { 3331 if (sect->id == SECT_CONTEXT) { 3332 amdgpu_ring_write(ring, 3333 PACKET3(PACKET3_SET_CONTEXT_REG, 3334 ext->reg_count)); 3335 amdgpu_ring_write(ring, 3336 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3337 for (i = 0; i < ext->reg_count; i++) 3338 amdgpu_ring_write(ring, ext->extent[i]); 3339 } 3340 } 3341 } 3342 3343 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3344 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3345 3346 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3347 amdgpu_ring_write(ring, 0); 3348 3349 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 3350 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 3351 amdgpu_ring_write(ring, 0x8000); 3352 amdgpu_ring_write(ring, 0x8000); 3353 3354 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 3355 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 3356 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 3357 amdgpu_ring_write(ring, tmp); 3358 amdgpu_ring_write(ring, 0); 3359 3360 amdgpu_ring_commit(ring); 3361 3362 return 0; 3363 } 3364 3365 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 3366 { 3367 struct amdgpu_ring *ring; 3368 u32 tmp; 3369 u32 rb_bufsz; 3370 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3371 3372 /* Set the write pointer delay */ 3373 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 3374 3375 /* set the RB to use vmid 0 */ 3376 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 3377 3378 /* Set ring buffer size */ 3379 ring = &adev->gfx.gfx_ring[0]; 3380 rb_bufsz = order_base_2(ring->ring_size / 8); 3381 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3382 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3383 #ifdef __BIG_ENDIAN 3384 tmp = 
REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 3385 #endif 3386 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3387 3388 /* Initialize the ring buffer's write pointers */ 3389 ring->wptr = 0; 3390 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3391 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3392 3393 /* set the wb address whether it's enabled or not */ 3394 rptr_addr = ring->rptr_gpu_addr; 3395 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3396 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3397 3398 wptr_gpu_addr = ring->wptr_gpu_addr; 3399 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 3400 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 3401 3402 mdelay(1); 3403 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 3404 3405 rb_addr = ring->gpu_addr >> 8; 3406 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 3407 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3408 3409 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 3410 if (ring->use_doorbell) { 3411 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3412 DOORBELL_OFFSET, ring->doorbell_index); 3413 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3414 DOORBELL_EN, 1); 3415 } else { 3416 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 3417 } 3418 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 3419 3420 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3421 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3422 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 3423 3424 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 3425 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3426 3427 3428 /* start the ring */ 3429 gfx_v9_0_cp_gfx_start(adev); 3430 3431 return 0; 3432 } 3433 3434 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3435 { 3436 if (enable) { 3437 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 3438 } else { 3439 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 3440 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK | 3441 CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK | 3442 CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK | 3443 CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK | 3444 CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK | 3445 CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK | 3446 CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK | 3447 CP_MEC_CNTL__MEC_ME1_HALT_MASK | 3448 CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 3449 adev->gfx.kiq[0].ring.sched.ready = false; 3450 } 3451 udelay(50); 3452 } 3453 3454 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3455 { 3456 const struct gfx_firmware_header_v1_0 *mec_hdr; 3457 const __le32 *fw_data; 3458 unsigned i; 3459 u32 tmp; 3460 3461 if (!adev->gfx.mec_fw) 3462 return -EINVAL; 3463 3464 gfx_v9_0_cp_compute_enable(adev, false); 3465 3466 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3467 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3468 3469 fw_data = (const __le32 *) 3470 (adev->gfx.mec_fw->data + 3471 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3472 tmp = 0; 3473 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3474 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3475 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 3476 3477 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 3478 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 3479 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 3480 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3481 3482 /* MEC1 */ 3483 
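/* Only the jump table is written through the UCODE_ADDR/DATA pair below;
 * the MEC ucode body itself is fetched via the CPC instruction cache from
 * the GPU address programmed into CP_CPC_IC_BASE_LO/HI above.
 */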
WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3484 mec_hdr->jt_offset); 3485 for (i = 0; i < mec_hdr->jt_size; i++) 3486 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 3487 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3488 3489 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 3490 adev->gfx.mec_fw_version); 3491 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 3492 3493 return 0; 3494 } 3495 3496 /* KIQ functions */ 3497 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 3498 { 3499 uint32_t tmp; 3500 struct amdgpu_device *adev = ring->adev; 3501 3502 /* tell RLC which is KIQ queue */ 3503 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 3504 tmp &= 0xffffff00; 3505 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3506 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); 3507 } 3508 3509 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) 3510 { 3511 struct amdgpu_device *adev = ring->adev; 3512 3513 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 3514 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { 3515 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; 3516 mqd->cp_hqd_queue_priority = 3517 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; 3518 } 3519 } 3520 } 3521 3522 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 3523 { 3524 struct amdgpu_device *adev = ring->adev; 3525 struct v9_mqd *mqd = ring->mqd_ptr; 3526 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 3527 uint32_t tmp; 3528 3529 mqd->header = 0xC0310800; 3530 mqd->compute_pipelinestat_enable = 0x00000001; 3531 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 3532 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 3533 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 3534 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 3535 mqd->compute_static_thread_mgmt_se4 = 0xffffffff; 3536 mqd->compute_static_thread_mgmt_se5 = 0xffffffff; 3537 mqd->compute_static_thread_mgmt_se6 = 0xffffffff; 3538 mqd->compute_static_thread_mgmt_se7 = 0xffffffff; 3539 mqd->compute_misc_reserved = 0x00000003; 3540 3541 mqd->dynamic_cu_mask_addr_lo = 3542 lower_32_bits(ring->mqd_gpu_addr 3543 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3544 mqd->dynamic_cu_mask_addr_hi = 3545 upper_32_bits(ring->mqd_gpu_addr 3546 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 3547 3548 eop_base_addr = ring->eop_gpu_addr >> 8; 3549 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 3550 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 3551 3552 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3553 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 3554 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 3555 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 3556 3557 mqd->cp_hqd_eop_control = tmp; 3558 3559 /* enable doorbell? 
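 * The doorbell state computed here is only cached in the MQD; it is
 * committed to the real CP_HQD_PQ_DOORBELL_CONTROL register later, by
 * gfx_v9_0_kiq_init_register() for the KIQ or by the KIQ MAP_QUEUES
 * packet for regular compute queues.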
*/ 3560 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 3561 3562 if (ring->use_doorbell) { 3563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3564 DOORBELL_OFFSET, ring->doorbell_index); 3565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3566 DOORBELL_EN, 1); 3567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3568 DOORBELL_SOURCE, 0); 3569 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3570 DOORBELL_HIT, 0); 3571 } else { 3572 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 3573 DOORBELL_EN, 0); 3574 } 3575 3576 mqd->cp_hqd_pq_doorbell_control = tmp; 3577 3578 /* disable the queue if it's active */ 3579 ring->wptr = 0; 3580 mqd->cp_hqd_dequeue_request = 0; 3581 mqd->cp_hqd_pq_rptr = 0; 3582 mqd->cp_hqd_pq_wptr_lo = 0; 3583 mqd->cp_hqd_pq_wptr_hi = 0; 3584 3585 /* set the pointer to the MQD */ 3586 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 3587 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 3588 3589 /* set MQD vmid to 0 */ 3590 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 3591 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 3592 mqd->cp_mqd_control = tmp; 3593 3594 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3595 hqd_gpu_addr = ring->gpu_addr >> 8; 3596 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 3597 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 3598 3599 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3600 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 3601 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 3602 (order_base_2(ring->ring_size / 4) - 1)); 3603 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 3604 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 3605 #ifdef __BIG_ENDIAN 3606 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 3607 #endif 3608 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 3609 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 3610 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 3611 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 3612 mqd->cp_hqd_pq_control = tmp; 3613 3614 /* set the wb address whether it's enabled or not */ 3615 wb_gpu_addr = ring->rptr_gpu_addr; 3616 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 3617 mqd->cp_hqd_pq_rptr_report_addr_hi = 3618 upper_32_bits(wb_gpu_addr) & 0xffff; 3619 3620 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3621 wb_gpu_addr = ring->wptr_gpu_addr; 3622 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3623 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3624 3625 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3626 ring->wptr = 0; 3627 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 3628 3629 /* set the vmid for the queue */ 3630 mqd->cp_hqd_vmid = 0; 3631 3632 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 3633 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 3634 mqd->cp_hqd_persistent_state = tmp; 3635 3636 /* set MIN_IB_AVAIL_SIZE */ 3637 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 3638 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 3639 mqd->cp_hqd_ib_control = tmp; 3640 3641 /* set static priority for a queue/ring */ 3642 gfx_v9_0_mqd_set_priority(ring, mqd); 3643 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); 3644 3645 /* map_queues packet doesn't need activate the queue, 3646 * so only kiq need set this field. 
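 * Regular compute queues are mapped and activated later through the KIQ
 * MAP_QUEUES packet issued from amdgpu_gfx_enable_kcq(), so their MQDs
 * keep cp_hqd_active at 0 here.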
3647 */ 3648 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 3649 mqd->cp_hqd_active = 1; 3650 3651 return 0; 3652 } 3653 3654 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 3655 { 3656 struct amdgpu_device *adev = ring->adev; 3657 struct v9_mqd *mqd = ring->mqd_ptr; 3658 int j; 3659 3660 /* disable wptr polling */ 3661 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3662 3663 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 3664 mqd->cp_hqd_eop_base_addr_lo); 3665 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 3666 mqd->cp_hqd_eop_base_addr_hi); 3667 3668 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 3669 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 3670 mqd->cp_hqd_eop_control); 3671 3672 /* enable doorbell? */ 3673 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3674 mqd->cp_hqd_pq_doorbell_control); 3675 3676 /* disable the queue if it's active */ 3677 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3678 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3679 for (j = 0; j < adev->usec_timeout; j++) { 3680 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3681 break; 3682 udelay(1); 3683 } 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3685 mqd->cp_hqd_dequeue_request); 3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3687 mqd->cp_hqd_pq_rptr); 3688 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3689 mqd->cp_hqd_pq_wptr_lo); 3690 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3691 mqd->cp_hqd_pq_wptr_hi); 3692 } 3693 3694 /* set the pointer to the MQD */ 3695 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3696 mqd->cp_mqd_base_addr_lo); 3697 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3698 mqd->cp_mqd_base_addr_hi); 3699 3700 /* set MQD vmid to 0 */ 3701 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3702 mqd->cp_mqd_control); 3703 3704 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3705 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3706 mqd->cp_hqd_pq_base_lo); 3707 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3708 mqd->cp_hqd_pq_base_hi); 3709 3710 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3711 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3712 mqd->cp_hqd_pq_control); 3713 3714 /* set the wb address whether it's enabled or not */ 3715 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3716 mqd->cp_hqd_pq_rptr_report_addr_lo); 3717 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3718 mqd->cp_hqd_pq_rptr_report_addr_hi); 3719 3720 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3721 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3722 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3723 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3724 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3725 3726 /* enable the doorbell if requested */ 3727 if (ring->use_doorbell) { 3728 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3729 (adev->doorbell_index.kiq * 2) << 2); 3730 /* If GC has entered CGPG, ringing doorbell > first page 3731 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to 3732 * workaround this issue. And this change has to align with firmware 3733 * update. 
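 * Whether the enlarged range can be used is decided by
 * check_if_enlarge_doorbell_range() below; otherwise the original
 * userqueue_end based upper bound is kept.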
3734 */ 3735 if (check_if_enlarge_doorbell_range(adev)) 3736 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3737 (adev->doorbell.size - 4)); 3738 else 3739 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3740 (adev->doorbell_index.userqueue_end * 2) << 2); 3741 } 3742 3743 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3744 mqd->cp_hqd_pq_doorbell_control); 3745 3746 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3747 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3748 mqd->cp_hqd_pq_wptr_lo); 3749 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3750 mqd->cp_hqd_pq_wptr_hi); 3751 3752 /* set the vmid for the queue */ 3753 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); 3754 3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 3756 mqd->cp_hqd_persistent_state); 3757 3758 /* activate the queue */ 3759 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 3760 mqd->cp_hqd_active); 3761 3762 if (ring->use_doorbell) 3763 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 3764 3765 return 0; 3766 } 3767 3768 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring) 3769 { 3770 struct amdgpu_device *adev = ring->adev; 3771 int j; 3772 3773 /* disable the queue if it's active */ 3774 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3775 3776 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3777 3778 for (j = 0; j < adev->usec_timeout; j++) { 3779 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3780 break; 3781 udelay(1); 3782 } 3783 3784 if (j == AMDGPU_MAX_USEC_TIMEOUT) { 3785 DRM_DEBUG("KIQ dequeue request failed.\n"); 3786 3787 /* Manual disable if dequeue request times out */ 3788 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0); 3789 } 3790 3791 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3792 0); 3793 } 3794 3795 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0); 3796 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0); 3797 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0); 3798 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000); 3799 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0); 3800 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0); 3801 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0); 3802 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0); 3803 3804 return 0; 3805 } 3806 3807 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) 3808 { 3809 struct amdgpu_device *adev = ring->adev; 3810 struct v9_mqd *mqd = ring->mqd_ptr; 3811 struct v9_mqd *tmp_mqd; 3812 3813 gfx_v9_0_kiq_setting(ring); 3814 3815 /* GPU could be in bad state during probe, driver trigger the reset 3816 * after load the SMU, in this case , the mqd is not be initialized. 3817 * driver need to re-init the mqd. 
3818 * check mqd->cp_hqd_pq_control since this value should not be 0 3819 */ 3820 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; 3821 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ 3822 /* for GPU_RESET case , reset MQD to a clean status */ 3823 if (adev->gfx.kiq[0].mqd_backup) 3824 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); 3825 3826 /* reset ring buffer */ 3827 ring->wptr = 0; 3828 amdgpu_ring_clear_ring(ring); 3829 3830 mutex_lock(&adev->srbm_mutex); 3831 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3832 gfx_v9_0_kiq_init_register(ring); 3833 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3834 mutex_unlock(&adev->srbm_mutex); 3835 } else { 3836 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3837 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3838 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3839 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 3840 amdgpu_ring_clear_ring(ring); 3841 mutex_lock(&adev->srbm_mutex); 3842 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3843 gfx_v9_0_mqd_init(ring); 3844 gfx_v9_0_kiq_init_register(ring); 3845 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3846 mutex_unlock(&adev->srbm_mutex); 3847 3848 if (adev->gfx.kiq[0].mqd_backup) 3849 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); 3850 } 3851 3852 return 0; 3853 } 3854 3855 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore) 3856 { 3857 struct amdgpu_device *adev = ring->adev; 3858 struct v9_mqd *mqd = ring->mqd_ptr; 3859 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 3860 struct v9_mqd *tmp_mqd; 3861 3862 /* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control 3863 * is not be initialized before 3864 */ 3865 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; 3866 3867 if (!restore && (!tmp_mqd->cp_hqd_pq_control || 3868 (!amdgpu_in_reset(adev) && !adev->in_suspend))) { 3869 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); 3870 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3871 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3872 mutex_lock(&adev->srbm_mutex); 3873 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 3874 gfx_v9_0_mqd_init(ring); 3875 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 3876 mutex_unlock(&adev->srbm_mutex); 3877 3878 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3879 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3880 } else { 3881 /* restore MQD to a clean status */ 3882 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3883 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3884 /* reset ring buffer */ 3885 ring->wptr = 0; 3886 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 3887 amdgpu_ring_clear_ring(ring); 3888 } 3889 3890 return 0; 3891 } 3892 3893 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3894 { 3895 gfx_v9_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 3896 return 0; 3897 } 3898 3899 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3900 { 3901 int i, r; 3902 3903 gfx_v9_0_cp_compute_enable(adev, true); 3904 3905 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3906 r = gfx_v9_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 3907 if (r) 3908 return r; 3909 } 3910 3911 return amdgpu_gfx_enable_kcq(adev, 0); 3912 } 3913 3914 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3915 { 3916 int r, i; 3917 struct amdgpu_ring 
*ring; 3918 3919 if (!(adev->flags & AMD_IS_APU)) 3920 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3921 3922 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3923 if (adev->gfx.num_gfx_rings) { 3924 /* legacy firmware loading */ 3925 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3926 if (r) 3927 return r; 3928 } 3929 3930 r = gfx_v9_0_cp_compute_load_microcode(adev); 3931 if (r) 3932 return r; 3933 } 3934 3935 if (adev->gfx.num_gfx_rings) 3936 gfx_v9_0_cp_gfx_enable(adev, false); 3937 gfx_v9_0_cp_compute_enable(adev, false); 3938 3939 r = gfx_v9_0_kiq_resume(adev); 3940 if (r) 3941 return r; 3942 3943 if (adev->gfx.num_gfx_rings) { 3944 r = gfx_v9_0_cp_gfx_resume(adev); 3945 if (r) 3946 return r; 3947 } 3948 3949 r = gfx_v9_0_kcq_resume(adev); 3950 if (r) 3951 return r; 3952 3953 if (adev->gfx.num_gfx_rings) { 3954 ring = &adev->gfx.gfx_ring[0]; 3955 r = amdgpu_ring_test_helper(ring); 3956 if (r) 3957 return r; 3958 } 3959 3960 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3961 ring = &adev->gfx.compute_ring[i]; 3962 amdgpu_ring_test_helper(ring); 3963 } 3964 3965 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3966 3967 return 0; 3968 } 3969 3970 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) 3971 { 3972 u32 tmp; 3973 3974 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && 3975 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) 3976 return; 3977 3978 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); 3979 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH, 3980 adev->df.hash_status.hash_64k); 3981 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH, 3982 adev->df.hash_status.hash_2m); 3983 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH, 3984 adev->df.hash_status.hash_1g); 3985 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp); 3986 } 3987 3988 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3989 { 3990 if (adev->gfx.num_gfx_rings) 3991 gfx_v9_0_cp_gfx_enable(adev, enable); 3992 gfx_v9_0_cp_compute_enable(adev, enable); 3993 } 3994 3995 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block) 3996 { 3997 int r; 3998 struct amdgpu_device *adev = ip_block->adev; 3999 4000 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4001 adev->gfx.cleaner_shader_ptr); 4002 4003 if (!amdgpu_sriov_vf(adev)) 4004 gfx_v9_0_init_golden_registers(adev); 4005 4006 gfx_v9_0_constants_init(adev); 4007 4008 gfx_v9_0_init_tcp_config(adev); 4009 4010 r = adev->gfx.rlc.funcs->resume(adev); 4011 if (r) 4012 return r; 4013 4014 r = gfx_v9_0_cp_resume(adev); 4015 if (r) 4016 return r; 4017 4018 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) && 4019 !amdgpu_sriov_vf(adev)) 4020 gfx_v9_4_2_set_power_brake_sequence(adev); 4021 4022 return r; 4023 } 4024 4025 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block) 4026 { 4027 struct amdgpu_device *adev = ip_block->adev; 4028 4029 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4030 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4031 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4032 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4033 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4034 4035 /* DF freeze and kcq disable will fail */ 4036 if (!amdgpu_ras_intr_triggered()) 4037 /* disable KCQ to avoid CPC touch memory not valid anymore */ 4038 amdgpu_gfx_disable_kcq(adev, 0); 4039 4040 if (amdgpu_sriov_vf(adev)) { 4041 gfx_v9_0_cp_gfx_enable(adev, false); 4042 /* must disable polling for SRIOV when hw finished, otherwise 4043 
* CPC engine may still keep fetching WB address which is already 4044 * invalid after sw finished and trigger DMAR reading error in 4045 * hypervisor side. 4046 */ 4047 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4048 return 0; 4049 } 4050 4051 /* Use deinitialize sequence from CAIL when unbinding device from driver, 4052 * otherwise KIQ is hanging when binding back 4053 */ 4054 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4055 mutex_lock(&adev->srbm_mutex); 4056 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, 4057 adev->gfx.kiq[0].ring.pipe, 4058 adev->gfx.kiq[0].ring.queue, 0, 0); 4059 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); 4060 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 4061 mutex_unlock(&adev->srbm_mutex); 4062 } 4063 4064 gfx_v9_0_cp_enable(adev, false); 4065 4066 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ 4067 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || 4068 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { 4069 dev_dbg(adev->dev, "Skipping RLC halt\n"); 4070 return 0; 4071 } 4072 4073 adev->gfx.rlc.funcs->stop(adev); 4074 return 0; 4075 } 4076 4077 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block) 4078 { 4079 return gfx_v9_0_hw_fini(ip_block); 4080 } 4081 4082 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block) 4083 { 4084 return gfx_v9_0_hw_init(ip_block); 4085 } 4086 4087 static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block) 4088 { 4089 struct amdgpu_device *adev = ip_block->adev; 4090 4091 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 4092 GRBM_STATUS, GUI_ACTIVE)) 4093 return false; 4094 else 4095 return true; 4096 } 4097 4098 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4099 { 4100 unsigned i; 4101 struct amdgpu_device *adev = ip_block->adev; 4102 4103 for (i = 0; i < adev->usec_timeout; i++) { 4104 if (gfx_v9_0_is_idle(ip_block)) 4105 return 0; 4106 udelay(1); 4107 } 4108 return -ETIMEDOUT; 4109 } 4110 4111 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) 4112 { 4113 u32 grbm_soft_reset = 0; 4114 u32 tmp; 4115 struct amdgpu_device *adev = ip_block->adev; 4116 4117 /* GRBM_STATUS */ 4118 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 4119 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4120 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4121 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4122 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4123 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4124 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4125 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4126 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4127 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4128 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4129 } 4130 4131 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4132 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4133 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4134 } 4135 4136 /* GRBM_STATUS2 */ 4137 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 4138 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4139 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4140 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4141 4142 4143 if (grbm_soft_reset) { 4144 /* stop the rlc */ 4145 adev->gfx.rlc.funcs->stop(adev); 4146 4147 if (adev->gfx.num_gfx_rings) 4148 /* Disable GFX parsing/prefetching */ 4149 gfx_v9_0_cp_gfx_enable(adev, false); 4150 4151 /* Disable MEC parsing/prefetching */ 4152 
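/* Once the RLC, CP and MEC front ends are quiesced, the selected
 * GRBM_SOFT_RESET bits are pulsed below: set and read back, held for
 * ~50us, then cleared again, followed by a short settle delay.
 */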
gfx_v9_0_cp_compute_enable(adev, false); 4153 4154 if (grbm_soft_reset) { 4155 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4156 tmp |= grbm_soft_reset; 4157 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4158 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4159 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4160 4161 udelay(50); 4162 4163 tmp &= ~grbm_soft_reset; 4164 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 4165 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 4166 } 4167 4168 /* Wait a little for things to settle down */ 4169 udelay(50); 4170 } 4171 return 0; 4172 } 4173 4174 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) 4175 { 4176 signed long r, cnt = 0; 4177 unsigned long flags; 4178 uint32_t seq, reg_val_offs = 0; 4179 uint64_t value = 0; 4180 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 4181 struct amdgpu_ring *ring = &kiq->ring; 4182 4183 BUG_ON(!ring->funcs->emit_rreg); 4184 4185 spin_lock_irqsave(&kiq->ring_lock, flags); 4186 if (amdgpu_device_wb_get(adev, &reg_val_offs)) { 4187 pr_err("critical bug! too many kiq readers\n"); 4188 goto failed_unlock; 4189 } 4190 amdgpu_ring_alloc(ring, 32); 4191 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 4192 amdgpu_ring_write(ring, 9 | /* src: register*/ 4193 (5 << 8) | /* dst: memory */ 4194 (1 << 16) | /* count sel */ 4195 (1 << 20)); /* write confirm */ 4196 amdgpu_ring_write(ring, 0); 4197 amdgpu_ring_write(ring, 0); 4198 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 4199 reg_val_offs * 4)); 4200 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 4201 reg_val_offs * 4)); 4202 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); 4203 if (r) 4204 goto failed_undo; 4205 4206 amdgpu_ring_commit(ring); 4207 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4208 4209 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4210 4211 /* don't wait anymore for gpu reset case because this way may 4212 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg 4213 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will 4214 * never return if we keep waiting in virt_kiq_rreg, which causes 4215 * gpu_recover() to hang there.
4216 * 4217 * also don't wait anymore for IRQ context 4218 * */ 4219 if (r < 1 && (amdgpu_in_reset(adev))) 4220 goto failed_kiq_read; 4221 4222 might_sleep(); 4223 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { 4224 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); 4225 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); 4226 } 4227 4228 if (cnt > MAX_KIQ_REG_TRY) 4229 goto failed_kiq_read; 4230 4231 mb(); 4232 value = (uint64_t)adev->wb.wb[reg_val_offs] | 4233 (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL; 4234 amdgpu_device_wb_free(adev, reg_val_offs); 4235 return value; 4236 4237 failed_undo: 4238 amdgpu_ring_undo(ring); 4239 failed_unlock: 4240 spin_unlock_irqrestore(&kiq->ring_lock, flags); 4241 failed_kiq_read: 4242 if (reg_val_offs) 4243 amdgpu_device_wb_free(adev, reg_val_offs); 4244 pr_err("failed to read gpu clock\n"); 4245 return ~0; 4246 } 4247 4248 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4249 { 4250 uint64_t clock, clock_lo, clock_hi, hi_check; 4251 4252 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 4253 case IP_VERSION(9, 3, 0): 4254 preempt_disable(); 4255 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4256 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4257 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); 4258 /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over 4259 * roughly every 42 seconds. 4260 */ 4261 if (hi_check != clock_hi) { 4262 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); 4263 clock_hi = hi_check; 4264 } 4265 preempt_enable(); 4266 clock = clock_lo | (clock_hi << 32ULL); 4267 break; 4268 default: 4269 amdgpu_gfx_off_ctrl(adev, false); 4270 mutex_lock(&adev->gfx.gpu_clock_mutex); 4271 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 4272 IP_VERSION(9, 0, 1) && 4273 amdgpu_sriov_runtime(adev)) { 4274 clock = gfx_v9_0_kiq_read_clock(adev); 4275 } else { 4276 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4277 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 4278 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4279 } 4280 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4281 amdgpu_gfx_off_ctrl(adev, true); 4282 break; 4283 } 4284 return clock; 4285 } 4286 4287 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4288 uint32_t vmid, 4289 uint32_t gds_base, uint32_t gds_size, 4290 uint32_t gws_base, uint32_t gws_size, 4291 uint32_t oa_base, uint32_t oa_size) 4292 { 4293 struct amdgpu_device *adev = ring->adev; 4294 4295 /* GDS Base */ 4296 gfx_v9_0_write_data_to_reg(ring, 0, false, 4297 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 4298 gds_base); 4299 4300 /* GDS Size */ 4301 gfx_v9_0_write_data_to_reg(ring, 0, false, 4302 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 4303 gds_size); 4304 4305 /* GWS */ 4306 gfx_v9_0_write_data_to_reg(ring, 0, false, 4307 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 4308 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 4309 4310 /* OA */ 4311 gfx_v9_0_write_data_to_reg(ring, 0, false, 4312 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 4313 (1 << (oa_size + oa_base)) - (1 << oa_base)); 4314 } 4315 4316 static const u32 vgpr_init_compute_shader[] = 4317 { 4318 0xb07c0000, 0xbe8000ff, 4319 0x000000f8, 0xbf110800, 4320 0x7e000280, 0x7e020280, 4321 0x7e040280, 0x7e060280, 4322 0x7e080280, 0x7e0a0280, 4323 0x7e0c0280, 0x7e0e0280, 4324 0x80808800, 0xbe803200, 4325 0xbf84fff5, 0xbf9c0000, 4326 0xd28c0001, 
0x0001007f, 4327 0xd28d0001, 0x0002027e, 4328 0x10020288, 0xb8810904, 4329 0xb7814000, 0xd1196a01, 4330 0x00000301, 0xbe800087, 4331 0xbefc00c1, 0xd89c4000, 4332 0x00020201, 0xd89cc080, 4333 0x00040401, 0x320202ff, 4334 0x00000800, 0x80808100, 4335 0xbf84fff8, 0x7e020280, 4336 0xbf810000, 0x00000000, 4337 }; 4338 4339 static const u32 sgpr_init_compute_shader[] = 4340 { 4341 0xb07c0000, 0xbe8000ff, 4342 0x0000005f, 0xbee50080, 4343 0xbe812c65, 0xbe822c65, 4344 0xbe832c65, 0xbe842c65, 4345 0xbe852c65, 0xb77c0005, 4346 0x80808500, 0xbf84fff8, 4347 0xbe800080, 0xbf810000, 4348 }; 4349 4350 static const u32 vgpr_init_compute_shader_arcturus[] = { 4351 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 4352 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 4353 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 4354 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 4355 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 4356 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 4357 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 4358 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 4359 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 4360 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 4361 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 4362 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 4363 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 4364 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 4365 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 4366 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 4367 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 4368 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 4369 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 4370 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 4371 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 4372 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 4373 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 4374 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 4375 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 4376 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 4377 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 4378 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 4379 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 4380 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 4381 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 4382 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 4383 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 4384 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 4385 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 4386 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 4387 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 4388 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 
0xd3d94071, 0x18000080, 4389 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 4390 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 4391 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 4392 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 4393 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 4394 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 4395 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 4396 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 4397 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 4398 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 4399 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 4400 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 4401 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 4402 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 4403 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 4404 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 4405 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 4406 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 4407 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 4408 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 4409 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 4410 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 4411 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 4412 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 4413 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 4414 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 4415 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 4416 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 4417 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 4418 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 4419 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 4420 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 4421 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 4422 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 4423 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 4424 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 4425 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 4426 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 4427 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 4428 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 4429 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 4430 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 4431 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 4432 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 4433 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 4434 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 
0x18000080, 4435 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 4436 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 4437 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 4438 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 4439 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904, 4440 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 4441 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 4442 0xbf84fff8, 0xbf810000, 4443 }; 4444 4445 /* When below register arrays changed, please update gpr_reg_size, 4446 and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds, 4447 to cover all gfx9 ASICs */ 4448 static const struct soc15_reg_entry vgpr_init_regs[] = { 4449 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4450 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4451 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4452 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4453 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, 4454 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4455 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4456 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4457 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4458 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4459 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4460 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4461 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4462 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4463 }; 4464 4465 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = { 4466 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4467 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4468 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, 4469 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4470 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf }, 4471 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 4472 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 4473 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 4474 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 4475 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, 4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, 4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, 4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, 4480 }; 4481 4482 static const struct soc15_reg_entry sgpr1_init_regs[] = { 4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff }, 4490 { SOC15_REG_ENTRY(GC, 0, 
mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff }, 4491 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff }, 4492 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff }, 4493 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff }, 4494 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff }, 4495 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff }, 4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff }, 4497 }; 4498 4499 static const struct soc15_reg_entry sgpr2_init_regs[] = { 4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, 4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, 4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, 4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ 4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 }, 4507 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 }, 4508 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 }, 4509 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 }, 4510 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 }, 4511 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 }, 4512 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 }, 4513 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 }, 4514 }; 4515 4516 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = { 4517 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1}, 4518 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1}, 4519 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1}, 4520 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1}, 4521 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1}, 4522 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1}, 4523 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1}, 4524 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1}, 4525 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1}, 4526 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1}, 4527 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1}, 4528 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1}, 4529 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1}, 4530 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6}, 4531 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16}, 4532 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16}, 4533 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16}, 4534 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16}, 4535 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16}, 4536 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16}, 4537 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16}, 4538 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16}, 4539 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6}, 4540 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16}, 4541 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16}, 4542 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1}, 4543 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1}, 4544 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32}, 4545 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32}, 4546 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72}, 4547 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16}, 
4548 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, 4549 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, 4550 }; 4551 4552 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) 4553 { 4554 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4555 int i, r; 4556 4557 /* only support when RAS is enabled */ 4558 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4559 return 0; 4560 4561 r = amdgpu_ring_alloc(ring, 7); 4562 if (r) { 4563 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n", 4564 ring->name, r); 4565 return r; 4566 } 4567 4568 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000); 4569 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size); 4570 4571 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 4572 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 4573 PACKET3_DMA_DATA_DST_SEL(1) | 4574 PACKET3_DMA_DATA_SRC_SEL(2) | 4575 PACKET3_DMA_DATA_ENGINE(0))); 4576 amdgpu_ring_write(ring, 0); 4577 amdgpu_ring_write(ring, 0); 4578 amdgpu_ring_write(ring, 0); 4579 amdgpu_ring_write(ring, 0); 4580 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 4581 adev->gds.gds_size); 4582 4583 amdgpu_ring_commit(ring); 4584 4585 for (i = 0; i < adev->usec_timeout; i++) { 4586 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring)) 4587 break; 4588 udelay(1); 4589 } 4590 4591 if (i >= adev->usec_timeout) 4592 r = -ETIMEDOUT; 4593 4594 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000); 4595 4596 return r; 4597 } 4598 4599 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 4600 { 4601 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 4602 struct amdgpu_ib ib; 4603 struct dma_fence *f = NULL; 4604 int r, i; 4605 unsigned total_size, vgpr_offset, sgpr_offset; 4606 u64 gpu_addr; 4607 4608 int compute_dim_x = adev->gfx.config.max_shader_engines * 4609 adev->gfx.config.max_cu_per_sh * 4610 adev->gfx.config.max_sh_per_se; 4611 int sgpr_work_group_size = 5; 4612 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6; 4613 int vgpr_init_shader_size; 4614 const u32 *vgpr_init_shader_ptr; 4615 const struct soc15_reg_entry *vgpr_init_regs_ptr; 4616 4617 /* only support when RAS is enabled */ 4618 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 4619 return 0; 4620 4621 /* bail if the compute ring is not ready */ 4622 if (!ring->sched.ready) 4623 return 0; 4624 4625 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { 4626 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; 4627 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); 4628 vgpr_init_regs_ptr = vgpr_init_regs_arcturus; 4629 } else { 4630 vgpr_init_shader_ptr = vgpr_init_compute_shader; 4631 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader); 4632 vgpr_init_regs_ptr = vgpr_init_regs; 4633 } 4634 4635 total_size = 4636 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */ 4637 total_size += 4638 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */ 4639 total_size += 4640 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */ 4641 total_size = ALIGN(total_size, 256); 4642 vgpr_offset = total_size; 4643 total_size += ALIGN(vgpr_init_shader_size, 256); 4644 sgpr_offset = total_size; 4645 total_size += sizeof(sgpr_init_compute_shader); 4646 4647 /* allocate an indirect buffer to put the commands in */ 4648 memset(&ib, 0, sizeof(ib)); 4649 r = amdgpu_ib_get(adev, NULL, total_size, 4650 AMDGPU_IB_POOL_DIRECT, &ib); 4651 if (r) { 4652 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 4653 return r; 4654 } 4655 4656 /* load the compute 
shaders */ 4657 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++) 4658 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i]; 4659 4660 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 4661 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 4662 4663 /* init the ib length to 0 */ 4664 ib.length_dw = 0; 4665 4666 /* VGPR */ 4667 /* write the register state for the compute dispatch */ 4668 for (i = 0; i < gpr_reg_size; i++) { 4669 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4670 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i]) 4671 - PACKET3_SET_SH_REG_START; 4672 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value; 4673 } 4674 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4675 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 4676 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4677 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4678 - PACKET3_SET_SH_REG_START; 4679 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4680 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4681 4682 /* write dispatch packet */ 4683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4684 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */ 4685 ib.ptr[ib.length_dw++] = 1; /* y */ 4686 ib.ptr[ib.length_dw++] = 1; /* z */ 4687 ib.ptr[ib.length_dw++] = 4688 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4689 4690 /* write CS partial flush packet */ 4691 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4692 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4693 4694 /* SGPR1 */ 4695 /* write the register state for the compute dispatch */ 4696 for (i = 0; i < gpr_reg_size; i++) { 4697 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4698 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i]) 4699 - PACKET3_SET_SH_REG_START; 4700 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value; 4701 } 4702 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4703 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4704 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4705 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO) 4706 - PACKET3_SET_SH_REG_START; 4707 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 4708 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 4709 4710 /* write dispatch packet */ 4711 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 4712 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */ 4713 ib.ptr[ib.length_dw++] = 1; /* y */ 4714 ib.ptr[ib.length_dw++] = 1; /* z */ 4715 ib.ptr[ib.length_dw++] = 4716 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 4717 4718 /* write CS partial flush packet */ 4719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 4720 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 4721 4722 /* SGPR2 */ 4723 /* write the register state for the compute dispatch */ 4724 for (i = 0; i < gpr_reg_size; i++) { 4725 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 4726 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i]) 4727 - PACKET3_SET_SH_REG_START; 4728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value; 4729 } 4730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 4731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 4732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 4733 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, 
mmCOMPUTE_PGM_LO)
				- PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the IB on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

fail:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.xcc_mask = 1;
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	gfx_v9_0_set_kiq_pm4_funcs(adev);
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	/* init rlcg reg access ctrl */
	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);

	return gfx_v9_0_init_microcode(adev);
}

static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	/*
	 * Temporary workaround: on several cards the CP firmware fails to
	 * update its read pointer while CPDMA writes the GDS clearing
	 * operation during the suspend/resume sequence, so limit this
	 * operation to the cold boot sequence.
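	 * The check below therefore skips the GDS clear when adev->in_suspend
	 * is set (or when there is no GDS to clear) and only runs it on a
	 * cold boot.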
4807 */ 4808 if ((!adev->in_suspend) && 4809 (adev->gds.gds_size)) { 4810 r = gfx_v9_0_do_edc_gds_workarounds(adev); 4811 if (r) 4812 return r; 4813 } 4814 4815 /* requires IBs so do in late init after IB pool is initialized */ 4816 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4817 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); 4818 else 4819 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 4820 4821 if (r) 4822 return r; 4823 4824 if (adev->gfx.ras && 4825 adev->gfx.ras->enable_watchdog_timer) 4826 adev->gfx.ras->enable_watchdog_timer(adev); 4827 4828 return 0; 4829 } 4830 4831 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block) 4832 { 4833 struct amdgpu_device *adev = ip_block->adev; 4834 int r; 4835 4836 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4837 if (r) 4838 return r; 4839 4840 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4841 if (r) 4842 return r; 4843 4844 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 4845 if (r) 4846 return r; 4847 4848 r = gfx_v9_0_ecc_late_init(ip_block); 4849 if (r) 4850 return r; 4851 4852 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 4853 gfx_v9_4_2_debug_trap_config_init(adev, 4854 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4855 else 4856 gfx_v9_0_debug_trap_config_init(adev, 4857 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); 4858 4859 return 0; 4860 } 4861 4862 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 4863 { 4864 uint32_t rlc_setting; 4865 4866 /* if RLC is not enabled, do nothing */ 4867 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 4868 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 4869 return false; 4870 4871 return true; 4872 } 4873 4874 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 4875 { 4876 uint32_t data; 4877 unsigned i; 4878 4879 data = RLC_SAFE_MODE__CMD_MASK; 4880 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 4881 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4882 4883 /* wait for RLC_SAFE_MODE */ 4884 for (i = 0; i < adev->usec_timeout; i++) { 4885 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 4886 break; 4887 udelay(1); 4888 } 4889 } 4890 4891 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 4892 { 4893 uint32_t data; 4894 4895 data = RLC_SAFE_MODE__CMD_MASK; 4896 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 4897 } 4898 4899 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 4900 bool enable) 4901 { 4902 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4903 4904 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 4905 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 4906 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4907 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 4908 } else { 4909 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 4910 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 4911 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 4912 } 4913 4914 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4915 } 4916 4917 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 4918 bool enable) 4919 { 4920 /* TODO: double check if we need to perform under safe mode */ 4921 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 4922 4923 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 4924 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 4925 else 4926 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 4927 4928 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 4929 
gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 4930 else 4931 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 4932 4933 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 4934 } 4935 4936 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 4937 bool enable) 4938 { 4939 uint32_t data, def; 4940 4941 /* It is disabled by HW by default */ 4942 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 4943 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 4944 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4945 4946 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4947 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4948 4949 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4950 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4951 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4952 4953 /* only for Vega10 & Raven1 */ 4954 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4955 4956 if (def != data) 4957 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4958 4959 /* MGLS is a global flag to control all MGLS in GFX */ 4960 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4961 /* 2 - RLC memory Light sleep */ 4962 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4963 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4964 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4965 if (def != data) 4966 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4967 } 4968 /* 3 - CP memory Light sleep */ 4969 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4970 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4971 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4972 if (def != data) 4973 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4974 } 4975 } 4976 } else { 4977 /* 1 - MGCG_OVERRIDE */ 4978 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4979 4980 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) 4981 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4982 4983 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4984 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4985 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4986 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4987 4988 if (def != data) 4989 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4990 4991 /* 2 - disable MGLS in RLC */ 4992 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4993 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4994 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4995 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4996 } 4997 4998 /* 3 - disable MGLS in CP */ 4999 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 5000 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5001 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5002 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 5003 } 5004 } 5005 } 5006 5007 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 5008 bool enable) 5009 { 5010 uint32_t data, def; 5011 5012 if (!adev->gfx.num_gfx_rings) 5013 return; 5014 5015 /* Enable 3D CGCG/CGLS */ 5016 if (enable) { 5017 /* write cmd to clear cgcg/cgls ov */ 5018 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5019 /* unset CGCG override */ 5020 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5021 /* update CGCG and CGLS override bits */ 5022 if (def != data) 5023 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5024 5025 /* enable 3Dcgcg FSM(0x0000363f) */ 5026 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5027 5028 if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5029 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5030 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5031 else 5032 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT; 5033 5034 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5035 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5036 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5037 if (def != data) 5038 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5039 5040 /* set IDLE_POLL_COUNT(0x00900100) */ 5041 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5042 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5043 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5044 if (def != data) 5045 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5046 } else { 5047 /* Disable CGCG/CGLS */ 5048 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 5049 /* disable cgcg, cgls should be disabled */ 5050 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 5051 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 5052 /* disable cgcg and cgls in FSM */ 5053 if (def != data) 5054 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 5055 } 5056 } 5057 5058 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5059 bool enable) 5060 { 5061 uint32_t def, data; 5062 5063 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5064 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 5065 /* unset CGCG override */ 5066 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5067 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5068 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5069 else 5070 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5071 /* update CGCG and CGLS override bits */ 5072 if (def != data) 5073 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 5074 5075 /* enable cgcg FSM(0x0000363F) */ 5076 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5077 5078 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) 5079 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5080 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5081 else 5082 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5083 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5085 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5086 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5087 if (def != data) 5088 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5089 5090 /* set IDLE_POLL_COUNT(0x00900100) */ 5091 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 5092 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5093 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5094 if (def != data) 5095 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 5096 } else { 5097 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 5098 /* reset CGCG/CGLS bits */ 5099 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5100 /* disable cgcg and cgls in FSM */ 5101 if (def != data) 5102 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 5103 } 5104 } 5105 5106 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5107 bool enable) 5108 { 5109 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5110 if (enable) { 5111 /* CGCG/CGLS should be enabled after MGCG/MGLS 5112 * === MGCG + MGLS === 5113 */ 5114 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5115 /* === CGCG /CGLS for GFX 3D Only 
=== */ 5116 gfx_v9_0_update_3d_clock_gating(adev, enable); 5117 /* === CGCG + CGLS === */ 5118 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5119 } else { 5120 /* CGCG/CGLS should be disabled before MGCG/MGLS 5121 * === CGCG + CGLS === 5122 */ 5123 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 5124 /* === CGCG /CGLS for GFX 3D Only === */ 5125 gfx_v9_0_update_3d_clock_gating(adev, enable); 5126 /* === MGCG + MGLS === */ 5127 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 5128 } 5129 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5130 return 0; 5131 } 5132 5133 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, 5134 unsigned int vmid) 5135 { 5136 u32 reg, data; 5137 5138 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); 5139 if (amdgpu_sriov_is_pp_one_vf(adev)) 5140 data = RREG32_NO_KIQ(reg); 5141 else 5142 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); 5143 5144 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 5145 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5146 5147 if (amdgpu_sriov_is_pp_one_vf(adev)) 5148 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); 5149 else 5150 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); 5151 } 5152 5153 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) 5154 { 5155 amdgpu_gfx_off_ctrl(adev, false); 5156 5157 gfx_v9_0_update_spm_vmid_internal(adev, vmid); 5158 5159 amdgpu_gfx_off_ctrl(adev, true); 5160 } 5161 5162 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, 5163 uint32_t offset, 5164 struct soc15_reg_rlcg *entries, int arr_size) 5165 { 5166 int i; 5167 uint32_t reg; 5168 5169 if (!entries) 5170 return false; 5171 5172 for (i = 0; i < arr_size; i++) { 5173 const struct soc15_reg_rlcg *entry; 5174 5175 entry = &entries[i]; 5176 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; 5177 if (offset == reg) 5178 return true; 5179 } 5180 5181 return false; 5182 } 5183 5184 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset) 5185 { 5186 return gfx_v9_0_check_rlcg_range(adev, offset, 5187 (void *)rlcg_access_gc_9_0, 5188 ARRAY_SIZE(rlcg_access_gc_9_0)); 5189 } 5190 5191 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 5192 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 5193 .set_safe_mode = gfx_v9_0_set_safe_mode, 5194 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 5195 .init = gfx_v9_0_rlc_init, 5196 .get_csb_size = gfx_v9_0_get_csb_size, 5197 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 5198 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 5199 .resume = gfx_v9_0_rlc_resume, 5200 .stop = gfx_v9_0_rlc_stop, 5201 .reset = gfx_v9_0_rlc_reset, 5202 .start = gfx_v9_0_rlc_start, 5203 .update_spm_vmid = gfx_v9_0_update_spm_vmid, 5204 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, 5205 }; 5206 5207 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5208 enum amd_powergating_state state) 5209 { 5210 struct amdgpu_device *adev = ip_block->adev; 5211 bool enable = (state == AMD_PG_STATE_GATE); 5212 5213 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5214 case IP_VERSION(9, 2, 2): 5215 case IP_VERSION(9, 1, 0): 5216 case IP_VERSION(9, 3, 0): 5217 if (!enable) 5218 amdgpu_gfx_off_ctrl_immediate(adev, false); 5219 5220 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5221 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 5222 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 5223 } else { 5224 
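			/* AMD_PG_SUPPORT_RLC_SMU_HS is not set: keep SCK
			 * slow-down disabled for both power-up and power-down.
			 */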
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 5225 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 5226 } 5227 5228 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5229 gfx_v9_0_enable_cp_power_gating(adev, true); 5230 else 5231 gfx_v9_0_enable_cp_power_gating(adev, false); 5232 5233 /* update gfx cgpg state */ 5234 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 5235 5236 /* update mgcg state */ 5237 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 5238 5239 if (enable) 5240 amdgpu_gfx_off_ctrl_immediate(adev, true); 5241 break; 5242 case IP_VERSION(9, 2, 1): 5243 amdgpu_gfx_off_ctrl_immediate(adev, enable); 5244 break; 5245 default: 5246 break; 5247 } 5248 5249 return 0; 5250 } 5251 5252 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5253 enum amd_clockgating_state state) 5254 { 5255 struct amdgpu_device *adev = ip_block->adev; 5256 5257 if (amdgpu_sriov_vf(adev)) 5258 return 0; 5259 5260 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5261 case IP_VERSION(9, 0, 1): 5262 case IP_VERSION(9, 2, 1): 5263 case IP_VERSION(9, 4, 0): 5264 case IP_VERSION(9, 2, 2): 5265 case IP_VERSION(9, 1, 0): 5266 case IP_VERSION(9, 4, 1): 5267 case IP_VERSION(9, 3, 0): 5268 case IP_VERSION(9, 4, 2): 5269 gfx_v9_0_update_gfx_clock_gating(adev, 5270 state == AMD_CG_STATE_GATE); 5271 break; 5272 default: 5273 break; 5274 } 5275 return 0; 5276 } 5277 5278 static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5279 { 5280 struct amdgpu_device *adev = ip_block->adev; 5281 int data; 5282 5283 if (amdgpu_sriov_vf(adev)) 5284 *flags = 0; 5285 5286 /* AMD_CG_SUPPORT_GFX_MGCG */ 5287 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); 5288 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5289 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5290 5291 /* AMD_CG_SUPPORT_GFX_CGCG */ 5292 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); 5293 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5294 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5295 5296 /* AMD_CG_SUPPORT_GFX_CGLS */ 5297 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5298 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5299 5300 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5301 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); 5302 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5303 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5304 5305 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5306 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); 5307 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5308 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5309 5310 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { 5311 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5312 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); 5313 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5314 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5315 5316 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5317 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5318 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5319 } 5320 } 5321 5322 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5323 { 5324 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/ 5325 } 5326 5327 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5328 { 5329 struct amdgpu_device *adev = ring->adev; 5330 u64 wptr; 5331 5332 /* XXX check if swapping is necessary on BE */ 5333 if (ring->use_doorbell) { 5334 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5335 } else { 5336 wptr = RREG32_SOC15(GC, 0, 
mmCP_RB0_WPTR); 5337 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 5338 } 5339 5340 return wptr; 5341 } 5342 5343 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5344 { 5345 struct amdgpu_device *adev = ring->adev; 5346 5347 if (ring->use_doorbell) { 5348 /* XXX check if swapping is necessary on BE */ 5349 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 5350 WDOORBELL64(ring->doorbell_index, ring->wptr); 5351 } else { 5352 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5353 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 5354 } 5355 } 5356 5357 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5358 { 5359 struct amdgpu_device *adev = ring->adev; 5360 u32 ref_and_mask, reg_mem_engine; 5361 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5362 5363 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5364 switch (ring->me) { 5365 case 1: 5366 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5367 break; 5368 case 2: 5369 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5370 break; 5371 default: 5372 return; 5373 } 5374 reg_mem_engine = 0; 5375 } else { 5376 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 5377 reg_mem_engine = 1; /* pfp */ 5378 } 5379 5380 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5381 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5382 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5383 ref_and_mask, ref_and_mask, 0x20); 5384 } 5385 5386 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5387 struct amdgpu_job *job, 5388 struct amdgpu_ib *ib, 5389 uint32_t flags) 5390 { 5391 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5392 u32 header, control = 0; 5393 5394 if (ib->flags & AMDGPU_IB_FLAG_CE) 5395 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5396 else 5397 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5398 5399 control |= ib->length_dw | (vmid << 24); 5400 5401 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) { 5402 control |= INDIRECT_BUFFER_PRE_ENB(1); 5403 5404 if (flags & AMDGPU_IB_PREEMPTED) 5405 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5406 5407 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 5408 gfx_v9_0_ring_emit_de_meta(ring, 5409 (!amdgpu_sriov_vf(ring->adev) && 5410 flags & AMDGPU_IB_PREEMPTED) ? 
5411 true : false, 5412 job->gds_size > 0 && job->gds_base != 0); 5413 } 5414 5415 amdgpu_ring_write(ring, header); 5416 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5417 amdgpu_ring_write(ring, 5418 #ifdef __BIG_ENDIAN 5419 (2 << 0) | 5420 #endif 5421 lower_32_bits(ib->gpu_addr)); 5422 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5423 amdgpu_ring_ib_on_emit_cntl(ring); 5424 amdgpu_ring_write(ring, control); 5425 } 5426 5427 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, 5428 unsigned offset) 5429 { 5430 u32 control = ring->ring[offset]; 5431 5432 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5433 ring->ring[offset] = control; 5434 } 5435 5436 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, 5437 unsigned offset) 5438 { 5439 struct amdgpu_device *adev = ring->adev; 5440 void *ce_payload_cpu_addr; 5441 uint64_t payload_offset, payload_size; 5442 5443 payload_size = sizeof(struct v9_ce_ib_state); 5444 5445 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5446 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5447 5448 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5449 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); 5450 } else { 5451 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, 5452 (ring->buf_mask + 1 - offset) << 2); 5453 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5454 memcpy((void *)&ring->ring[0], 5455 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5456 payload_size); 5457 } 5458 } 5459 5460 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, 5461 unsigned offset) 5462 { 5463 struct amdgpu_device *adev = ring->adev; 5464 void *de_payload_cpu_addr; 5465 uint64_t payload_offset, payload_size; 5466 5467 payload_size = sizeof(struct v9_de_ib_state); 5468 5469 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); 5470 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; 5471 5472 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = 5473 IB_COMPLETION_STATUS_PREEMPTED; 5474 5475 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { 5476 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); 5477 } else { 5478 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, 5479 (ring->buf_mask + 1 - offset) << 2); 5480 payload_size -= (ring->buf_mask + 1 - offset) << 2; 5481 memcpy((void *)&ring->ring[0], 5482 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), 5483 payload_size); 5484 } 5485 } 5486 5487 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5488 struct amdgpu_job *job, 5489 struct amdgpu_ib *ib, 5490 uint32_t flags) 5491 { 5492 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5493 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5494 5495 /* Currently, there is a high possibility to get wave ID mismatch 5496 * between ME and GDS, leading to a hw deadlock, because ME generates 5497 * different wave IDs than the GDS expects. This situation happens 5498 * randomly when at least 5 compute pipes use GDS ordered append. 5499 * The wave IDs generated by ME are also wrong after suspend/resume. 5500 * Those are probably bugs somewhere else in the kernel driver. 5501 * 5502 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5503 * GDS to 0 for this ring (me/pipe). 
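	 * The SET_CONFIG_REG write emitted below performs that reset for IBs
	 * that set AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID.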
5504 */ 5505 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5506 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5507 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 5508 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5509 } 5510 5511 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5512 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5513 amdgpu_ring_write(ring, 5514 #ifdef __BIG_ENDIAN 5515 (2 << 0) | 5516 #endif 5517 lower_32_bits(ib->gpu_addr)); 5518 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5519 amdgpu_ring_write(ring, control); 5520 } 5521 5522 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5523 u64 seq, unsigned flags) 5524 { 5525 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5526 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5527 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 5528 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 5529 uint32_t dw2 = 0; 5530 5531 /* RELEASE_MEM - flush caches, send int */ 5532 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5533 5534 if (writeback) { 5535 dw2 = EOP_TC_NC_ACTION_EN; 5536 } else { 5537 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | 5538 EOP_TC_MD_ACTION_EN; 5539 } 5540 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5541 EVENT_INDEX(5); 5542 if (exec) 5543 dw2 |= EOP_EXEC; 5544 5545 amdgpu_ring_write(ring, dw2); 5546 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5547 5548 /* 5549 * the address should be Qword aligned if 64bit write, Dword 5550 * aligned if only send 32bit data low (discard data high) 5551 */ 5552 if (write64bit) 5553 BUG_ON(addr & 0x7); 5554 else 5555 BUG_ON(addr & 0x3); 5556 amdgpu_ring_write(ring, lower_32_bits(addr)); 5557 amdgpu_ring_write(ring, upper_32_bits(addr)); 5558 amdgpu_ring_write(ring, lower_32_bits(seq)); 5559 amdgpu_ring_write(ring, upper_32_bits(seq)); 5560 amdgpu_ring_write(ring, 0); 5561 } 5562 5563 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5564 { 5565 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5566 uint32_t seq = ring->fence_drv.sync_seq; 5567 uint64_t addr = ring->fence_drv.gpu_addr; 5568 5569 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 5570 lower_32_bits(addr), upper_32_bits(addr), 5571 seq, 0xffffffff, 4); 5572 } 5573 5574 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5575 unsigned vmid, uint64_t pd_addr) 5576 { 5577 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5578 5579 /* compute doesn't have PFP */ 5580 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5581 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5582 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5583 amdgpu_ring_write(ring, 0x0); 5584 } 5585 } 5586 5587 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5588 { 5589 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */ 5590 } 5591 5592 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5593 { 5594 u64 wptr; 5595 5596 /* XXX check if swapping is necessary on BE */ 5597 if (ring->use_doorbell) 5598 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5599 else 5600 BUG(); 5601 return wptr; 5602 } 5603 5604 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5605 { 5606 struct amdgpu_device *adev = ring->adev; 5607 5608 /* XXX check if swapping is necessary on BE */ 5609 if (ring->use_doorbell) { 5610 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); 
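		/* ring the doorbell so the firmware picks up the new wptr */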
5611 WDOORBELL64(ring->doorbell_index, ring->wptr); 5612 } else{ 5613 BUG(); /* only DOORBELL method supported on gfx9 now */ 5614 } 5615 } 5616 5617 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5618 u64 seq, unsigned int flags) 5619 { 5620 struct amdgpu_device *adev = ring->adev; 5621 5622 /* we only allocate 32bit for each seq wb address */ 5623 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5624 5625 /* write fence seq to the "addr" */ 5626 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5627 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5628 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5629 amdgpu_ring_write(ring, lower_32_bits(addr)); 5630 amdgpu_ring_write(ring, upper_32_bits(addr)); 5631 amdgpu_ring_write(ring, lower_32_bits(seq)); 5632 5633 if (flags & AMDGPU_FENCE_FLAG_INT) { 5634 /* set register to trigger INT */ 5635 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5636 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5637 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5638 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 5639 amdgpu_ring_write(ring, 0); 5640 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5641 } 5642 } 5643 5644 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 5645 { 5646 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5647 amdgpu_ring_write(ring, 0); 5648 } 5649 5650 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) 5651 { 5652 struct amdgpu_device *adev = ring->adev; 5653 struct v9_ce_ib_state ce_payload = {0}; 5654 uint64_t offset, ce_payload_gpu_addr; 5655 void *ce_payload_cpu_addr; 5656 int cnt; 5657 5658 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 5659 5660 offset = offsetof(struct v9_gfx_meta_data, ce_payload); 5661 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5662 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5663 5664 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5665 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 5666 WRITE_DATA_DST_SEL(8) | 5667 WR_CONFIRM) | 5668 WRITE_DATA_CACHE_POLICY(0)); 5669 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); 5670 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); 5671 5672 amdgpu_ring_ib_on_emit_ce(ring); 5673 5674 if (resume) 5675 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, 5676 sizeof(ce_payload) >> 2); 5677 else 5678 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, 5679 sizeof(ce_payload) >> 2); 5680 } 5681 5682 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) 5683 { 5684 int i, r = 0; 5685 struct amdgpu_device *adev = ring->adev; 5686 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5687 struct amdgpu_ring *kiq_ring = &kiq->ring; 5688 unsigned long flags; 5689 5690 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5691 return -EINVAL; 5692 5693 spin_lock_irqsave(&kiq->ring_lock, flags); 5694 5695 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5696 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5697 return -ENOMEM; 5698 } 5699 5700 /* assert preemption condition */ 5701 amdgpu_ring_set_preempt_cond_exec(ring, false); 5702 5703 ring->trail_seq += 1; 5704 amdgpu_ring_alloc(ring, 13); 5705 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 5706 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); 5707 5708 /* assert IB preemption, emit the trailing fence */ 5709 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5710 ring->trail_fence_gpu_addr, 5711 ring->trail_seq); 
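	/* submit the KIQ packets so the preemption request reaches the CP */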
5712 5713 amdgpu_ring_commit(kiq_ring); 5714 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5715 5716 /* poll the trailing fence */ 5717 for (i = 0; i < adev->usec_timeout; i++) { 5718 if (ring->trail_seq == 5719 le32_to_cpu(*ring->trail_fence_cpu_addr)) 5720 break; 5721 udelay(1); 5722 } 5723 5724 if (i >= adev->usec_timeout) { 5725 r = -EINVAL; 5726 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); 5727 } 5728 5729 /*reset the CP_VMID_PREEMPT after trailing fence*/ 5730 amdgpu_ring_emit_wreg(ring, 5731 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), 5732 0x0); 5733 amdgpu_ring_commit(ring); 5734 5735 /* deassert preemption condition */ 5736 amdgpu_ring_set_preempt_cond_exec(ring, true); 5737 return r; 5738 } 5739 5740 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) 5741 { 5742 struct amdgpu_device *adev = ring->adev; 5743 struct v9_de_ib_state de_payload = {0}; 5744 uint64_t offset, gds_addr, de_payload_gpu_addr; 5745 void *de_payload_cpu_addr; 5746 int cnt; 5747 5748 offset = offsetof(struct v9_gfx_meta_data, de_payload); 5749 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 5750 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 5751 5752 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 5753 AMDGPU_CSA_SIZE - adev->gds.gds_size, 5754 PAGE_SIZE); 5755 5756 if (usegds) { 5757 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 5758 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 5759 } 5760 5761 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 5762 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 5763 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 5764 WRITE_DATA_DST_SEL(8) | 5765 WR_CONFIRM) | 5766 WRITE_DATA_CACHE_POLICY(0)); 5767 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 5768 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 5769 5770 amdgpu_ring_ib_on_emit_de(ring); 5771 if (resume) 5772 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 5773 sizeof(de_payload) >> 2); 5774 else 5775 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 5776 sizeof(de_payload) >> 2); 5777 } 5778 5779 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 5780 bool secure) 5781 { 5782 uint32_t v = secure ? FRAME_TMZ : 0; 5783 5784 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 5785 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 5786 } 5787 5788 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 5789 { 5790 uint32_t dw2 = 0; 5791 5792 gfx_v9_0_ring_emit_ce_meta(ring, 5793 (!amdgpu_sriov_vf(ring->adev) && 5794 flags & AMDGPU_IB_PREEMPTED) ? true : false); 5795 5796 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 5797 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 5798 /* set load_global_config & load_global_uconfig */ 5799 dw2 |= 0x8001; 5800 /* set load_cs_sh_regs */ 5801 dw2 |= 0x01000000; 5802 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 5803 dw2 |= 0x10002; 5804 5805 /* set load_ce_ram if preamble presented */ 5806 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 5807 dw2 |= 0x10000000; 5808 } else { 5809 /* still load_ce_ram if this is the first time preamble presented 5810 * although there is no context switch happens. 
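		 * (0x10000000 is the same load_ce_ram bit set in the branch
		 * above.)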
5811 */ 5812 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 5813 dw2 |= 0x10000000; 5814 } 5815 5816 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 5817 amdgpu_ring_write(ring, dw2); 5818 amdgpu_ring_write(ring, 0); 5819 } 5820 5821 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 5822 uint64_t addr) 5823 { 5824 unsigned ret; 5825 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 5826 amdgpu_ring_write(ring, lower_32_bits(addr)); 5827 amdgpu_ring_write(ring, upper_32_bits(addr)); 5828 /* discard following DWs if *cond_exec_gpu_addr==0 */ 5829 amdgpu_ring_write(ring, 0); 5830 ret = ring->wptr & ring->buf_mask; 5831 /* patch dummy value later */ 5832 amdgpu_ring_write(ring, 0); 5833 return ret; 5834 } 5835 5836 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 5837 uint32_t reg_val_offs) 5838 { 5839 struct amdgpu_device *adev = ring->adev; 5840 5841 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 5842 amdgpu_ring_write(ring, 0 | /* src: register*/ 5843 (5 << 8) | /* dst: memory */ 5844 (1 << 20)); /* write confirm */ 5845 amdgpu_ring_write(ring, reg); 5846 amdgpu_ring_write(ring, 0); 5847 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 5848 reg_val_offs * 4)); 5849 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 5850 reg_val_offs * 4)); 5851 } 5852 5853 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 5854 uint32_t val) 5855 { 5856 uint32_t cmd = 0; 5857 5858 switch (ring->funcs->type) { 5859 case AMDGPU_RING_TYPE_GFX: 5860 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 5861 break; 5862 case AMDGPU_RING_TYPE_KIQ: 5863 cmd = (1 << 16); /* no inc addr */ 5864 break; 5865 default: 5866 cmd = WR_CONFIRM; 5867 break; 5868 } 5869 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5870 amdgpu_ring_write(ring, cmd); 5871 amdgpu_ring_write(ring, reg); 5872 amdgpu_ring_write(ring, 0); 5873 amdgpu_ring_write(ring, val); 5874 } 5875 5876 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 5877 uint32_t val, uint32_t mask) 5878 { 5879 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 5880 } 5881 5882 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 5883 uint32_t reg0, uint32_t reg1, 5884 uint32_t ref, uint32_t mask) 5885 { 5886 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5887 struct amdgpu_device *adev = ring->adev; 5888 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 
5889 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 5890 5891 if (fw_version_ok) 5892 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 5893 ref, mask, 0x20); 5894 else 5895 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 5896 ref, mask); 5897 } 5898 5899 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 5900 { 5901 struct amdgpu_device *adev = ring->adev; 5902 uint32_t value = 0; 5903 5904 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 5905 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 5906 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 5907 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 5908 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5909 WREG32_SOC15(GC, 0, mmSQ_CMD, value); 5910 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5911 } 5912 5913 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5914 enum amdgpu_interrupt_state state) 5915 { 5916 switch (state) { 5917 case AMDGPU_IRQ_STATE_DISABLE: 5918 case AMDGPU_IRQ_STATE_ENABLE: 5919 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 5920 TIME_STAMP_INT_ENABLE, 5921 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 5922 break; 5923 default: 5924 break; 5925 } 5926 } 5927 5928 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5929 int me, int pipe, 5930 enum amdgpu_interrupt_state state) 5931 { 5932 u32 mec_int_cntl, mec_int_cntl_reg; 5933 5934 /* 5935 * amdgpu controls only the first MEC. That's why this function only 5936 * handles the setting of interrupts for this specific MEC. All other 5937 * pipes' interrupts are set by amdkfd. 5938 */ 5939 5940 if (me == 1) { 5941 switch (pipe) { 5942 case 0: 5943 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5944 break; 5945 case 1: 5946 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5947 break; 5948 case 2: 5949 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5950 break; 5951 case 3: 5952 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 5953 break; 5954 default: 5955 DRM_DEBUG("invalid pipe %d\n", pipe); 5956 return; 5957 } 5958 } else { 5959 DRM_DEBUG("invalid me %d\n", me); 5960 return; 5961 } 5962 5963 switch (state) { 5964 case AMDGPU_IRQ_STATE_DISABLE: 5965 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg); 5966 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5967 TIME_STAMP_INT_ENABLE, 0); 5968 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5969 break; 5970 case AMDGPU_IRQ_STATE_ENABLE: 5971 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 5972 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5973 TIME_STAMP_INT_ENABLE, 1); 5974 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 5975 break; 5976 default: 5977 break; 5978 } 5979 } 5980 5981 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev, 5982 int me, int pipe) 5983 { 5984 /* 5985 * amdgpu controls only the first MEC. That's why this function only 5986 * handles the setting of interrupts for this specific MEC. All other 5987 * pipes' interrupts are set by amdkfd. 
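	 * Returning 0 for any other ME lets the callers skip programming the
	 * corresponding INT_CNTL register.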
5988 */ 5989 if (me != 1) 5990 return 0; 5991 5992 switch (pipe) { 5993 case 0: 5994 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 5995 case 1: 5996 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 5997 case 2: 5998 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 5999 case 3: 6000 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 6001 default: 6002 return 0; 6003 } 6004 } 6005 6006 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6007 struct amdgpu_irq_src *source, 6008 unsigned type, 6009 enum amdgpu_interrupt_state state) 6010 { 6011 u32 cp_int_cntl_reg, cp_int_cntl; 6012 int i, j; 6013 6014 switch (state) { 6015 case AMDGPU_IRQ_STATE_DISABLE: 6016 case AMDGPU_IRQ_STATE_ENABLE: 6017 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6018 PRIV_REG_INT_ENABLE, 6019 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6020 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6021 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6022 /* MECs start at 1 */ 6023 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6024 6025 if (cp_int_cntl_reg) { 6026 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6027 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6028 PRIV_REG_INT_ENABLE, 6029 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6030 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6031 } 6032 } 6033 } 6034 break; 6035 default: 6036 break; 6037 } 6038 6039 return 0; 6040 } 6041 6042 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6043 struct amdgpu_irq_src *source, 6044 unsigned type, 6045 enum amdgpu_interrupt_state state) 6046 { 6047 u32 cp_int_cntl_reg, cp_int_cntl; 6048 int i, j; 6049 6050 switch (state) { 6051 case AMDGPU_IRQ_STATE_DISABLE: 6052 case AMDGPU_IRQ_STATE_ENABLE: 6053 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6054 OPCODE_ERROR_INT_ENABLE, 6055 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6056 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6057 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6058 /* MECs start at 1 */ 6059 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j); 6060 6061 if (cp_int_cntl_reg) { 6062 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6063 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6064 OPCODE_ERROR_INT_ENABLE, 6065 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6066 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6067 } 6068 } 6069 } 6070 break; 6071 default: 6072 break; 6073 } 6074 6075 return 0; 6076 } 6077 6078 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6079 struct amdgpu_irq_src *source, 6080 unsigned type, 6081 enum amdgpu_interrupt_state state) 6082 { 6083 switch (state) { 6084 case AMDGPU_IRQ_STATE_DISABLE: 6085 case AMDGPU_IRQ_STATE_ENABLE: 6086 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6087 PRIV_INSTR_INT_ENABLE, 6088 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6089 break; 6090 default: 6091 break; 6092 } 6093 6094 return 0; 6095 } 6096 6097 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 6098 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6099 CP_ECC_ERROR_INT_ENABLE, 1) 6100 6101 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 6102 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 6103 CP_ECC_ERROR_INT_ENABLE, 0) 6104 6105 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 6106 struct amdgpu_irq_src *source, 6107 unsigned type, 6108 enum amdgpu_interrupt_state state) 6109 { 6110 switch (state) { 6111 case AMDGPU_IRQ_STATE_DISABLE: 6112 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6113 CP_ECC_ERROR_INT_ENABLE, 0); 6114 DISABLE_ECC_ON_ME_PIPE(1, 0); 6115 DISABLE_ECC_ON_ME_PIPE(1, 1); 6116 DISABLE_ECC_ON_ME_PIPE(1, 2); 6117 DISABLE_ECC_ON_ME_PIPE(1, 3); 6118 break; 6119 6120 case AMDGPU_IRQ_STATE_ENABLE: 6121 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 6122 CP_ECC_ERROR_INT_ENABLE, 1); 6123 ENABLE_ECC_ON_ME_PIPE(1, 0); 6124 ENABLE_ECC_ON_ME_PIPE(1, 1); 6125 ENABLE_ECC_ON_ME_PIPE(1, 2); 6126 ENABLE_ECC_ON_ME_PIPE(1, 3); 6127 break; 6128 default: 6129 break; 6130 } 6131 6132 return 0; 6133 } 6134 6135 6136 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6137 struct amdgpu_irq_src *src, 6138 unsigned type, 6139 enum amdgpu_interrupt_state state) 6140 { 6141 switch (type) { 6142 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6143 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 6144 break; 6145 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6146 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6147 break; 6148 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6149 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6150 break; 6151 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6152 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6153 break; 6154 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6155 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6156 break; 6157 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6158 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6159 break; 6160 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6161 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6162 break; 6163 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6164 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6165 break; 6166 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6167 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6168 break; 6169 default: 6170 break; 6171 } 6172 return 0; 6173 } 6174 6175 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 6176 struct amdgpu_irq_src *source, 6177 struct amdgpu_iv_entry *entry) 6178 { 6179 int i; 6180 u8 me_id, pipe_id, queue_id; 6181 struct amdgpu_ring *ring; 6182 6183 DRM_DEBUG("IH: CP EOP\n"); 6184 me_id = (entry->ring_id & 0x0c) >> 2; 6185 pipe_id = (entry->ring_id & 0x03) >> 0; 6186 queue_id = (entry->ring_id & 0x70) >> 4; 6187 6188 switch (me_id) { 6189 case 0: 6190 if (adev->gfx.num_gfx_rings) { 6191 if (!adev->gfx.mcbp) { 6192 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6193 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { 6194 /* Fence signals are handled on the software rings*/ 6195 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 6196 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); 6197 } 6198 } 6199 break; 6200 case 1: 6201 case 2: 6202 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6203 ring = &adev->gfx.compute_ring[i]; 6204 /* Per-queue interrupt is supported for MEC starting 
from VI. 6205 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6206 */ 6207 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6208 amdgpu_fence_process(ring); 6209 } 6210 break; 6211 } 6212 return 0; 6213 } 6214 6215 static void gfx_v9_0_fault(struct amdgpu_device *adev, 6216 struct amdgpu_iv_entry *entry) 6217 { 6218 u8 me_id, pipe_id, queue_id; 6219 struct amdgpu_ring *ring; 6220 int i; 6221 6222 me_id = (entry->ring_id & 0x0c) >> 2; 6223 pipe_id = (entry->ring_id & 0x03) >> 0; 6224 queue_id = (entry->ring_id & 0x70) >> 4; 6225 6226 switch (me_id) { 6227 case 0: 6228 drm_sched_fault(&adev->gfx.gfx_ring[0].sched); 6229 break; 6230 case 1: 6231 case 2: 6232 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6233 ring = &adev->gfx.compute_ring[i]; 6234 if (ring->me == me_id && ring->pipe == pipe_id && 6235 ring->queue == queue_id) 6236 drm_sched_fault(&ring->sched); 6237 } 6238 break; 6239 } 6240 } 6241 6242 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, 6243 struct amdgpu_irq_src *source, 6244 struct amdgpu_iv_entry *entry) 6245 { 6246 DRM_ERROR("Illegal register access in command stream\n"); 6247 gfx_v9_0_fault(adev, entry); 6248 return 0; 6249 } 6250 6251 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev, 6252 struct amdgpu_irq_src *source, 6253 struct amdgpu_iv_entry *entry) 6254 { 6255 DRM_ERROR("Illegal opcode in command stream\n"); 6256 gfx_v9_0_fault(adev, entry); 6257 return 0; 6258 } 6259 6260 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, 6261 struct amdgpu_irq_src *source, 6262 struct amdgpu_iv_entry *entry) 6263 { 6264 DRM_ERROR("Illegal instruction in command stream\n"); 6265 gfx_v9_0_fault(adev, entry); 6266 return 0; 6267 } 6268 6269 6270 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { 6271 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 6272 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT), 6273 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT) 6274 }, 6275 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 6276 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT), 6277 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT) 6278 }, 6279 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6280 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1), 6281 0, 0 6282 }, 6283 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 6284 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2), 6285 0, 0 6286 }, 6287 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 6288 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT), 6289 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT) 6290 }, 6291 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6292 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT), 6293 0, 0 6294 }, 6295 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 6296 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), 6297 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT) 6298 }, 6299 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 6300 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT), 6301 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT) 6302 }, 6303 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 6304 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1), 6305 0, 0 6306 }, 6307 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 6308 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1), 6309 0, 0 6310 }, 6311 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 6312 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1), 6313 0, 0 6314 }, 6315 { "GDS_MEM", 
SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6316 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC), 6317 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED) 6318 }, 6319 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 6320 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 6321 0, 0 6322 }, 6323 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6324 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), 6325 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) 6326 }, 6327 { "GDS_OA_PHY_PHY_CMD_RAM_MEM", 6328 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6329 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), 6330 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) 6331 }, 6332 { "GDS_OA_PHY_PHY_DATA_RAM_MEM", 6333 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 6334 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 6335 0, 0 6336 }, 6337 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", 6338 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6339 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), 6340 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) 6341 }, 6342 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", 6343 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6344 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), 6345 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) 6346 }, 6347 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", 6348 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6349 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), 6350 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) 6351 }, 6352 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", 6353 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 6354 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), 6355 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) 6356 }, 6357 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 6358 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 6359 0, 0 6360 }, 6361 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6362 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), 6363 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) 6364 }, 6365 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6366 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 6367 0, 0 6368 }, 6369 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6370 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 6371 0, 0 6372 }, 6373 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6374 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 6375 0, 0 6376 }, 6377 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 6378 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 6379 0, 0 6380 }, 6381 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6382 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 6383 0, 0 6384 }, 6385 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 6386 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 6387 0, 0 6388 }, 6389 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6390 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), 6391 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) 6392 }, 6393 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6394 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), 6395 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) 6396 }, 6397 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6398 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), 6399 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) 6400 }, 6401 { "TCC_LOW_RATE_TAG", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6402 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), 6403 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) 6404 }, 6405 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6406 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), 6407 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) 6408 }, 6409 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6410 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 6411 0, 0 6412 }, 6413 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6414 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 6415 0, 0 6416 }, 6417 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6418 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 6419 0, 0 6420 }, 6421 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6422 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 6423 0, 0 6424 }, 6425 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6426 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 6427 0, 0 6428 }, 6429 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 6430 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 6431 0, 0 6432 }, 6433 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6434 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 6435 0, 0 6436 }, 6437 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6438 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 6439 0, 0 6440 }, 6441 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6442 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 6443 0, 0 6444 }, 6445 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6446 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), 6447 0, 0 6448 }, 6449 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6450 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 6451 0, 0 6452 }, 6453 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6454 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), 6455 0, 0 6456 }, 6457 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 6458 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 6459 0, 0 6460 }, 6461 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 6462 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 6463 0, 0 6464 }, 6465 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6466 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), 6467 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) 6468 }, 6469 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6470 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), 6471 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) 6472 }, 6473 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6474 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 6475 0, 0 6476 }, 6477 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6478 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 6479 0, 0 6480 }, 6481 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6482 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 6483 0, 0 6484 }, 6485 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6486 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), 6487 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) 6488 }, 6489 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 6490 
SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), 6491 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) 6492 }, 6493 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6494 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), 6495 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) 6496 }, 6497 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6498 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), 6499 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) 6500 }, 6501 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 6502 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT), 6503 0, 0 6504 }, 6505 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6506 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT), 6507 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT) 6508 }, 6509 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6510 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT), 6511 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT) 6512 }, 6513 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6514 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT), 6515 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT) 6516 }, 6517 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6518 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT), 6519 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT) 6520 }, 6521 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6522 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT), 6523 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT) 6524 }, 6525 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6526 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT), 6527 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT) 6528 }, 6529 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 6530 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT), 6531 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT) 6532 }, 6533 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6534 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), 6535 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) 6536 }, 6537 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6538 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), 6539 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) 6540 }, 6541 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6542 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), 6543 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) 6544 }, 6545 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6546 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), 6547 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) 6548 }, 6549 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6550 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), 6551 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) 6552 }, 6553 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 6554 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), 6555 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) 6556 }, 6557 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6558 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), 6559 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) 6560 }, 6561 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6562 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), 6563 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) 6564 }, 6565 { 
"SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6566 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), 6567 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) 6568 }, 6569 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6570 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), 6571 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) 6572 }, 6573 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6574 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), 6575 0, 0 6576 }, 6577 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6578 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 6579 0, 0 6580 }, 6581 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6582 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 6583 0, 0 6584 }, 6585 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6586 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 6587 0, 0 6588 }, 6589 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6590 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 6591 0, 0 6592 }, 6593 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 6594 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), 6595 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) 6596 }, 6597 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6598 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), 6599 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) 6600 }, 6601 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6602 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), 6603 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) 6604 }, 6605 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6606 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), 6607 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) 6608 }, 6609 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6610 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), 6611 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) 6612 }, 6613 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6614 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), 6615 0, 0 6616 }, 6617 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6618 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 6619 0, 0 6620 }, 6621 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6622 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 6623 0, 0 6624 }, 6625 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6626 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 6627 0, 0 6628 }, 6629 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 6630 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 6631 0, 0 6632 }, 6633 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6634 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), 6635 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) 6636 }, 6637 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6638 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), 6639 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) 6640 }, 6641 { "EA_DRAMWR_DATAMEM", 
SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6642 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), 6643 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) 6644 }, 6645 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6646 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), 6647 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) 6648 }, 6649 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6650 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), 6651 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) 6652 }, 6653 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6654 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 6655 0, 0 6656 }, 6657 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6658 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 6659 0, 0 6660 }, 6661 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6662 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 6663 0, 0 6664 }, 6665 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6666 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 6667 0, 0 6668 }, 6669 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 6670 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 6671 0, 0 6672 }, 6673 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6674 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), 6675 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) 6676 }, 6677 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6678 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), 6679 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) 6680 }, 6681 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6682 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), 6683 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) 6684 }, 6685 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6686 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 6687 0, 0 6688 }, 6689 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6690 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 6691 0, 0 6692 }, 6693 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6694 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 6695 0, 0 6696 }, 6697 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6698 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 6699 0, 0 6700 }, 6701 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6702 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 6703 0, 0 6704 }, 6705 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 6706 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 6707 0, 0 6708 } 6709 }; 6710 6711 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, 6712 void *inject_if, uint32_t instance_mask) 6713 { 6714 struct ras_inject_if *info = (struct ras_inject_if *)inject_if; 6715 int ret; 6716 struct ta_ras_trigger_error_input block_info = { 0 }; 6717 6718 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6719 return -EINVAL; 6720 6721 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) 6722 return -EINVAL; 6723 6724 if (!ras_gfx_subblocks[info->head.sub_block_index].name) 6725 return -EPERM; 6726 6727 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & 6728 info->head.type)) { 6729 DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n", 6730 ras_gfx_subblocks[info->head.sub_block_index].name, 6731 info->head.type); 6732 return -EPERM; 6733 } 6734 6735 if 
(!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & 6736 info->head.type)) { 6737 DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n", 6738 ras_gfx_subblocks[info->head.sub_block_index].name, 6739 info->head.type); 6740 return -EPERM; 6741 } 6742 6743 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); 6744 block_info.sub_block_index = 6745 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; 6746 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); 6747 block_info.address = info->address; 6748 block_info.value = info->value; 6749 6750 mutex_lock(&adev->grbm_idx_mutex); 6751 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); 6752 mutex_unlock(&adev->grbm_idx_mutex); 6753 6754 return ret; 6755 } 6756 6757 static const char * const vml2_mems[] = { 6758 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", 6759 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", 6760 "UTC_VML2_BANK_CACHE_0_4K_MEM0", 6761 "UTC_VML2_BANK_CACHE_0_4K_MEM1", 6762 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0", 6763 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1", 6764 "UTC_VML2_BANK_CACHE_1_4K_MEM0", 6765 "UTC_VML2_BANK_CACHE_1_4K_MEM1", 6766 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0", 6767 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1", 6768 "UTC_VML2_BANK_CACHE_2_4K_MEM0", 6769 "UTC_VML2_BANK_CACHE_2_4K_MEM1", 6770 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0", 6771 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1", 6772 "UTC_VML2_BANK_CACHE_3_4K_MEM0", 6773 "UTC_VML2_BANK_CACHE_3_4K_MEM1", 6774 }; 6775 6776 static const char * const vml2_walker_mems[] = { 6777 "UTC_VML2_CACHE_PDE0_MEM0", 6778 "UTC_VML2_CACHE_PDE0_MEM1", 6779 "UTC_VML2_CACHE_PDE1_MEM0", 6780 "UTC_VML2_CACHE_PDE1_MEM1", 6781 "UTC_VML2_CACHE_PDE2_MEM0", 6782 "UTC_VML2_CACHE_PDE2_MEM1", 6783 "UTC_VML2_RDIF_LOG_FIFO", 6784 }; 6785 6786 static const char * const atc_l2_cache_2m_mems[] = { 6787 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", 6788 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", 6789 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", 6790 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM", 6791 }; 6792 6793 static const char *atc_l2_cache_4k_mems[] = { 6794 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0", 6795 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1", 6796 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2", 6797 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3", 6798 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4", 6799 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5", 6800 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6", 6801 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7", 6802 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0", 6803 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1", 6804 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2", 6805 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3", 6806 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4", 6807 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5", 6808 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6", 6809 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7", 6810 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0", 6811 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1", 6812 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2", 6813 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3", 6814 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4", 6815 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5", 6816 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6", 6817 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7", 6818 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0", 6819 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1", 6820 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2", 6821 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3", 6822 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4", 6823 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5", 6824 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6", 6825 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7", 6826 }; 6827 6828 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev, 6829 struct 
ras_err_data *err_data) 6830 { 6831 uint32_t i, data; 6832 uint32_t sec_count, ded_count; 6833 6834 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6835 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6836 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6837 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6838 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6839 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6840 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6841 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6842 6843 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6844 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6845 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6846 6847 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT); 6848 if (sec_count) { 6849 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6850 "SEC %d\n", i, vml2_mems[i], sec_count); 6851 err_data->ce_count += sec_count; 6852 } 6853 6854 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT); 6855 if (ded_count) { 6856 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6857 "DED %d\n", i, vml2_mems[i], ded_count); 6858 err_data->ue_count += ded_count; 6859 } 6860 } 6861 6862 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 6863 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 6864 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 6865 6866 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6867 SEC_COUNT); 6868 if (sec_count) { 6869 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6870 "SEC %d\n", i, vml2_walker_mems[i], sec_count); 6871 err_data->ce_count += sec_count; 6872 } 6873 6874 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT, 6875 DED_COUNT); 6876 if (ded_count) { 6877 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6878 "DED %d\n", i, vml2_walker_mems[i], ded_count); 6879 err_data->ue_count += ded_count; 6880 } 6881 } 6882 6883 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 6884 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 6885 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 6886 6887 sec_count = (data & 0x00006000L) >> 0xd; 6888 if (sec_count) { 6889 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6890 "SEC %d\n", i, atc_l2_cache_2m_mems[i], 6891 sec_count); 6892 err_data->ce_count += sec_count; 6893 } 6894 } 6895 6896 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 6897 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 6898 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 6899 6900 sec_count = (data & 0x00006000L) >> 0xd; 6901 if (sec_count) { 6902 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6903 "SEC %d\n", i, atc_l2_cache_4k_mems[i], 6904 sec_count); 6905 err_data->ce_count += sec_count; 6906 } 6907 6908 ded_count = (data & 0x00018000L) >> 0xf; 6909 if (ded_count) { 6910 dev_info(adev->dev, "Instance[%d]: SubBlock %s, " 6911 "DED %d\n", i, atc_l2_cache_4k_mems[i], 6912 ded_count); 6913 err_data->ue_count += ded_count; 6914 } 6915 } 6916 6917 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6918 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6919 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6920 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6921 6922 return 0; 6923 } 6924 6925 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev, 6926 const struct soc15_reg_entry *reg, 6927 uint32_t se_id, uint32_t inst_id, uint32_t value, 6928 uint32_t *sec_count, uint32_t *ded_count) 6929 { 6930 uint32_t i; 6931 uint32_t sec_cnt, ded_cnt; 6932 6933 for (i 
= 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) { 6934 if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset || 6935 gfx_v9_0_ras_fields[i].seg != reg->seg || 6936 gfx_v9_0_ras_fields[i].inst != reg->inst) 6937 continue; 6938 6939 sec_cnt = (value & 6940 gfx_v9_0_ras_fields[i].sec_count_mask) >> 6941 gfx_v9_0_ras_fields[i].sec_count_shift; 6942 if (sec_cnt) { 6943 dev_info(adev->dev, "GFX SubBlock %s, " 6944 "Instance[%d][%d], SEC %d\n", 6945 gfx_v9_0_ras_fields[i].name, 6946 se_id, inst_id, 6947 sec_cnt); 6948 *sec_count += sec_cnt; 6949 } 6950 6951 ded_cnt = (value & 6952 gfx_v9_0_ras_fields[i].ded_count_mask) >> 6953 gfx_v9_0_ras_fields[i].ded_count_shift; 6954 if (ded_cnt) { 6955 dev_info(adev->dev, "GFX SubBlock %s, " 6956 "Instance[%d][%d], DED %d\n", 6957 gfx_v9_0_ras_fields[i].name, 6958 se_id, inst_id, 6959 ded_cnt); 6960 *ded_count += ded_cnt; 6961 } 6962 } 6963 6964 return 0; 6965 } 6966 6967 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) 6968 { 6969 int i, j, k; 6970 6971 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 6972 return; 6973 6974 /* read back registers to clear the counters */ 6975 mutex_lock(&adev->grbm_idx_mutex); 6976 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 6977 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 6978 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 6979 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); 6980 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 6981 } 6982 } 6983 } 6984 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 6985 mutex_unlock(&adev->grbm_idx_mutex); 6986 6987 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 6988 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0); 6989 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 6990 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0); 6991 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 6992 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0); 6993 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 6994 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0); 6995 6996 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) { 6997 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i); 6998 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT); 6999 } 7000 7001 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) { 7002 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i); 7003 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT); 7004 } 7005 7006 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) { 7007 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i); 7008 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT); 7009 } 7010 7011 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) { 7012 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i); 7013 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT); 7014 } 7015 7016 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255); 7017 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255); 7018 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255); 7019 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); 7020 } 7021 7022 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, 7023 void *ras_error_status) 7024 { 7025 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; 7026 uint32_t sec_count = 0, ded_count = 0; 7027 uint32_t i, j, k; 7028 uint32_t reg_value; 7029 7030 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 7031 return; 7032 7033 err_data->ue_count = 0; 7034 err_data->ce_count = 0; 7035 7036 mutex_lock(&adev->grbm_idx_mutex); 7037 7038 for (i = 0; i < 
ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { 7039 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { 7040 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { 7041 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); 7042 reg_value = 7043 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); 7044 if (reg_value) 7045 gfx_v9_0_ras_error_count(adev, 7046 &gfx_v9_0_edc_counter_regs[i], 7047 j, k, reg_value, 7048 &sec_count, &ded_count); 7049 } 7050 } 7051 } 7052 7053 err_data->ce_count += sec_count; 7054 err_data->ue_count += ded_count; 7055 7056 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7057 mutex_unlock(&adev->grbm_idx_mutex); 7058 7059 gfx_v9_0_query_utc_edc_status(adev, err_data); 7060 } 7061 7062 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) 7063 { 7064 const unsigned int cp_coher_cntl = 7065 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) | 7066 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) | 7067 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) | 7068 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) | 7069 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1); 7070 7071 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 7072 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); 7073 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */ 7074 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 7075 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 7076 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 7077 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 7078 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 7079 } 7080 7081 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring, 7082 uint32_t pipe, bool enable) 7083 { 7084 struct amdgpu_device *adev = ring->adev; 7085 uint32_t val; 7086 uint32_t wcl_cs_reg; 7087 7088 /* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */ 7089 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT; 7090 7091 switch (pipe) { 7092 case 0: 7093 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0); 7094 break; 7095 case 1: 7096 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1); 7097 break; 7098 case 2: 7099 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2); 7100 break; 7101 case 3: 7102 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3); 7103 break; 7104 default: 7105 DRM_DEBUG("invalid pipe %d\n", pipe); 7106 return; 7107 } 7108 7109 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); 7110 7111 } 7112 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) 7113 { 7114 struct amdgpu_device *adev = ring->adev; 7115 uint32_t val; 7116 int i; 7117 7118 7119 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit the 7120 * number of gfx waves. Setting 5 bits (0x1f) will make sure gfx only gets 7121 * around 25% of gpu resources. 7122 */ 7123 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; 7124 amdgpu_ring_emit_wreg(ring, 7125 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX), 7126 val); 7127 7128 /* Restrict waves for normal/low priority compute queues as well 7129 * to get the best QoS for high priority compute jobs. 7130 * 7131 * amdgpu controls only the 1st ME (CS pipes 0-3).
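	 *
	 * Illustrative note (added, derived from the values above): the GFX
	 * limit programmed above is 0x1f out of the 7-bit full scale of 0x7f,
	 * i.e. 31/127, which is roughly the "around 25% of gpu resources"
	 * mentioned in the earlier comment.  The other CS pipes below are
	 * throttled to 0x1, while the pipe this ring runs on is skipped so it
	 * keeps its full wave allocation.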
7132 */ 7133 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 7134 if (i != ring->pipe) 7135 gfx_v9_0_emit_wave_limit_cs(ring, i, enable); 7136 7137 } 7138 } 7139 7140 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 7141 { 7142 /* Header itself is a NOP packet */ 7143 if (num_nop == 1) { 7144 amdgpu_ring_write(ring, ring->funcs->nop); 7145 return; 7146 } 7147 7148 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 7149 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 7150 7151 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 7152 amdgpu_ring_insert_nop(ring, num_nop - 1); 7153 } 7154 7155 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 7156 { 7157 struct amdgpu_device *adev = ring->adev; 7158 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7159 struct amdgpu_ring *kiq_ring = &kiq->ring; 7160 unsigned long flags; 7161 u32 tmp; 7162 int r; 7163 7164 if (amdgpu_sriov_vf(adev)) 7165 return -EINVAL; 7166 7167 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7168 return -EINVAL; 7169 7170 spin_lock_irqsave(&kiq->ring_lock, flags); 7171 7172 if (amdgpu_ring_alloc(kiq_ring, 5)) { 7173 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7174 return -ENOMEM; 7175 } 7176 7177 tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); 7178 gfx_v9_0_ring_emit_wreg(kiq_ring, 7179 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); 7180 amdgpu_ring_commit(kiq_ring); 7181 7182 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7183 7184 r = amdgpu_ring_test_ring(kiq_ring); 7185 if (r) 7186 return r; 7187 7188 if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) 7189 return -ENOMEM; 7190 gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, 7191 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); 7192 gfx_v9_0_ring_emit_reg_wait(ring, 7193 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); 7194 gfx_v9_0_ring_emit_wreg(ring, 7195 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); 7196 7197 return amdgpu_ring_test_ring(ring); 7198 } 7199 7200 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, 7201 unsigned int vmid) 7202 { 7203 struct amdgpu_device *adev = ring->adev; 7204 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 7205 struct amdgpu_ring *kiq_ring = &kiq->ring; 7206 unsigned long flags; 7207 int i, r; 7208 7209 if (amdgpu_sriov_vf(adev)) 7210 return -EINVAL; 7211 7212 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 7213 return -EINVAL; 7214 7215 spin_lock_irqsave(&kiq->ring_lock, flags); 7216 7217 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 7218 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7219 return -ENOMEM; 7220 } 7221 7222 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 7223 0, 0); 7224 amdgpu_ring_commit(kiq_ring); 7225 7226 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7227 7228 r = amdgpu_ring_test_ring(kiq_ring); 7229 if (r) 7230 return r; 7231 7232 /* make sure dequeue is complete*/ 7233 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 7234 mutex_lock(&adev->srbm_mutex); 7235 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); 7236 for (i = 0; i < adev->usec_timeout; i++) { 7237 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 7238 break; 7239 udelay(1); 7240 } 7241 if (i >= adev->usec_timeout) 7242 r = -ETIMEDOUT; 7243 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7244 mutex_unlock(&adev->srbm_mutex); 7245 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 7246 if (r) { 7247 dev_err(adev->dev, "fail to wait on hqd deactive\n"); 7248 return r; 7249 } 7250 7251 r = 
gfx_v9_0_kcq_init_queue(ring, true); 7252 if (r) { 7253 dev_err(adev->dev, "fail to init kcq\n"); 7254 return r; 7255 } 7256 spin_lock_irqsave(&kiq->ring_lock, flags); 7257 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size); 7258 if (r) { 7259 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7260 return -ENOMEM; 7261 } 7262 kiq->pmf->kiq_map_queues(kiq_ring, ring); 7263 amdgpu_ring_commit(kiq_ring); 7264 spin_unlock_irqrestore(&kiq->ring_lock, flags); 7265 r = amdgpu_ring_test_ring(kiq_ring); 7266 if (r) { 7267 DRM_ERROR("fail to remap queue\n"); 7268 return r; 7269 } 7270 return amdgpu_ring_test_ring(ring); 7271 } 7272 7273 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7274 { 7275 struct amdgpu_device *adev = ip_block->adev; 7276 uint32_t i, j, k, reg, index = 0; 7277 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7278 7279 if (!adev->gfx.ip_dump_core) 7280 return; 7281 7282 for (i = 0; i < reg_count; i++) 7283 drm_printf(p, "%-50s \t 0x%08x\n", 7284 gc_reg_list_9[i].reg_name, 7285 adev->gfx.ip_dump_core[i]); 7286 7287 /* print compute queue registers for all instances */ 7288 if (!adev->gfx.ip_dump_compute_queues) 7289 return; 7290 7291 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7292 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7293 adev->gfx.mec.num_mec, 7294 adev->gfx.mec.num_pipe_per_mec, 7295 adev->gfx.mec.num_queue_per_pipe); 7296 7297 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7298 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7299 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7300 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7301 for (reg = 0; reg < reg_count; reg++) { 7302 if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) 7303 drm_printf(p, "%-50s \t 0x%08x\n", 7304 "mmCP_MEC_ME2_HEADER_DUMP", 7305 adev->gfx.ip_dump_compute_queues[index + reg]); 7306 else 7307 drm_printf(p, "%-50s \t 0x%08x\n", 7308 gc_cp_reg_list_9[reg].reg_name, 7309 adev->gfx.ip_dump_compute_queues[index + reg]); 7310 } 7311 index += reg_count; 7312 } 7313 } 7314 } 7315 7316 } 7317 7318 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) 7319 { 7320 struct amdgpu_device *adev = ip_block->adev; 7321 uint32_t i, j, k, reg, index = 0; 7322 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); 7323 7324 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings) 7325 return; 7326 7327 amdgpu_gfx_off_ctrl(adev, false); 7328 for (i = 0; i < reg_count; i++) 7329 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); 7330 amdgpu_gfx_off_ctrl(adev, true); 7331 7332 /* dump compute queue registers for all instances */ 7333 if (!adev->gfx.ip_dump_compute_queues) 7334 return; 7335 7336 reg_count = ARRAY_SIZE(gc_cp_reg_list_9); 7337 amdgpu_gfx_off_ctrl(adev, false); 7338 mutex_lock(&adev->srbm_mutex); 7339 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7340 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7341 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7342 /* ME0 is for GFX so start from 1 for CP */ 7343 soc15_grbm_select(adev, 1 + i, j, k, 0, 0); 7344 7345 for (reg = 0; reg < reg_count; reg++) { 7346 if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) 7347 adev->gfx.ip_dump_compute_queues[index + reg] = 7348 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); 7349 else 7350 adev->gfx.ip_dump_compute_queues[index + reg] = 7351 RREG32(SOC15_REG_ENTRY_OFFSET( 7352 gc_cp_reg_list_9[reg])); 7353 } 7354 index += reg_count; 7355 } 7356 } 7357 } 
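	/* Done walking the compute queues: restore the default GRBM selection
	 * before dropping srbm_mutex and allowing GFXOFF again.
	 */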
7358 soc15_grbm_select(adev, 0, 0, 0, 0, 0); 7359 mutex_unlock(&adev->srbm_mutex); 7360 amdgpu_gfx_off_ctrl(adev, true); 7361 7362 } 7363 7364 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7365 { 7366 struct amdgpu_device *adev = ring->adev; 7367 7368 /* Emit the cleaner shader */ 7369 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) 7370 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7371 else 7372 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); 7373 7374 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7375 } 7376 7377 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) 7378 { 7379 struct amdgpu_device *adev = ring->adev; 7380 struct amdgpu_ip_block *gfx_block = 7381 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7382 7383 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7384 7385 /* Raven and PCO APUs seem to have stability issues 7386 * with compute and gfxoff and gfx pg. Disable gfx pg during 7387 * submission and allow again afterwards. 7388 */ 7389 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7390 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); 7391 } 7392 7393 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) 7394 { 7395 struct amdgpu_device *adev = ring->adev; 7396 struct amdgpu_ip_block *gfx_block = 7397 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); 7398 7399 /* Raven and PCO APUs seem to have stability issues 7400 * with compute and gfxoff and gfx pg. Disable gfx pg during 7401 * submission and allow again afterwards. 7402 */ 7403 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) 7404 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); 7405 7406 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7407 } 7408 7409 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { 7410 .name = "gfx_v9_0", 7411 .early_init = gfx_v9_0_early_init, 7412 .late_init = gfx_v9_0_late_init, 7413 .sw_init = gfx_v9_0_sw_init, 7414 .sw_fini = gfx_v9_0_sw_fini, 7415 .hw_init = gfx_v9_0_hw_init, 7416 .hw_fini = gfx_v9_0_hw_fini, 7417 .suspend = gfx_v9_0_suspend, 7418 .resume = gfx_v9_0_resume, 7419 .is_idle = gfx_v9_0_is_idle, 7420 .wait_for_idle = gfx_v9_0_wait_for_idle, 7421 .soft_reset = gfx_v9_0_soft_reset, 7422 .set_clockgating_state = gfx_v9_0_set_clockgating_state, 7423 .set_powergating_state = gfx_v9_0_set_powergating_state, 7424 .get_clockgating_state = gfx_v9_0_get_clockgating_state, 7425 .dump_ip_state = gfx_v9_ip_dump, 7426 .print_ip_state = gfx_v9_ip_print, 7427 }; 7428 7429 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 7430 .type = AMDGPU_RING_TYPE_GFX, 7431 .align_mask = 0xff, 7432 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7433 .support_64bit_ptrs = true, 7434 .secure_submission_supported = true, 7435 .get_rptr = gfx_v9_0_ring_get_rptr_gfx, 7436 .get_wptr = gfx_v9_0_ring_get_wptr_gfx, 7437 .set_wptr = gfx_v9_0_ring_set_wptr_gfx, 7438 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7439 5 + /* COND_EXEC */ 7440 7 + /* PIPELINE_SYNC */ 7441 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7442 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7443 2 + /* VM_FLUSH */ 7444 8 + /* FENCE for VM_FLUSH */ 7445 20 + /* GDS switch */ 7446 4 + /* double SWITCH_BUFFER, 7447 the first COND_EXEC jump to the place just 7448 prior to this double SWITCH_BUFFER */ 7449 5 + /* COND_EXEC */ 7450 7 + /* HDP_flush */ 7451 4 + /* VGT_flush */ 7452 14 + /* CE_META */ 7453 31 
+ /* DE_META */ 7454 3 + /* CNTX_CTRL */ 7455 5 + /* HDP_INVL */ 7456 8 + 8 + /* FENCE x2 */ 7457 2 + /* SWITCH_BUFFER */ 7458 7 + /* gfx_v9_0_emit_mem_sync */ 7459 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7460 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7461 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7462 .emit_fence = gfx_v9_0_ring_emit_fence, 7463 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7464 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7465 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7466 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7467 .test_ring = gfx_v9_0_ring_test_ring, 7468 .insert_nop = gfx_v9_ring_insert_nop, 7469 .pad_ib = amdgpu_ring_generic_pad_ib, 7470 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7471 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7472 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7473 .preempt_ib = gfx_v9_0_ring_preempt_ib, 7474 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7475 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7476 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7477 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7478 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7479 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7480 .reset = gfx_v9_0_reset_kgq, 7481 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7482 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7483 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7484 }; 7485 7486 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { 7487 .type = AMDGPU_RING_TYPE_GFX, 7488 .align_mask = 0xff, 7489 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7490 .support_64bit_ptrs = true, 7491 .secure_submission_supported = true, 7492 .get_rptr = amdgpu_sw_ring_get_rptr_gfx, 7493 .get_wptr = amdgpu_sw_ring_get_wptr_gfx, 7494 .set_wptr = amdgpu_sw_ring_set_wptr_gfx, 7495 .emit_frame_size = /* totally 242 maximum if 16 IBs */ 7496 5 + /* COND_EXEC */ 7497 7 + /* PIPELINE_SYNC */ 7498 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7499 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7500 2 + /* VM_FLUSH */ 7501 8 + /* FENCE for VM_FLUSH */ 7502 20 + /* GDS switch */ 7503 4 + /* double SWITCH_BUFFER, 7504 * the first COND_EXEC jump to the place just 7505 * prior to this double SWITCH_BUFFER 7506 */ 7507 5 + /* COND_EXEC */ 7508 7 + /* HDP_flush */ 7509 4 + /* VGT_flush */ 7510 14 + /* CE_META */ 7511 31 + /* DE_META */ 7512 3 + /* CNTX_CTRL */ 7513 5 + /* HDP_INVL */ 7514 8 + 8 + /* FENCE x2 */ 7515 2 + /* SWITCH_BUFFER */ 7516 7 + /* gfx_v9_0_emit_mem_sync */ 7517 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7518 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ 7519 .emit_ib = gfx_v9_0_ring_emit_ib_gfx, 7520 .emit_fence = gfx_v9_0_ring_emit_fence, 7521 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7522 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7523 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7524 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7525 .test_ring = gfx_v9_0_ring_test_ring, 7526 .test_ib = gfx_v9_0_ring_test_ib, 7527 .insert_nop = gfx_v9_ring_insert_nop, 7528 .pad_ib = amdgpu_ring_generic_pad_ib, 7529 .emit_switch_buffer = gfx_v9_ring_emit_sb, 7530 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, 7531 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, 7532 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, 7533 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7534 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7535 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7536 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7537 .emit_mem_sync = 
gfx_v9_0_emit_mem_sync, 7538 .patch_cntl = gfx_v9_0_ring_patch_cntl, 7539 .patch_de = gfx_v9_0_ring_patch_de_meta, 7540 .patch_ce = gfx_v9_0_ring_patch_ce_meta, 7541 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7542 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, 7543 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, 7544 }; 7545 7546 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7547 .type = AMDGPU_RING_TYPE_COMPUTE, 7548 .align_mask = 0xff, 7549 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7550 .support_64bit_ptrs = true, 7551 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7552 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7553 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7554 .emit_frame_size = 7555 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7556 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7557 5 + /* hdp invalidate */ 7558 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7559 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7560 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7561 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7562 7 + /* gfx_v9_0_emit_mem_sync */ 7563 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ 7564 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ 7565 2, /* gfx_v9_0_ring_emit_cleaner_shader */ 7566 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7567 .emit_ib = gfx_v9_0_ring_emit_ib_compute, 7568 .emit_fence = gfx_v9_0_ring_emit_fence, 7569 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, 7570 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, 7571 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, 7572 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, 7573 .test_ring = gfx_v9_0_ring_test_ring, 7574 .test_ib = gfx_v9_0_ring_test_ib, 7575 .insert_nop = gfx_v9_ring_insert_nop, 7576 .pad_ib = amdgpu_ring_generic_pad_ib, 7577 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7578 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7579 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7580 .soft_recovery = gfx_v9_0_ring_soft_recovery, 7581 .emit_mem_sync = gfx_v9_0_emit_mem_sync, 7582 .emit_wave_limit = gfx_v9_0_emit_wave_limit, 7583 .reset = gfx_v9_0_reset_kcq, 7584 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, 7585 .begin_use = gfx_v9_0_ring_begin_use_compute, 7586 .end_use = gfx_v9_0_ring_end_use_compute, 7587 }; 7588 7589 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7590 .type = AMDGPU_RING_TYPE_KIQ, 7591 .align_mask = 0xff, 7592 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7593 .support_64bit_ptrs = true, 7594 .get_rptr = gfx_v9_0_ring_get_rptr_compute, 7595 .get_wptr = gfx_v9_0_ring_get_wptr_compute, 7596 .set_wptr = gfx_v9_0_ring_set_wptr_compute, 7597 .emit_frame_size = 7598 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7599 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 7600 5 + /* hdp invalidate */ 7601 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ 7602 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7603 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7604 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7605 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ 7606 .emit_fence = gfx_v9_0_ring_emit_fence_kiq, 7607 .test_ring = gfx_v9_0_ring_test_ring, 7608 .insert_nop = amdgpu_ring_insert_nop, 7609 .pad_ib = amdgpu_ring_generic_pad_ib, 7610 .emit_rreg = gfx_v9_0_ring_emit_rreg, 7611 .emit_wreg = gfx_v9_0_ring_emit_wreg, 7612 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, 7613 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, 7614 }; 7615 7616 static 
void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) 7617 { 7618 int i; 7619 7620 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; 7621 7622 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7623 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; 7624 7625 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { 7626 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) 7627 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; 7628 } 7629 7630 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7631 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; 7632 } 7633 7634 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { 7635 .set = gfx_v9_0_set_eop_interrupt_state, 7636 .process = gfx_v9_0_eop_irq, 7637 }; 7638 7639 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { 7640 .set = gfx_v9_0_set_priv_reg_fault_state, 7641 .process = gfx_v9_0_priv_reg_irq, 7642 }; 7643 7644 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = { 7645 .set = gfx_v9_0_set_bad_op_fault_state, 7646 .process = gfx_v9_0_bad_op_irq, 7647 }; 7648 7649 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { 7650 .set = gfx_v9_0_set_priv_inst_fault_state, 7651 .process = gfx_v9_0_priv_inst_irq, 7652 }; 7653 7654 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = { 7655 .set = gfx_v9_0_set_cp_ecc_error_state, 7656 .process = amdgpu_gfx_cp_ecc_error_irq, 7657 }; 7658 7659 7660 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) 7661 { 7662 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7663 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; 7664 7665 adev->gfx.priv_reg_irq.num_types = 1; 7666 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; 7667 7668 adev->gfx.bad_op_irq.num_types = 1; 7669 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs; 7670 7671 adev->gfx.priv_inst_irq.num_types = 1; 7672 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; 7673 7674 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/ 7675 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs; 7676 } 7677 7678 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) 7679 { 7680 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7681 case IP_VERSION(9, 0, 1): 7682 case IP_VERSION(9, 2, 1): 7683 case IP_VERSION(9, 4, 0): 7684 case IP_VERSION(9, 2, 2): 7685 case IP_VERSION(9, 1, 0): 7686 case IP_VERSION(9, 4, 1): 7687 case IP_VERSION(9, 3, 0): 7688 case IP_VERSION(9, 4, 2): 7689 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; 7690 break; 7691 default: 7692 break; 7693 } 7694 } 7695 7696 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) 7697 { 7698 /* init asci gds info */ 7699 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7700 case IP_VERSION(9, 0, 1): 7701 case IP_VERSION(9, 2, 1): 7702 case IP_VERSION(9, 4, 0): 7703 adev->gds.gds_size = 0x10000; 7704 break; 7705 case IP_VERSION(9, 2, 2): 7706 case IP_VERSION(9, 1, 0): 7707 case IP_VERSION(9, 4, 1): 7708 adev->gds.gds_size = 0x1000; 7709 break; 7710 case IP_VERSION(9, 4, 2): 7711 /* aldebaran removed all the GDS internal memory, 7712 * only support GWS opcode in kernel, like barrier 7713 * semaphore.etc */ 7714 adev->gds.gds_size = 0; 7715 break; 7716 default: 7717 adev->gds.gds_size = 0x10000; 7718 break; 7719 } 7720 7721 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 7722 case IP_VERSION(9, 0, 1): 7723 case IP_VERSION(9, 4, 0): 7724 adev->gds.gds_compute_max_wave_id = 0x7ff; 7725 break; 7726 case IP_VERSION(9, 
2, 1): 7727 adev->gds.gds_compute_max_wave_id = 0x27f; 7728 break; 7729 case IP_VERSION(9, 2, 2): 7730 case IP_VERSION(9, 1, 0): 7731 if (adev->apu_flags & AMD_APU_IS_RAVEN2) 7732 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */ 7733 else 7734 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ 7735 break; 7736 case IP_VERSION(9, 4, 1): 7737 adev->gds.gds_compute_max_wave_id = 0xfff; 7738 break; 7739 case IP_VERSION(9, 4, 2): 7740 /* deprecated for Aldebaran, no usage at all */ 7741 adev->gds.gds_compute_max_wave_id = 0; 7742 break; 7743 default: 7744 /* this really depends on the chip */ 7745 adev->gds.gds_compute_max_wave_id = 0x7ff; 7746 break; 7747 } 7748 7749 adev->gds.gws_size = 64; 7750 adev->gds.oa_size = 16; 7751 } 7752 7753 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7754 u32 bitmap) 7755 { 7756 u32 data; 7757 7758 if (!bitmap) 7759 return; 7760 7761 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7762 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7763 7764 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data); 7765 } 7766 7767 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7768 { 7769 u32 data, mask; 7770 7771 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); 7772 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG); 7773 7774 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7775 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7776 7777 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7778 7779 return (~data) & mask; 7780 } 7781 7782 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, 7783 struct amdgpu_cu_info *cu_info) 7784 { 7785 int i, j, k, counter, active_cu_number = 0; 7786 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7787 unsigned disable_masks[4 * 4]; 7788 7789 if (!adev || !cu_info) 7790 return -EINVAL; 7791 7792 /* 7793 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs 7794 */ 7795 if (adev->gfx.config.max_shader_engines * 7796 adev->gfx.config.max_sh_per_se > 16) 7797 return -EINVAL; 7798 7799 amdgpu_gfx_parse_disable_cu(disable_masks, 7800 adev->gfx.config.max_shader_engines, 7801 adev->gfx.config.max_sh_per_se); 7802 7803 mutex_lock(&adev->grbm_idx_mutex); 7804 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7805 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7806 mask = 1; 7807 ao_bitmap = 0; 7808 counter = 0; 7809 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); 7810 gfx_v9_0_set_user_cu_inactive_bitmap( 7811 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); 7812 bitmap = gfx_v9_0_get_cu_active_bitmap(adev); 7813 7814 /* 7815 * The bitmap(and ao_cu_bitmap) in cu_info structure is 7816 * 4x4 size array, and it's usually suitable for Vega 7817 * ASICs which has 4*2 SE/SH layout. 7818 * But for Arcturus, SE/SH layout is changed to 8*1. 
7819 * To mostly reduce the impact, we make it compatible 7820 * with current bitmap array as below: 7821 * SE4,SH0 --> bitmap[0][1] 7822 * SE5,SH0 --> bitmap[1][1] 7823 * SE6,SH0 --> bitmap[2][1] 7824 * SE7,SH0 --> bitmap[3][1] 7825 */ 7826 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; 7827 7828 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7829 if (bitmap & mask) { 7830 if (counter < adev->gfx.config.max_cu_per_sh) 7831 ao_bitmap |= mask; 7832 counter ++; 7833 } 7834 mask <<= 1; 7835 } 7836 active_cu_number += counter; 7837 if (i < 2 && j < 2) 7838 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7839 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; 7840 } 7841 } 7842 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7843 mutex_unlock(&adev->grbm_idx_mutex); 7844 7845 cu_info->number = active_cu_number; 7846 cu_info->ao_cu_mask = ao_cu_mask; 7847 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7848 7849 return 0; 7850 } 7851 7852 const struct amdgpu_ip_block_version gfx_v9_0_ip_block = 7853 { 7854 .type = AMD_IP_BLOCK_TYPE_GFX, 7855 .major = 9, 7856 .minor = 0, 7857 .rev = 0, 7858 .funcs = &gfx_v9_0_ip_funcs, 7859 }; 7860
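/*
 * Usage note (illustrative addition, not original driver code): SoC setup
 * code such as soc15.c is expected to register this block with
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 *
 * which hooks the gfx_v9_0_ip_funcs callbacks defined above into the
 * device's IP-block init/fini, suspend/resume and power state handling.
 */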