1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "imu_v11_0.h" 33 #include "soc21.h" 34 #include "nvd.h" 35 36 #include "gc/gc_11_0_0_offset.h" 37 #include "gc/gc_11_0_0_sh_mask.h" 38 #include "smuio/smuio_13_0_6_offset.h" 39 #include "smuio/smuio_13_0_6_sh_mask.h" 40 #include "navi10_enum.h" 41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 42 43 #include "soc15.h" 44 #include "clearstate_gfx11.h" 45 #include "v11_structs.h" 46 #include "gfx_v11_0.h" 47 #include "gfx_v11_0_cleaner_shader.h" 48 #include "gfx_v11_0_3.h" 49 #include "nbio_v4_3.h" 50 #include "mes_v11_0.h" 51 52 #define GFX11_NUM_GFX_RINGS 1 53 #define GFX11_MEC_HPD_SIZE 2048 54 55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 57 58 #define regCGTT_WD_CLK_CTRL 0x5086 59 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e 61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 62 #define regPC_CONFIG_CNTL_1 0x194d 63 #define regPC_CONFIG_CNTL_1_BASE_IDX 1 64 65 #define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 66 #define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 67 #define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 68 #define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 69 #define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 70 #define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 71 #define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 72 73 #define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 74 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 75 #define regCP_MQD_CONTROL_DEFAULT 0x00000100 76 #define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 77 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 78 #define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 79 #define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 80 #define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 81 82 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); 83 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); 84 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); 85 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); 86 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); 87 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 88 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 
89 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 104 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 105 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 106 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 107 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 109 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 110 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 112 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 113 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 114 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 115 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 116 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 118 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 120 121 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 122 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 123 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 124 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 125 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 126 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 127 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 135 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 136 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 138 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 139 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 140 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 141 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 142 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 143 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 144 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 145 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 146 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 147 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 148 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 149 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 155 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 156 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 157 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 158 SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 159 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 160 SOC15_REG_ENTRY_STR(GC, 0, 
regIA_UTCL1_STATUS), 161 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 162 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 163 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 164 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 165 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 166 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 167 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 168 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 169 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 170 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 171 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 172 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 173 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 174 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 175 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 176 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 177 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 178 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), 179 /* cp header registers */ 180 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), 181 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 182 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 183 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 184 /* SE status registers */ 185 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), 186 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), 187 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), 188 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), 189 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), 190 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) 191 }; 192 193 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { 194 /* compute registers */ 195 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), 196 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), 197 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), 198 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), 199 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), 200 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), 201 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), 202 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), 203 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), 204 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), 205 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), 206 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), 207 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), 208 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), 209 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), 210 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), 211 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), 212 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), 213 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), 214 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), 215 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), 216 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), 217 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), 218 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), 219 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), 220 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), 221 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), 222 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), 223 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), 224 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), 225 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), 226 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), 227 SOC15_REG_ENTRY_STR(GC, 0, 
	regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};
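/*
 * Default SH_MEM_CONFIG value used by this driver: 64-bit address mode,
 * unaligned alignment mode, and an initial instruction prefetch field of 3.
 */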
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
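/*
 * Emit a PACKET3_MAP_QUEUES packet on the KIQ ring to map @ring (gfx,
 * compute or MES) onto its hardware me/pipe/queue slot, handing the CP
 * the doorbell offset, MQD address and wptr address.
 */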
static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}
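/*
 * Helper: emit a PACKET3_WRITE_DATA packet that writes @val to register
 * @reg, optionally requesting write confirmation when @wc is set.
 */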
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
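/*
 * Basic IB sanity test: submit a small indirect buffer that writes
 * 0xDEADBEEF to a writeback slot (or the MES context padding area for
 * MES queues) and wait on the fence to confirm the CP executed it.
 */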
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw hasn't indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
		cpu_ptr = &adev->wb.wb[index];

		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err1;
		}
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	if (!ring->is_mes_queue)
		amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
sizeof(ucode_prefix)); 710 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 711 AMDGPU_UCODE_REQUIRED, 712 "amdgpu/%s_pfp.bin", ucode_prefix); 713 if (err) 714 goto out; 715 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 716 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 717 (union amdgpu_firmware_header *) 718 adev->gfx.pfp_fw->data, 2, 0); 719 if (adev->gfx.rs64_enable) { 720 dev_info(adev->dev, "CP RS64 enable\n"); 721 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 722 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 723 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 724 } else { 725 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 726 } 727 728 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 729 AMDGPU_UCODE_REQUIRED, 730 "amdgpu/%s_me.bin", ucode_prefix); 731 if (err) 732 goto out; 733 if (adev->gfx.rs64_enable) { 734 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 735 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 736 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 737 } else { 738 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 739 } 740 741 if (!amdgpu_sriov_vf(adev)) { 742 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 743 adev->pdev->revision == 0xCE) 744 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 745 AMDGPU_UCODE_REQUIRED, 746 "amdgpu/gc_11_0_0_rlc_1.bin"); 747 else 748 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 749 AMDGPU_UCODE_REQUIRED, 750 "amdgpu/%s_rlc.bin", ucode_prefix); 751 if (err) 752 goto out; 753 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 754 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 755 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 756 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 757 if (err) 758 goto out; 759 } 760 761 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 762 AMDGPU_UCODE_REQUIRED, 763 "amdgpu/%s_mec.bin", ucode_prefix); 764 if (err) 765 goto out; 766 if (adev->gfx.rs64_enable) { 767 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 768 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 769 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 770 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 771 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 772 } else { 773 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 774 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 775 } 776 777 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 778 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 779 780 /* only one MEC for gfx 11.0.0. 
*/ 781 adev->gfx.mec2_fw = NULL; 782 783 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 784 785 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 786 err = adev->gfx.imu.funcs->init_microcode(adev); 787 if (err) 788 DRM_ERROR("Failed to init imu firmware!\n"); 789 return err; 790 } 791 792 out: 793 if (err) { 794 amdgpu_ucode_release(&adev->gfx.pfp_fw); 795 amdgpu_ucode_release(&adev->gfx.me_fw); 796 amdgpu_ucode_release(&adev->gfx.rlc_fw); 797 amdgpu_ucode_release(&adev->gfx.mec_fw); 798 } 799 800 return err; 801 } 802 803 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 804 { 805 u32 count = 0; 806 const struct cs_section_def *sect = NULL; 807 const struct cs_extent_def *ext = NULL; 808 809 /* begin clear state */ 810 count += 2; 811 /* context control state */ 812 count += 3; 813 814 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 815 for (ext = sect->section; ext->extent != NULL; ++ext) { 816 if (sect->id == SECT_CONTEXT) 817 count += 2 + ext->reg_count; 818 else 819 return 0; 820 } 821 } 822 823 /* set PA_SC_TILE_STEERING_OVERRIDE */ 824 count += 3; 825 /* end clear state */ 826 count += 2; 827 /* clear state */ 828 count += 2; 829 830 return count; 831 } 832 833 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 834 volatile u32 *buffer) 835 { 836 u32 count = 0, i; 837 const struct cs_section_def *sect = NULL; 838 const struct cs_extent_def *ext = NULL; 839 int ctx_reg_offset; 840 841 if (adev->gfx.rlc.cs_data == NULL) 842 return; 843 if (buffer == NULL) 844 return; 845 846 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 847 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 848 849 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 850 buffer[count++] = cpu_to_le32(0x80000000); 851 buffer[count++] = cpu_to_le32(0x80000000); 852 853 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 854 for (ext = sect->section; ext->extent != NULL; ++ext) { 855 if (sect->id == SECT_CONTEXT) { 856 buffer[count++] = 857 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 858 buffer[count++] = cpu_to_le32(ext->reg_index - 859 PACKET3_SET_CONTEXT_REG_START); 860 for (i = 0; i < ext->reg_count; i++) 861 buffer[count++] = cpu_to_le32(ext->extent[i]); 862 } 863 } 864 } 865 866 ctx_reg_offset = 867 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 868 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 869 buffer[count++] = cpu_to_le32(ctx_reg_offset); 870 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 871 872 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 873 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 874 875 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 876 buffer[count++] = cpu_to_le32(0); 877 } 878 879 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 880 { 881 /* clear state block */ 882 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 883 &adev->gfx.rlc.clear_state_gpu_addr, 884 (void **)&adev->gfx.rlc.cs_ptr); 885 886 /* jump table block */ 887 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 888 &adev->gfx.rlc.cp_table_gpu_addr, 889 (void **)&adev->gfx.rlc.cp_table_ptr); 890 } 891 892 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 893 { 894 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 895 896 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 897 reg_access_ctrl->scratch_reg0 = 
SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
dst[(*no_fields)++] = 3; 1007 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1008 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1009 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1010 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1011 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1012 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1013 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1014 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1015 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1016 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1017 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1018 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1019 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 1020 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1021 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1022 } 1023 1024 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1025 uint32_t wave, uint32_t start, 1026 uint32_t size, uint32_t *dst) 1027 { 1028 WARN_ON(simd != 0); 1029 1030 wave_read_regs( 1031 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1032 dst); 1033 } 1034 1035 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1036 uint32_t wave, uint32_t thread, 1037 uint32_t start, uint32_t size, 1038 uint32_t *dst) 1039 { 1040 wave_read_regs( 1041 adev, wave, thread, 1042 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1043 } 1044 1045 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1046 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1047 { 1048 soc21_grbm_select(adev, me, pipe, q, vm); 1049 } 1050 1051 /* all sizes are in bytes */ 1052 #define MQD_SHADOW_BASE_SIZE 73728 1053 #define MQD_SHADOW_BASE_ALIGNMENT 256 1054 #define MQD_FWWORKAREA_SIZE 484 1055 #define MQD_FWWORKAREA_ALIGNMENT 256 1056 1057 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1058 struct amdgpu_gfx_shadow_info *shadow_info) 1059 { 1060 if (adev->gfx.cp_gfx_shadow) { 1061 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1062 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1063 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1064 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1065 return 0; 1066 } else { 1067 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1068 return -ENOTSUPP; 1069 } 1070 } 1071 1072 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1073 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1074 .select_se_sh = &gfx_v11_0_select_se_sh, 1075 .read_wave_data = &gfx_v11_0_read_wave_data, 1076 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1077 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1078 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1079 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1080 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1081 }; 1082 1083 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1084 { 1085 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1086 case IP_VERSION(11, 0, 0): 1087 case IP_VERSION(11, 0, 2): 1088 adev->gfx.config.max_hw_contexts = 8; 1089 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1090 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1091 
adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1092 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1093 break; 1094 case IP_VERSION(11, 0, 3): 1095 adev->gfx.ras = &gfx_v11_0_3_ras; 1096 adev->gfx.config.max_hw_contexts = 8; 1097 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1098 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1099 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1100 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1101 break; 1102 case IP_VERSION(11, 0, 1): 1103 case IP_VERSION(11, 0, 4): 1104 case IP_VERSION(11, 5, 0): 1105 case IP_VERSION(11, 5, 1): 1106 case IP_VERSION(11, 5, 2): 1107 case IP_VERSION(11, 5, 3): 1108 adev->gfx.config.max_hw_contexts = 8; 1109 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1110 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1111 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1112 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1113 break; 1114 default: 1115 BUG(); 1116 break; 1117 } 1118 1119 return 0; 1120 } 1121 1122 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1123 int me, int pipe, int queue) 1124 { 1125 struct amdgpu_ring *ring; 1126 unsigned int irq_type; 1127 unsigned int hw_prio; 1128 1129 ring = &adev->gfx.gfx_ring[ring_id]; 1130 1131 ring->me = me; 1132 ring->pipe = pipe; 1133 ring->queue = queue; 1134 1135 ring->ring_obj = NULL; 1136 ring->use_doorbell = true; 1137 1138 if (!ring_id) 1139 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1140 else 1141 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1142 ring->vm_hub = AMDGPU_GFXHUB(0); 1143 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1144 1145 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1146 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1147 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1148 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1149 hw_prio, NULL); 1150 } 1151 1152 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1153 int mec, int pipe, int queue) 1154 { 1155 int r; 1156 unsigned irq_type; 1157 struct amdgpu_ring *ring; 1158 unsigned int hw_prio; 1159 1160 ring = &adev->gfx.compute_ring[ring_id]; 1161 1162 /* mec0 is me1 */ 1163 ring->me = mec + 1; 1164 ring->pipe = pipe; 1165 ring->queue = queue; 1166 1167 ring->ring_obj = NULL; 1168 ring->use_doorbell = true; 1169 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1170 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1171 + (ring_id * GFX11_MEC_HPD_SIZE); 1172 ring->vm_hub = AMDGPU_GFXHUB(0); 1173 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1174 1175 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1176 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1177 + ring->pipe; 1178 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1179 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1180 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1181 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1182 hw_prio, NULL); 1183 if (r) 1184 return r; 1185 1186 return 0; 1187 } 1188 1189 static struct { 1190 SOC21_FIRMWARE_ID id; 1191 unsigned int offset; 1192 unsigned int size; 1193 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1194 1195 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1196 { 1197 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1198 1199 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1200 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1201 rlc_autoload_info[ucode->id].id = ucode->id; 1202 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1203 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1204 1205 ucode++; 1206 } 1207 } 1208 1209 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1210 { 1211 uint32_t total_size = 0; 1212 SOC21_FIRMWARE_ID id; 1213 1214 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1215 1216 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1217 total_size += rlc_autoload_info[id].size; 1218 1219 /* In case the offset in rlc toc ucode is aligned */ 1220 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1221 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1222 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1223 1224 return total_size; 1225 } 1226 1227 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1228 { 1229 int r; 1230 uint32_t total_size; 1231 1232 total_size = gfx_v11_0_calc_toc_total_size(adev); 1233 1234 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1235 AMDGPU_GEM_DOMAIN_VRAM | 1236 AMDGPU_GEM_DOMAIN_GTT, 1237 &adev->gfx.rlc.rlc_autoload_bo, 1238 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1239 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1240 1241 if (r) { 1242 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1243 return r; 1244 } 1245 1246 return 0; 1247 } 1248 1249 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1250 SOC21_FIRMWARE_ID id, 1251 const void *fw_data, 1252 uint32_t fw_size, 1253 uint32_t *fw_autoload_mask) 1254 { 1255 uint32_t toc_offset; 1256 uint32_t toc_fw_size; 1257 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1258 1259 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1260 return; 1261 1262 toc_offset = rlc_autoload_info[id].offset; 1263 toc_fw_size = rlc_autoload_info[id].size; 1264 1265 if (fw_size == 0) 1266 fw_size = toc_fw_size; 1267 1268 if (fw_size > toc_fw_size) 1269 fw_size = toc_fw_size; 1270 1271 memcpy(ptr + toc_offset, fw_data, fw_size); 1272 1273 if (fw_size < toc_fw_size) 1274 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1275 1276 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1277 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1278 } 1279 1280 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1281 uint32_t *fw_autoload_mask) 1282 { 1283 void *data; 1284 uint32_t size; 1285 uint64_t *toc_ptr; 1286 1287 *(uint64_t *)fw_autoload_mask |= 0x1; 1288 1289 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1290 1291 data = adev->psp.toc.start_addr; 1292 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1293 1294 toc_ptr = (uint64_t *)data + size / 8 - 1; 1295 *toc_ptr = 
*(uint64_t *)fw_autoload_mask; 1296 1297 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1298 data, size, fw_autoload_mask); 1299 } 1300 1301 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1302 uint32_t *fw_autoload_mask) 1303 { 1304 const __le32 *fw_data; 1305 uint32_t fw_size; 1306 const struct gfx_firmware_header_v1_0 *cp_hdr; 1307 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1308 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1309 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1310 uint16_t version_major, version_minor; 1311 1312 if (adev->gfx.rs64_enable) { 1313 /* pfp ucode */ 1314 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1315 adev->gfx.pfp_fw->data; 1316 /* instruction */ 1317 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1318 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1319 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1320 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1321 fw_data, fw_size, fw_autoload_mask); 1322 /* data */ 1323 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1324 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1325 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1326 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1327 fw_data, fw_size, fw_autoload_mask); 1328 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1329 fw_data, fw_size, fw_autoload_mask); 1330 /* me ucode */ 1331 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1332 adev->gfx.me_fw->data; 1333 /* instruction */ 1334 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1335 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1336 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1337 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1338 fw_data, fw_size, fw_autoload_mask); 1339 /* data */ 1340 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1341 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1342 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1343 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1344 fw_data, fw_size, fw_autoload_mask); 1345 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1346 fw_data, fw_size, fw_autoload_mask); 1347 /* mec ucode */ 1348 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1349 adev->gfx.mec_fw->data; 1350 /* instruction */ 1351 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1352 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1353 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1354 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1355 fw_data, fw_size, fw_autoload_mask); 1356 /* data */ 1357 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1358 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1359 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1360 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1361 fw_data, fw_size, fw_autoload_mask); 1362 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1363 fw_data, fw_size, fw_autoload_mask); 1364 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1365 fw_data, fw_size, fw_autoload_mask); 1366 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1367 fw_data, fw_size, fw_autoload_mask); 1368 } else { 1369 /* pfp ucode */ 1370 cp_hdr = (const struct 
gfx_firmware_header_v1_0 *) 1371 adev->gfx.pfp_fw->data; 1372 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1373 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1374 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1375 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1376 fw_data, fw_size, fw_autoload_mask); 1377 1378 /* me ucode */ 1379 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1380 adev->gfx.me_fw->data; 1381 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1382 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1383 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1384 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1385 fw_data, fw_size, fw_autoload_mask); 1386 1387 /* mec ucode */ 1388 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1389 adev->gfx.mec_fw->data; 1390 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1391 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1392 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1393 cp_hdr->jt_size * 4; 1394 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1395 fw_data, fw_size, fw_autoload_mask); 1396 } 1397 1398 /* rlc ucode */ 1399 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1400 adev->gfx.rlc_fw->data; 1401 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1402 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1403 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1404 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1405 fw_data, fw_size, fw_autoload_mask); 1406 1407 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1408 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1409 if (version_major == 2) { 1410 if (version_minor >= 2) { 1411 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1412 1413 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1414 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1415 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1416 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1417 fw_data, fw_size, fw_autoload_mask); 1418 1419 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1420 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1421 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1422 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1423 fw_data, fw_size, fw_autoload_mask); 1424 } 1425 } 1426 } 1427 1428 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1429 uint32_t *fw_autoload_mask) 1430 { 1431 const __le32 *fw_data; 1432 uint32_t fw_size; 1433 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1434 1435 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1436 adev->sdma.instance[0].fw->data; 1437 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1438 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1439 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1440 1441 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1442 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1443 1444 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1445 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1446 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1447 1448 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1449 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, 
fw_autoload_mask); 1450 } 1451 1452 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1453 uint32_t *fw_autoload_mask) 1454 { 1455 const __le32 *fw_data; 1456 unsigned fw_size; 1457 const struct mes_firmware_header_v1_0 *mes_hdr; 1458 int pipe, ucode_id, data_id; 1459 1460 for (pipe = 0; pipe < 2; pipe++) { 1461 if (pipe==0) { 1462 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1463 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1464 } else { 1465 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1466 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1467 } 1468 1469 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1470 adev->mes.fw[pipe]->data; 1471 1472 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1473 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1474 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1475 1476 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1477 ucode_id, fw_data, fw_size, fw_autoload_mask); 1478 1479 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1480 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1481 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1482 1483 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1484 data_id, fw_data, fw_size, fw_autoload_mask); 1485 } 1486 } 1487 1488 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1489 { 1490 uint32_t rlc_g_offset, rlc_g_size; 1491 uint64_t gpu_addr; 1492 uint32_t autoload_fw_id[2]; 1493 1494 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1495 1496 /* RLC autoload sequence 2: copy ucode */ 1497 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1498 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1499 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1500 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1501 1502 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1503 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1504 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1505 1506 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1507 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1508 1509 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1510 1511 /* RLC autoload sequence 3: load IMU fw */ 1512 if (adev->gfx.imu.funcs->load_microcode) 1513 adev->gfx.imu.funcs->load_microcode(adev); 1514 /* RLC autoload sequence 4 init IMU fw */ 1515 if (adev->gfx.imu.funcs->setup_imu) 1516 adev->gfx.imu.funcs->setup_imu(adev); 1517 if (adev->gfx.imu.funcs->start_imu) 1518 adev->gfx.imu.funcs->start_imu(adev); 1519 1520 /* RLC autoload sequence 5 disable gpa mode */ 1521 gfx_v11_0_disable_gpa_mode(adev); 1522 1523 return 0; 1524 } 1525 1526 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1527 { 1528 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1529 uint32_t *ptr; 1530 uint32_t inst; 1531 1532 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1533 if (!ptr) { 1534 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1535 adev->gfx.ip_dump_core = NULL; 1536 } else { 1537 adev->gfx.ip_dump_core = ptr; 1538 } 1539 1540 /* Allocate memory for compute queue registers for all the instances */ 1541 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1542 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1543 adev->gfx.mec.num_queue_per_pipe; 1544 1545 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), 
GFP_KERNEL); 1546 if (!ptr) { 1547 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1548 adev->gfx.ip_dump_compute_queues = NULL; 1549 } else { 1550 adev->gfx.ip_dump_compute_queues = ptr; 1551 } 1552 1553 /* Allocate memory for gfx queue registers for all the instances */ 1554 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1555 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1556 adev->gfx.me.num_queue_per_pipe; 1557 1558 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1559 if (!ptr) { 1560 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1561 adev->gfx.ip_dump_gfx_queues = NULL; 1562 } else { 1563 adev->gfx.ip_dump_gfx_queues = ptr; 1564 } 1565 } 1566 1567 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1568 { 1569 int i, j, k, r, ring_id = 0; 1570 int xcc_id = 0; 1571 struct amdgpu_device *adev = ip_block->adev; 1572 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1573 1574 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1575 1576 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1577 case IP_VERSION(11, 0, 0): 1578 case IP_VERSION(11, 0, 2): 1579 case IP_VERSION(11, 0, 3): 1580 adev->gfx.me.num_me = 1; 1581 adev->gfx.me.num_pipe_per_me = 1; 1582 adev->gfx.me.num_queue_per_pipe = 2; 1583 adev->gfx.mec.num_mec = 1; 1584 adev->gfx.mec.num_pipe_per_mec = 4; 1585 adev->gfx.mec.num_queue_per_pipe = 4; 1586 break; 1587 case IP_VERSION(11, 0, 1): 1588 case IP_VERSION(11, 0, 4): 1589 case IP_VERSION(11, 5, 0): 1590 case IP_VERSION(11, 5, 1): 1591 case IP_VERSION(11, 5, 2): 1592 case IP_VERSION(11, 5, 3): 1593 adev->gfx.me.num_me = 1; 1594 adev->gfx.me.num_pipe_per_me = 1; 1595 adev->gfx.me.num_queue_per_pipe = 2; 1596 adev->gfx.mec.num_mec = 1; 1597 adev->gfx.mec.num_pipe_per_mec = 4; 1598 adev->gfx.mec.num_queue_per_pipe = 4; 1599 break; 1600 default: 1601 adev->gfx.me.num_me = 1; 1602 adev->gfx.me.num_pipe_per_me = 1; 1603 adev->gfx.me.num_queue_per_pipe = 1; 1604 adev->gfx.mec.num_mec = 1; 1605 adev->gfx.mec.num_pipe_per_mec = 4; 1606 adev->gfx.mec.num_queue_per_pipe = 8; 1607 break; 1608 } 1609 1610 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1611 case IP_VERSION(11, 0, 0): 1612 case IP_VERSION(11, 0, 2): 1613 case IP_VERSION(11, 0, 3): 1614 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1615 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1616 if (adev->gfx.me_fw_version >= 2280 && 1617 adev->gfx.pfp_fw_version >= 2370 && 1618 adev->gfx.mec_fw_version >= 2450 && 1619 adev->mes.fw_version[0] >= 99) { 1620 adev->gfx.enable_cleaner_shader = true; 1621 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1622 if (r) { 1623 adev->gfx.enable_cleaner_shader = false; 1624 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1625 } 1626 } 1627 break; 1628 case IP_VERSION(11, 5, 0): 1629 case IP_VERSION(11, 5, 1): 1630 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1631 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1632 if (adev->gfx.mec_fw_version >= 26 && 1633 adev->mes.fw_version[0] >= 114) { 1634 adev->gfx.enable_cleaner_shader = true; 1635 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1636 if (r) { 1637 adev->gfx.enable_cleaner_shader = false; 1638 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1639 } 1640 } 1641 break; 1642 default: 1643 adev->gfx.enable_cleaner_shader = false; 1644 break; 1645 } 1646 1647 /* Enable 
CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1648 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1649 amdgpu_sriov_is_pp_one_vf(adev)) 1650 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1651 1652 /* EOP Event */ 1653 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1654 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1655 &adev->gfx.eop_irq); 1656 if (r) 1657 return r; 1658 1659 /* Bad opcode Event */ 1660 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1661 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1662 &adev->gfx.bad_op_irq); 1663 if (r) 1664 return r; 1665 1666 /* Privileged reg */ 1667 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1668 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1669 &adev->gfx.priv_reg_irq); 1670 if (r) 1671 return r; 1672 1673 /* Privileged inst */ 1674 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1675 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1676 &adev->gfx.priv_inst_irq); 1677 if (r) 1678 return r; 1679 1680 /* FED error */ 1681 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1682 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1683 &adev->gfx.rlc_gc_fed_irq); 1684 if (r) 1685 return r; 1686 1687 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1688 1689 gfx_v11_0_me_init(adev); 1690 1691 r = gfx_v11_0_rlc_init(adev); 1692 if (r) { 1693 DRM_ERROR("Failed to init rlc BOs!\n"); 1694 return r; 1695 } 1696 1697 r = gfx_v11_0_mec_init(adev); 1698 if (r) { 1699 DRM_ERROR("Failed to init MEC BOs!\n"); 1700 return r; 1701 } 1702 1703 /* set up the gfx ring */ 1704 for (i = 0; i < adev->gfx.me.num_me; i++) { 1705 for (j = 0; j < num_queue_per_pipe; j++) { 1706 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1707 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1708 continue; 1709 1710 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1711 i, k, j); 1712 if (r) 1713 return r; 1714 ring_id++; 1715 } 1716 } 1717 } 1718 1719 ring_id = 0; 1720 /* set up the compute queues - allocate horizontally across pipes */ 1721 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1722 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1723 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1724 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1725 k, j)) 1726 continue; 1727 1728 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1729 i, k, j); 1730 if (r) 1731 return r; 1732 1733 ring_id++; 1734 } 1735 } 1736 } 1737 1738 adev->gfx.gfx_supported_reset = 1739 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1740 adev->gfx.compute_supported_reset = 1741 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1742 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1743 case IP_VERSION(11, 0, 0): 1744 case IP_VERSION(11, 0, 2): 1745 case IP_VERSION(11, 0, 3): 1746 if ((adev->gfx.me_fw_version >= 2280) && 1747 (adev->gfx.mec_fw_version >= 2410)) { 1748 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1749 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1750 } 1751 break; 1752 default: 1753 break; 1754 } 1755 1756 if (!adev->enable_mes_kiq) { 1757 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1758 if (r) { 1759 DRM_ERROR("Failed to init KIQ BOs!\n"); 1760 return r; 1761 } 1762 1763 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1764 if (r) 1765 return r; 1766 } 1767 1768 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1769 if (r) 1770 return r; 1771 1772 /* allocate visible FB for rlc auto-loading fw */ 1773 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1774 r = 
gfx_v11_0_rlc_autoload_buffer_init(adev); 1775 if (r) 1776 return r; 1777 } 1778 1779 r = gfx_v11_0_gpu_early_init(adev); 1780 if (r) 1781 return r; 1782 1783 if (amdgpu_gfx_ras_sw_init(adev)) { 1784 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1785 return -EINVAL; 1786 } 1787 1788 gfx_v11_0_alloc_ip_dump(adev); 1789 1790 r = amdgpu_gfx_sysfs_init(adev); 1791 if (r) 1792 return r; 1793 1794 return 0; 1795 } 1796 1797 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1798 { 1799 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1800 &adev->gfx.pfp.pfp_fw_gpu_addr, 1801 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1802 1803 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1804 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1805 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1806 } 1807 1808 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1809 { 1810 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1811 &adev->gfx.me.me_fw_gpu_addr, 1812 (void **)&adev->gfx.me.me_fw_ptr); 1813 1814 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1815 &adev->gfx.me.me_fw_data_gpu_addr, 1816 (void **)&adev->gfx.me.me_fw_data_ptr); 1817 } 1818 1819 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1820 { 1821 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1822 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1823 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1824 } 1825 1826 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1827 { 1828 int i; 1829 struct amdgpu_device *adev = ip_block->adev; 1830 1831 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1832 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1833 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1834 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1835 1836 amdgpu_gfx_mqd_sw_fini(adev, 0); 1837 1838 if (!adev->enable_mes_kiq) { 1839 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1840 amdgpu_gfx_kiq_fini(adev, 0); 1841 } 1842 1843 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1844 1845 gfx_v11_0_pfp_fini(adev); 1846 gfx_v11_0_me_fini(adev); 1847 gfx_v11_0_rlc_fini(adev); 1848 gfx_v11_0_mec_fini(adev); 1849 1850 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1851 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1852 1853 gfx_v11_0_free_microcode(adev); 1854 1855 amdgpu_gfx_sysfs_fini(adev); 1856 1857 kfree(adev->gfx.ip_dump_core); 1858 kfree(adev->gfx.ip_dump_compute_queues); 1859 kfree(adev->gfx.ip_dump_gfx_queues); 1860 1861 return 0; 1862 } 1863 1864 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1865 u32 sh_num, u32 instance, int xcc_id) 1866 { 1867 u32 data; 1868 1869 if (instance == 0xffffffff) 1870 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1871 INSTANCE_BROADCAST_WRITES, 1); 1872 else 1873 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1874 instance); 1875 1876 if (se_num == 0xffffffff) 1877 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1878 1); 1879 else 1880 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1881 1882 if (sh_num == 0xffffffff) 1883 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1884 1); 1885 else 1886 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1887 1888 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1889 } 1890 1891 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1892 { 1893 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1894 1895 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1896 gc_disabled_sa_mask = 
REG_GET_FIELD(gc_disabled_sa_mask, 1897 CC_GC_SA_UNIT_DISABLE, 1898 SA_DISABLE); 1899 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1900 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1901 GC_USER_SA_UNIT_DISABLE, 1902 SA_DISABLE); 1903 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1904 adev->gfx.config.max_shader_engines); 1905 1906 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1907 } 1908 1909 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1910 { 1911 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1912 u32 rb_mask; 1913 1914 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1915 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1916 CC_RB_BACKEND_DISABLE, 1917 BACKEND_DISABLE); 1918 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1919 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1920 GC_USER_RB_BACKEND_DISABLE, 1921 BACKEND_DISABLE); 1922 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1923 adev->gfx.config.max_shader_engines); 1924 1925 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1926 } 1927 1928 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1929 { 1930 u32 rb_bitmap_per_sa; 1931 u32 rb_bitmap_width_per_sa; 1932 u32 max_sa; 1933 u32 active_sa_bitmap; 1934 u32 global_active_rb_bitmap; 1935 u32 active_rb_bitmap = 0; 1936 u32 i; 1937 1938 /* query sa bitmap from SA_UNIT_DISABLE registers */ 1939 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 1940 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 1941 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 1942 1943 /* generate active rb bitmap according to active sa bitmap */ 1944 max_sa = adev->gfx.config.max_shader_engines * 1945 adev->gfx.config.max_sh_per_se; 1946 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1947 adev->gfx.config.max_sh_per_se; 1948 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 1949 1950 for (i = 0; i < max_sa; i++) { 1951 if (active_sa_bitmap & (1 << i)) 1952 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 1953 } 1954 1955 active_rb_bitmap &= global_active_rb_bitmap; 1956 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 1957 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 1958 } 1959 1960 #define DEFAULT_SH_MEM_BASES (0x6000) 1961 #define LDS_APP_BASE 0x1 1962 #define SCRATCH_APP_BASE 0x2 1963 1964 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 1965 { 1966 int i; 1967 uint32_t sh_mem_bases; 1968 uint32_t data; 1969 1970 /* 1971 * Configure apertures: 1972 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1973 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1974 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1975 */ 1976 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 1977 SCRATCH_APP_BASE; 1978 1979 mutex_lock(&adev->srbm_mutex); 1980 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1981 soc21_grbm_select(adev, 0, 0, 0, i); 1982 /* CP and shaders */ 1983 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1984 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 1985 1986 /* Enable trap for each kfd vmid. 
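Setting TRAP_EN in SPI_GDBG_PER_VMID_CNTL below allows waves launched under these KFD-owned VMIDs to take traps.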
*/ 1987 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 1988 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 1989 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 1990 } 1991 soc21_grbm_select(adev, 0, 0, 0, 0); 1992 mutex_unlock(&adev->srbm_mutex); 1993 1994 /* 1995 * Initialize all compute VMIDs to have no GDS, GWS, or OA 1996 * access. These should be enabled by FW for target VMIDs. 1997 */ 1998 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1999 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2000 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2001 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2002 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2003 } 2004 } 2005 2006 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2007 { 2008 int vmid; 2009 2010 /* 2011 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2012 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2013 * the driver can enable them for graphics. VMID0 should maintain 2014 * access so that HWS firmware can save/restore entries. 2015 */ 2016 for (vmid = 1; vmid < 16; vmid++) { 2017 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2018 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2019 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2020 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2021 } 2022 } 2023 2024 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2025 { 2026 /* TODO: harvest feature to be added later. */ 2027 } 2028 2029 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2030 { 2031 /* TCCs are global (not instanced). */ 2032 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2033 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2034 2035 adev->gfx.config.tcc_disabled_mask = 2036 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2037 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2038 } 2039 2040 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2041 { 2042 u32 tmp; 2043 int i; 2044 2045 if (!amdgpu_sriov_vf(adev)) 2046 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2047 2048 gfx_v11_0_setup_rb(adev); 2049 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2050 gfx_v11_0_get_tcc_info(adev); 2051 adev->gfx.config.pa_sc_tile_steering_override = 0; 2052 2053 /* Set whether texture coordinate truncation is conformant. 
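The current mode is read back from TA_CNTL2 and cached in adev->gfx.config.ta_cntl2_truncate_coord_mode.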
*/ 2054 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2055 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2056 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2057 2058 /* XXX SH_MEM regs */ 2059 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2060 mutex_lock(&adev->srbm_mutex); 2061 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2062 soc21_grbm_select(adev, 0, 0, 0, i); 2063 /* CP and shaders */ 2064 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2065 if (i != 0) { 2066 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2067 (adev->gmc.private_aperture_start >> 48)); 2068 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2069 (adev->gmc.shared_aperture_start >> 48)); 2070 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2071 } 2072 } 2073 soc21_grbm_select(adev, 0, 0, 0, 0); 2074 2075 mutex_unlock(&adev->srbm_mutex); 2076 2077 gfx_v11_0_init_compute_vmid(adev); 2078 gfx_v11_0_init_gds_vmid(adev); 2079 } 2080 2081 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2082 int me, int pipe) 2083 { 2084 if (me != 0) 2085 return 0; 2086 2087 switch (pipe) { 2088 case 0: 2089 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2090 case 1: 2091 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2092 default: 2093 return 0; 2094 } 2095 } 2096 2097 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2098 int me, int pipe) 2099 { 2100 /* 2101 * amdgpu controls only the first MEC. That's why this function only 2102 * handles the setting of interrupts for this specific MEC. All other 2103 * pipes' interrupts are set by amdkfd. 2104 */ 2105 if (me != 1) 2106 return 0; 2107 2108 switch (pipe) { 2109 case 0: 2110 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2111 case 1: 2112 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2113 case 2: 2114 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2115 case 3: 2116 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2117 default: 2118 return 0; 2119 } 2120 } 2121 2122 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2123 bool enable) 2124 { 2125 u32 tmp, cp_int_cntl_reg; 2126 int i, j; 2127 2128 if (amdgpu_sriov_vf(adev)) 2129 return; 2130 2131 for (i = 0; i < adev->gfx.me.num_me; i++) { 2132 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2133 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2134 2135 if (cp_int_cntl_reg) { 2136 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2137 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2138 enable ? 1 : 0); 2139 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2140 enable ? 1 : 0); 2141 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2142 enable ? 1 : 0); 2143 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2144 enable ? 
1 : 0); 2145 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2146 } 2147 } 2148 } 2149 } 2150 2151 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2152 { 2153 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2154 2155 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2156 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2157 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2158 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2159 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2160 2161 return 0; 2162 } 2163 2164 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2165 { 2166 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2167 2168 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2169 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2170 } 2171 2172 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2173 { 2174 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2175 udelay(50); 2176 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2177 udelay(50); 2178 } 2179 2180 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2181 bool enable) 2182 { 2183 uint32_t rlc_pg_cntl; 2184 2185 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2186 2187 if (!enable) { 2188 /* RLC_PG_CNTL[23] = 0 (default) 2189 * RLC will wait for handshake acks with SMU 2190 * GFXOFF will be enabled 2191 * RLC_PG_CNTL[23] = 1 2192 * RLC will not issue any message to SMU 2193 * hence no handshake between SMU & RLC 2194 * GFXOFF will be disabled 2195 */ 2196 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2197 } else 2198 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2199 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2200 } 2201 2202 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2203 { 2204 /* TODO: enable rlc & smu handshake until smu 2205 * and gfxoff feature works as expected */ 2206 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2207 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2208 2209 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2210 udelay(50); 2211 } 2212 2213 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2214 { 2215 uint32_t tmp; 2216 2217 /* enable Save Restore Machine */ 2218 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2219 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2220 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2221 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2222 } 2223 2224 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2225 { 2226 const struct rlc_firmware_header_v2_0 *hdr; 2227 const __le32 *fw_data; 2228 unsigned i, fw_size; 2229 2230 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2231 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2232 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2233 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2234 2235 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2236 RLCG_UCODE_LOADING_START_ADDRESS); 2237 2238 for (i = 0; i < fw_size; i++) 2239 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2240 le32_to_cpup(fw_data++)); 2241 2242 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2243 } 2244 2245 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2246 { 2247 const struct rlc_firmware_header_v2_2 *hdr; 2248 const __le32 *fw_data; 2249 unsigned i, fw_size; 2250 u32 tmp; 2251 2252 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2253 2254 fw_data = (const __le32 
*)(adev->gfx.rlc_fw->data + 2255 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2256 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2257 2258 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2259 2260 for (i = 0; i < fw_size; i++) { 2261 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2262 msleep(1); 2263 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2264 le32_to_cpup(fw_data++)); 2265 } 2266 2267 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2268 2269 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2270 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2271 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2272 2273 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2274 for (i = 0; i < fw_size; i++) { 2275 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2276 msleep(1); 2277 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2278 le32_to_cpup(fw_data++)); 2279 } 2280 2281 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2282 2283 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2284 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2285 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2286 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2287 } 2288 2289 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2290 { 2291 const struct rlc_firmware_header_v2_3 *hdr; 2292 const __le32 *fw_data; 2293 unsigned i, fw_size; 2294 u32 tmp; 2295 2296 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2297 2298 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2299 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2300 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2301 2302 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2303 2304 for (i = 0; i < fw_size; i++) { 2305 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2306 msleep(1); 2307 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2308 le32_to_cpup(fw_data++)); 2309 } 2310 2311 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2312 2313 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2314 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2315 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2316 2317 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2318 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2319 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2320 2321 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2322 2323 for (i = 0; i < fw_size; i++) { 2324 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2325 msleep(1); 2326 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2327 le32_to_cpup(fw_data++)); 2328 } 2329 2330 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2331 2332 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2333 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2334 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2335 } 2336 2337 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2338 { 2339 const struct rlc_firmware_header_v2_0 *hdr; 2340 uint16_t version_major; 2341 uint16_t version_minor; 2342 2343 if (!adev->gfx.rlc_fw) 2344 return -EINVAL; 2345 2346 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2347 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2348 2349 version_major = le16_to_cpu(hdr->header.header_version_major); 2350 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2351 2352 if (version_major == 2) { 2353 gfx_v11_0_load_rlcg_microcode(adev); 2354 if (amdgpu_dpm == 1) { 2355 if (version_minor >= 2) 
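/* RLC v2.2+ headers carry separate IRAM and DRAM images for the RLC LX6 core */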
2356 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2357 if (version_minor == 3) 2358 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2359 } 2360 2361 return 0; 2362 } 2363 2364 return -EINVAL; 2365 } 2366 2367 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2368 { 2369 int r; 2370 2371 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2372 gfx_v11_0_init_csb(adev); 2373 2374 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2375 gfx_v11_0_rlc_enable_srm(adev); 2376 } else { 2377 if (amdgpu_sriov_vf(adev)) { 2378 gfx_v11_0_init_csb(adev); 2379 return 0; 2380 } 2381 2382 adev->gfx.rlc.funcs->stop(adev); 2383 2384 /* disable CG */ 2385 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2386 2387 /* disable PG */ 2388 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2389 2390 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2391 /* legacy rlc firmware loading */ 2392 r = gfx_v11_0_rlc_load_microcode(adev); 2393 if (r) 2394 return r; 2395 } 2396 2397 gfx_v11_0_init_csb(adev); 2398 2399 adev->gfx.rlc.funcs->start(adev); 2400 } 2401 return 0; 2402 } 2403 2404 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2405 { 2406 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2407 uint32_t tmp; 2408 int i; 2409 2410 /* Trigger an invalidation of the L1 instruction caches */ 2411 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2412 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2413 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2414 2415 /* Wait for invalidation complete */ 2416 for (i = 0; i < usec_timeout; i++) { 2417 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2418 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2419 INVALIDATE_CACHE_COMPLETE)) 2420 break; 2421 udelay(1); 2422 } 2423 2424 if (i >= usec_timeout) { 2425 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2426 return -EINVAL; 2427 } 2428 2429 if (amdgpu_emu_mode == 1) 2430 adev->hdp.funcs->flush_hdp(adev, NULL); 2431 2432 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2433 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2434 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2435 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2436 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2437 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2438 2439 /* Program me ucode address into intruction cache address register */ 2440 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2441 lower_32_bits(addr) & 0xFFFFF000); 2442 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2443 upper_32_bits(addr)); 2444 2445 return 0; 2446 } 2447 2448 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2449 { 2450 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2451 uint32_t tmp; 2452 int i; 2453 2454 /* Trigger an invalidation of the L1 instruction caches */ 2455 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2456 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2457 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2458 2459 /* Wait for invalidation complete */ 2460 for (i = 0; i < usec_timeout; i++) { 2461 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2462 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2463 INVALIDATE_CACHE_COMPLETE)) 2464 break; 2465 udelay(1); 2466 } 2467 2468 if (i >= usec_timeout) { 2469 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2470 return -EINVAL; 2471 } 2472 2473 if (amdgpu_emu_mode == 1) 2474 adev->hdp.funcs->flush_hdp(adev, NULL); 2475 2476 tmp = RREG32_SOC15(GC, 0, 
regCP_PFP_IC_BASE_CNTL); 2477 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2478 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2479 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2480 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2481 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2482 2483 /* Program pfp ucode address into instruction cache address register */ 2484 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2485 lower_32_bits(addr) & 0xFFFFF000); 2486 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2487 upper_32_bits(addr)); 2488 2489 return 0; 2490 } 2491 2492 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2493 { 2494 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2495 uint32_t tmp; 2496 int i; 2497 2498 /* Trigger an invalidation of the L1 instruction caches */ 2499 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2500 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2501 2502 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2503 2504 /* Wait for invalidation complete */ 2505 for (i = 0; i < usec_timeout; i++) { 2506 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2507 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2508 INVALIDATE_CACHE_COMPLETE)) 2509 break; 2510 udelay(1); 2511 } 2512 2513 if (i >= usec_timeout) { 2514 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2515 return -EINVAL; 2516 } 2517 2518 if (amdgpu_emu_mode == 1) 2519 adev->hdp.funcs->flush_hdp(adev, NULL); 2520 2521 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2522 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2523 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2524 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2525 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2526 2527 /* Program mec1 ucode address into instruction cache address register */ 2528 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2529 lower_32_bits(addr) & 0xFFFFF000); 2530 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2531 upper_32_bits(addr)); 2532 2533 return 0; 2534 } 2535 2536 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2537 { 2538 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2539 uint32_t tmp; 2540 unsigned i, pipe_id; 2541 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2542 2543 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2544 adev->gfx.pfp_fw->data; 2545 2546 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2547 lower_32_bits(addr)); 2548 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2549 upper_32_bits(addr)); 2550 2551 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2552 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2553 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2554 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2555 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2556 2557 /* 2558 * Programming any of the CP_PFP_IC_BASE registers 2559 * forces invalidation of the PFP L1 I$.
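* No explicit INVALIDATE_CACHE trigger is needed on this path.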
Wait for the 2560 * invalidation complete 2561 */ 2562 for (i = 0; i < usec_timeout; i++) { 2563 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2564 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2565 INVALIDATE_CACHE_COMPLETE)) 2566 break; 2567 udelay(1); 2568 } 2569 2570 if (i >= usec_timeout) { 2571 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2572 return -EINVAL; 2573 } 2574 2575 /* Prime the L1 instruction caches */ 2576 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2577 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2578 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2579 /* Waiting for cache primed*/ 2580 for (i = 0; i < usec_timeout; i++) { 2581 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2582 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2583 ICACHE_PRIMED)) 2584 break; 2585 udelay(1); 2586 } 2587 2588 if (i >= usec_timeout) { 2589 dev_err(adev->dev, "failed to prime instruction cache\n"); 2590 return -EINVAL; 2591 } 2592 2593 mutex_lock(&adev->srbm_mutex); 2594 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2595 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2596 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2597 (pfp_hdr->ucode_start_addr_hi << 30) | 2598 (pfp_hdr->ucode_start_addr_lo >> 2)); 2599 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2600 pfp_hdr->ucode_start_addr_hi >> 2); 2601 2602 /* 2603 * Program CP_ME_CNTL to reset given PIPE to take 2604 * effect of CP_PFP_PRGRM_CNTR_START. 2605 */ 2606 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2607 if (pipe_id == 0) 2608 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2609 PFP_PIPE0_RESET, 1); 2610 else 2611 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2612 PFP_PIPE1_RESET, 1); 2613 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2614 2615 /* Clear pfp pipe0 reset bit. 
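Only the pipe selected by pipe_id is released; it then begins fetching from the CP_PFP_PRGRM_CNTR_START programmed above.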
*/ 2616 if (pipe_id == 0) 2617 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2618 PFP_PIPE0_RESET, 0); 2619 else 2620 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2621 PFP_PIPE1_RESET, 0); 2622 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2623 2624 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2625 lower_32_bits(addr2)); 2626 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2627 upper_32_bits(addr2)); 2628 } 2629 soc21_grbm_select(adev, 0, 0, 0, 0); 2630 mutex_unlock(&adev->srbm_mutex); 2631 2632 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2633 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2634 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2635 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2636 2637 /* Invalidate the data caches */ 2638 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2639 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2640 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2641 2642 for (i = 0; i < usec_timeout; i++) { 2643 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2644 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2645 INVALIDATE_DCACHE_COMPLETE)) 2646 break; 2647 udelay(1); 2648 } 2649 2650 if (i >= usec_timeout) { 2651 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2652 return -EINVAL; 2653 } 2654 2655 return 0; 2656 } 2657 2658 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2659 { 2660 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2661 uint32_t tmp; 2662 unsigned i, pipe_id; 2663 const struct gfx_firmware_header_v2_0 *me_hdr; 2664 2665 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2666 adev->gfx.me_fw->data; 2667 2668 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2669 lower_32_bits(addr)); 2670 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2671 upper_32_bits(addr)); 2672 2673 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2674 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2675 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2676 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2677 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2678 2679 /* 2680 * Programming any of the CP_ME_IC_BASE registers 2681 * forces invalidation of the ME L1 I$. 
Wait for the 2682 * invalidation complete 2683 */ 2684 for (i = 0; i < usec_timeout; i++) { 2685 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2686 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2687 INVALIDATE_CACHE_COMPLETE)) 2688 break; 2689 udelay(1); 2690 } 2691 2692 if (i >= usec_timeout) { 2693 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2694 return -EINVAL; 2695 } 2696 2697 /* Prime the instruction caches */ 2698 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2699 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2700 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2701 2702 /* Waiting for instruction cache primed */ 2703 for (i = 0; i < usec_timeout; i++) { 2704 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2705 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2706 ICACHE_PRIMED)) 2707 break; 2708 udelay(1); 2709 } 2710 2711 if (i >= usec_timeout) { 2712 dev_err(adev->dev, "failed to prime instruction cache\n"); 2713 return -EINVAL; 2714 } 2715 2716 mutex_lock(&adev->srbm_mutex); 2717 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2718 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2719 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2720 (me_hdr->ucode_start_addr_hi << 30) | 2721 (me_hdr->ucode_start_addr_lo >> 2)); 2722 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2723 me_hdr->ucode_start_addr_hi >> 2); 2724 2725 /* 2726 * Program CP_ME_CNTL to reset given PIPE to take 2727 * effect of CP_ME_PRGRM_CNTR_START. 2728 */ 2729 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2730 if (pipe_id == 0) 2731 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2732 ME_PIPE0_RESET, 1); 2733 else 2734 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2735 ME_PIPE1_RESET, 1); 2736 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2737 2738 /* Clear me pipe reset bit.
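Releasing the reset lets the selected ME pipe start executing from CP_ME_PRGRM_CNTR_START.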
*/ 2739 if (pipe_id == 0) 2740 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2741 ME_PIPE0_RESET, 0); 2742 else 2743 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2744 ME_PIPE1_RESET, 0); 2745 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2746 2747 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2748 lower_32_bits(addr2)); 2749 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2750 upper_32_bits(addr2)); 2751 } 2752 soc21_grbm_select(adev, 0, 0, 0, 0); 2753 mutex_unlock(&adev->srbm_mutex); 2754 2755 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2756 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2757 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2758 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2759 2760 /* Invalidate the data caches */ 2761 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2762 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2763 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2764 2765 for (i = 0; i < usec_timeout; i++) { 2766 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2767 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2768 INVALIDATE_DCACHE_COMPLETE)) 2769 break; 2770 udelay(1); 2771 } 2772 2773 if (i >= usec_timeout) { 2774 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2775 return -EINVAL; 2776 } 2777 2778 return 0; 2779 } 2780 2781 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2782 { 2783 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2784 uint32_t tmp; 2785 unsigned i; 2786 const struct gfx_firmware_header_v2_0 *mec_hdr; 2787 2788 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2789 adev->gfx.mec_fw->data; 2790 2791 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2792 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2793 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2794 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2795 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2796 2797 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2798 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2799 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2800 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2801 2802 mutex_lock(&adev->srbm_mutex); 2803 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2804 soc21_grbm_select(adev, 1, i, 0, 0); 2805 2806 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2807 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2808 upper_32_bits(addr2)); 2809 2810 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2811 mec_hdr->ucode_start_addr_lo >> 2 | 2812 mec_hdr->ucode_start_addr_hi << 30); 2813 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2814 mec_hdr->ucode_start_addr_hi >> 2); 2815 2816 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2817 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2818 upper_32_bits(addr)); 2819 } 2820 mutex_unlock(&adev->srbm_mutex); 2821 soc21_grbm_select(adev, 0, 0, 0, 0); 2822 2823 /* Trigger an invalidation of the L1 instruction caches */ 2824 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2825 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2826 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2827 2828 /* Wait for invalidation complete */ 2829 for (i = 0; i < usec_timeout; i++) { 2830 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2831 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2832 INVALIDATE_DCACHE_COMPLETE)) 2833 break; 2834 udelay(1); 2835 } 2836 2837 if (i >= 
usec_timeout) { 2838 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2839 return -EINVAL; 2840 } 2841 2842 /* Trigger an invalidation of the L1 instruction caches */ 2843 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2844 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2845 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2846 2847 /* Wait for invalidation complete */ 2848 for (i = 0; i < usec_timeout; i++) { 2849 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2850 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2851 INVALIDATE_CACHE_COMPLETE)) 2852 break; 2853 udelay(1); 2854 } 2855 2856 if (i >= usec_timeout) { 2857 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2858 return -EINVAL; 2859 } 2860 2861 return 0; 2862 } 2863 2864 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2865 { 2866 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2867 const struct gfx_firmware_header_v2_0 *me_hdr; 2868 const struct gfx_firmware_header_v2_0 *mec_hdr; 2869 uint32_t pipe_id, tmp; 2870 2871 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2872 adev->gfx.mec_fw->data; 2873 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2874 adev->gfx.me_fw->data; 2875 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2876 adev->gfx.pfp_fw->data; 2877 2878 /* config pfp program start addr */ 2879 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2880 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2881 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2882 (pfp_hdr->ucode_start_addr_hi << 30) | 2883 (pfp_hdr->ucode_start_addr_lo >> 2)); 2884 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2885 pfp_hdr->ucode_start_addr_hi >> 2); 2886 } 2887 soc21_grbm_select(adev, 0, 0, 0, 0); 2888 2889 /* reset pfp pipe */ 2890 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2891 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2892 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2893 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2894 2895 /* clear pfp pipe reset */ 2896 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2897 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2898 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2899 2900 /* config me program start addr */ 2901 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2902 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2903 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2904 (me_hdr->ucode_start_addr_hi << 30) | 2905 (me_hdr->ucode_start_addr_lo >> 2) ); 2906 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2907 me_hdr->ucode_start_addr_hi>>2); 2908 } 2909 soc21_grbm_select(adev, 0, 0, 0, 0); 2910 2911 /* reset me pipe */ 2912 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2913 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2914 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2915 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2916 2917 /* clear me pipe reset */ 2918 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2919 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2920 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2921 2922 /* config mec program start addr */ 2923 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2924 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2925 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2926 mec_hdr->ucode_start_addr_lo >> 2 | 2927 mec_hdr->ucode_start_addr_hi << 30); 2928 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2929 mec_hdr->ucode_start_addr_hi >> 2); 2930 } 2931 soc21_grbm_select(adev, 0, 0, 0, 0); 2932 2933 /* reset mec pipe */ 2934 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2935 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2936 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2937 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 2938 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 2939 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2940 2941 /* clear mec pipe reset */ 2942 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 2943 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 2944 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 2945 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 2946 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2947 } 2948 2949 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 2950 { 2951 uint32_t cp_status; 2952 uint32_t bootload_status; 2953 int i, r; 2954 uint64_t addr, addr2; 2955 2956 for (i = 0; i < adev->usec_timeout; i++) { 2957 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 2958 2959 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 2960 IP_VERSION(11, 0, 1) || 2961 amdgpu_ip_version(adev, GC_HWIP, 0) == 2962 IP_VERSION(11, 0, 4) || 2963 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 2964 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 2965 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 2966 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) 2967 bootload_status = RREG32_SOC15(GC, 0, 2968 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 2969 else 2970 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 2971 2972 if ((cp_status == 0) && 2973 (REG_GET_FIELD(bootload_status, 2974 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 2975 break; 2976 } 2977 udelay(1); 2978 } 2979 2980 if (i >= adev->usec_timeout) { 2981 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 2982 return -ETIMEDOUT; 2983 } 2984 2985 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2986 if (adev->gfx.rs64_enable) { 2987 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2988 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 2989 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2990 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 2991 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 2992 if (r) 2993 return r; 2994 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2995 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 2996 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2997 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 2998 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 2999 if (r) 3000 return r; 3001 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3002 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3003 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3004 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3005 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3006 if (r) 3007 return r; 3008 } else { 3009 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3010 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3011 r = gfx_v11_0_config_me_cache(adev, addr); 3012 if (r) 3013 return r; 3014 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3015 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3016 r = gfx_v11_0_config_pfp_cache(adev, addr); 3017 if (r) 3018 return r; 3019 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3020 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3021 r = gfx_v11_0_config_mec_cache(adev, addr); 3022 if (r) 
3023 return r; 3024 } 3025 } 3026 3027 return 0; 3028 } 3029 3030 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3031 { 3032 int i; 3033 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3034 3035 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3036 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3037 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3038 3039 for (i = 0; i < adev->usec_timeout; i++) { 3040 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3041 break; 3042 udelay(1); 3043 } 3044 3045 if (i >= adev->usec_timeout) 3046 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3047 3048 return 0; 3049 } 3050 3051 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3052 { 3053 int r; 3054 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3055 const __le32 *fw_data; 3056 unsigned i, fw_size; 3057 3058 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3059 adev->gfx.pfp_fw->data; 3060 3061 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3062 3063 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3064 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3065 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3066 3067 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3068 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3069 &adev->gfx.pfp.pfp_fw_obj, 3070 &adev->gfx.pfp.pfp_fw_gpu_addr, 3071 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3072 if (r) { 3073 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3074 gfx_v11_0_pfp_fini(adev); 3075 return r; 3076 } 3077 3078 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3079 3080 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3081 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3082 3083 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3084 3085 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3086 3087 for (i = 0; i < pfp_hdr->jt_size; i++) 3088 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3089 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3090 3091 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3092 3093 return 0; 3094 } 3095 3096 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3097 { 3098 int r; 3099 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3100 const __le32 *fw_ucode, *fw_data; 3101 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3102 uint32_t tmp; 3103 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3104 3105 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3106 adev->gfx.pfp_fw->data; 3107 3108 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3109 3110 /* instruction */ 3111 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3112 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3113 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3114 /* data */ 3115 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3116 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3117 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3118 3119 /* 64kb align */ 3120 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3121 64 * 1024, 3122 AMDGPU_GEM_DOMAIN_VRAM | 3123 AMDGPU_GEM_DOMAIN_GTT, 3124 &adev->gfx.pfp.pfp_fw_obj, 3125 &adev->gfx.pfp.pfp_fw_gpu_addr, 3126 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3127 if (r) { 3128 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3129 gfx_v11_0_pfp_fini(adev); 3130 return r; 3131 } 3132 3133 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3134 64 * 1024, 3135 AMDGPU_GEM_DOMAIN_VRAM | 3136 AMDGPU_GEM_DOMAIN_GTT, 3137 
&adev->gfx.pfp.pfp_fw_data_obj, 3138 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3139 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3140 if (r) { 3141 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3142 gfx_v11_0_pfp_fini(adev); 3143 return r; 3144 } 3145 3146 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3147 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3148 3149 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3150 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3151 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3152 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3153 3154 if (amdgpu_emu_mode == 1) 3155 adev->hdp.funcs->flush_hdp(adev, NULL); 3156 3157 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3158 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3159 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3160 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3161 3162 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3163 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3164 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3165 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3166 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3167 3168 /* 3169 * Programming any of the CP_PFP_IC_BASE registers 3170 * forces invalidation of the ME L1 I$. Wait for the 3171 * invalidation complete 3172 */ 3173 for (i = 0; i < usec_timeout; i++) { 3174 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3175 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3176 INVALIDATE_CACHE_COMPLETE)) 3177 break; 3178 udelay(1); 3179 } 3180 3181 if (i >= usec_timeout) { 3182 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3183 return -EINVAL; 3184 } 3185 3186 /* Prime the L1 instruction caches */ 3187 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3188 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3189 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3190 /* Waiting for cache primed*/ 3191 for (i = 0; i < usec_timeout; i++) { 3192 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3193 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3194 ICACHE_PRIMED)) 3195 break; 3196 udelay(1); 3197 } 3198 3199 if (i >= usec_timeout) { 3200 dev_err(adev->dev, "failed to prime instruction cache\n"); 3201 return -EINVAL; 3202 } 3203 3204 mutex_lock(&adev->srbm_mutex); 3205 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3206 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3207 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3208 (pfp_hdr->ucode_start_addr_hi << 30) | 3209 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3210 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3211 pfp_hdr->ucode_start_addr_hi>>2); 3212 3213 /* 3214 * Program CP_ME_CNTL to reset given PIPE to take 3215 * effect of CP_PFP_PRGRM_CNTR_START. 3216 */ 3217 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3218 if (pipe_id == 0) 3219 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3220 PFP_PIPE0_RESET, 1); 3221 else 3222 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3223 PFP_PIPE1_RESET, 1); 3224 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3225 3226 /* Clear pfp pipe0 reset bit. 
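Releasing the reset restarts the selected PFP pipe from the start address written above.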
*/ 3227 if (pipe_id == 0) 3228 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3229 PFP_PIPE0_RESET, 0); 3230 else 3231 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3232 PFP_PIPE1_RESET, 0); 3233 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3234 3235 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3236 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3237 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3238 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3239 } 3240 soc21_grbm_select(adev, 0, 0, 0, 0); 3241 mutex_unlock(&adev->srbm_mutex); 3242 3243 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3244 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3245 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3246 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3247 3248 /* Invalidate the data caches */ 3249 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3250 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3251 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3252 3253 for (i = 0; i < usec_timeout; i++) { 3254 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3255 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3256 INVALIDATE_DCACHE_COMPLETE)) 3257 break; 3258 udelay(1); 3259 } 3260 3261 if (i >= usec_timeout) { 3262 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3263 return -EINVAL; 3264 } 3265 3266 return 0; 3267 } 3268 3269 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3270 { 3271 int r; 3272 const struct gfx_firmware_header_v1_0 *me_hdr; 3273 const __le32 *fw_data; 3274 unsigned i, fw_size; 3275 3276 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3277 adev->gfx.me_fw->data; 3278 3279 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3280 3281 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3282 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3283 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3284 3285 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3286 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3287 &adev->gfx.me.me_fw_obj, 3288 &adev->gfx.me.me_fw_gpu_addr, 3289 (void **)&adev->gfx.me.me_fw_ptr); 3290 if (r) { 3291 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3292 gfx_v11_0_me_fini(adev); 3293 return r; 3294 } 3295 3296 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3297 3298 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3299 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3300 3301 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3302 3303 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3304 3305 for (i = 0; i < me_hdr->jt_size; i++) 3306 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3307 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3308 3309 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3310 3311 return 0; 3312 } 3313 3314 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3315 { 3316 int r; 3317 const struct gfx_firmware_header_v2_0 *me_hdr; 3318 const __le32 *fw_ucode, *fw_data; 3319 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3320 uint32_t tmp; 3321 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3322 3323 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3324 adev->gfx.me_fw->data; 3325 3326 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3327 3328 /* instruction */ 3329 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3330 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3331 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3332 /* data */ 3333 
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64 KB alignment */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that the
		 * newly programmed CP_ME_PRGRM_CNTR_START takes effect.
3435 */ 3436 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3437 if (pipe_id == 0) 3438 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3439 ME_PIPE0_RESET, 1); 3440 else 3441 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3442 ME_PIPE1_RESET, 1); 3443 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3444 3445 /* Clear pfp pipe0 reset bit. */ 3446 if (pipe_id == 0) 3447 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3448 ME_PIPE0_RESET, 0); 3449 else 3450 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3451 ME_PIPE1_RESET, 0); 3452 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3453 3454 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3455 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3456 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3457 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3458 } 3459 soc21_grbm_select(adev, 0, 0, 0, 0); 3460 mutex_unlock(&adev->srbm_mutex); 3461 3462 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3463 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3464 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3465 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3466 3467 /* Invalidate the data caches */ 3468 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3469 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3470 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3471 3472 for (i = 0; i < usec_timeout; i++) { 3473 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3474 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3475 INVALIDATE_DCACHE_COMPLETE)) 3476 break; 3477 udelay(1); 3478 } 3479 3480 if (i >= usec_timeout) { 3481 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3482 return -EINVAL; 3483 } 3484 3485 return 0; 3486 } 3487 3488 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3489 { 3490 int r; 3491 3492 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3493 return -EINVAL; 3494 3495 gfx_v11_0_cp_gfx_enable(adev, false); 3496 3497 if (adev->gfx.rs64_enable) 3498 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3499 else 3500 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3501 if (r) { 3502 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3503 return r; 3504 } 3505 3506 if (adev->gfx.rs64_enable) 3507 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3508 else 3509 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3510 if (r) { 3511 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3512 return r; 3513 } 3514 3515 return 0; 3516 } 3517 3518 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3519 { 3520 struct amdgpu_ring *ring; 3521 const struct cs_section_def *sect = NULL; 3522 const struct cs_extent_def *ext = NULL; 3523 int r, i; 3524 int ctx_reg_offset; 3525 3526 /* init the CP */ 3527 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3528 adev->gfx.config.max_hw_contexts - 1); 3529 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3530 3531 if (!amdgpu_async_gfx_ring) 3532 gfx_v11_0_cp_gfx_enable(adev, true); 3533 3534 ring = &adev->gfx.gfx_ring[0]; 3535 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3536 if (r) { 3537 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3538 return r; 3539 } 3540 3541 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3542 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3543 3544 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3545 amdgpu_ring_write(ring, 0x80000000); 3546 amdgpu_ring_write(ring, 0x80000000); 3547 3548 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3549 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3550 if (sect->id == SECT_CONTEXT) { 3551 amdgpu_ring_write(ring, 3552 PACKET3(PACKET3_SET_CONTEXT_REG, 3553 ext->reg_count)); 3554 amdgpu_ring_write(ring, ext->reg_index - 3555 PACKET3_SET_CONTEXT_REG_START); 3556 for (i = 0; i < ext->reg_count; i++) 3557 amdgpu_ring_write(ring, ext->extent[i]); 3558 } 3559 } 3560 } 3561 3562 ctx_reg_offset = 3563 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3564 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3565 amdgpu_ring_write(ring, ctx_reg_offset); 3566 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3567 3568 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3569 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3570 3571 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3572 amdgpu_ring_write(ring, 0); 3573 3574 amdgpu_ring_commit(ring); 3575 3576 /* submit cs packet to copy state 0 to next available state */ 3577 if (adev->gfx.num_gfx_rings > 1) { 3578 /* maximum supported gfx ring is 2 */ 3579 ring = &adev->gfx.gfx_ring[1]; 3580 r = amdgpu_ring_alloc(ring, 2); 3581 if (r) { 3582 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3583 return r; 3584 } 3585 3586 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3587 amdgpu_ring_write(ring, 0); 3588 3589 amdgpu_ring_commit(ring); 3590 } 3591 return 0; 3592 } 3593 3594 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3595 CP_PIPE_ID pipe) 3596 { 3597 u32 tmp; 3598 3599 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3600 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3601 3602 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3603 } 3604 3605 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3606 struct amdgpu_ring *ring) 3607 { 3608 u32 tmp; 3609 3610 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3611 if (ring->use_doorbell) { 3612 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3613 DOORBELL_OFFSET, ring->doorbell_index); 3614 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3615 DOORBELL_EN, 1); 3616 } else { 3617 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3618 DOORBELL_EN, 0); 3619 } 3620 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3621 3622 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3623 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3624 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3625 3626 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3627 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3628 } 3629 3630 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3631 { 3632 struct amdgpu_ring *ring; 3633 u32 tmp; 3634 u32 rb_bufsz; 3635 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3636 3637 /* Set the write pointer delay */ 3638 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3639 3640 /* set the RB to use vmid 0 */ 3641 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3642 3643 /* Init gfx ring 0 for pipe 0 */ 3644 mutex_lock(&adev->srbm_mutex); 3645 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3646 3647 /* Set ring buffer size */ 3648 ring = &adev->gfx.gfx_ring[0]; 3649 rb_bufsz = order_base_2(ring->ring_size / 8); 3650 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3651 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3652 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3653 3654 /* Initialize the ring buffer's write pointers */ 3655 ring->wptr = 0; 3656 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3657 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3658 3659 /* set the wb address whether it's enabled or not */ 3660 rptr_addr = ring->rptr_gpu_addr; 3661 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3662 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3663 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3664 3665 wptr_gpu_addr = ring->wptr_gpu_addr; 3666 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3667 lower_32_bits(wptr_gpu_addr)); 3668 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3669 upper_32_bits(wptr_gpu_addr)); 3670 3671 mdelay(1); 3672 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3673 3674 rb_addr = ring->gpu_addr >> 8; 3675 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3676 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3677 3678 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3679 3680 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3681 mutex_unlock(&adev->srbm_mutex); 3682 3683 /* Init gfx ring 1 for pipe 1 */ 3684 if (adev->gfx.num_gfx_rings > 1) { 3685 mutex_lock(&adev->srbm_mutex); 3686 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3687 /* maximum supported gfx ring is 2 */ 3688 ring = &adev->gfx.gfx_ring[1]; 3689 rb_bufsz = order_base_2(ring->ring_size / 8); 3690 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3691 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3692 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3693 /* Initialize the ring buffer's write pointers */ 3694 ring->wptr = 0; 3695 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3696 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3697 /* Set the wb address whether it's enabled or not */ 3698 rptr_addr = ring->rptr_gpu_addr; 3699 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3700 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3701 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3702 wptr_gpu_addr = ring->wptr_gpu_addr; 3703 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3704 lower_32_bits(wptr_gpu_addr)); 3705 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3706 upper_32_bits(wptr_gpu_addr)); 3707 3708 mdelay(1); 3709 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3710 3711 rb_addr = ring->gpu_addr >> 8; 3712 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3713 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3714 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3715 3716 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3717 mutex_unlock(&adev->srbm_mutex); 3718 } 3719 /* Switch to pipe 0 */ 3720 mutex_lock(&adev->srbm_mutex); 3721 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3722 mutex_unlock(&adev->srbm_mutex); 3723 3724 /* start the ring */ 3725 gfx_v11_0_cp_gfx_start(adev); 3726 3727 return 0; 3728 } 3729 3730 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3731 { 3732 u32 data; 3733 3734 if (adev->gfx.rs64_enable) { 3735 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3736 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3737 enable ? 0 : 1); 3738 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3739 enable ? 0 : 1); 3740 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3741 enable ? 0 : 1); 3742 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3743 enable ? 0 : 1); 3744 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3745 enable ? 0 : 1); 3746 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3747 enable ? 
1 : 0); 3748 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3749 enable ? 1 : 0); 3750 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3751 enable ? 1 : 0); 3752 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3753 enable ? 1 : 0); 3754 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3755 enable ? 0 : 1); 3756 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3757 } else { 3758 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3759 3760 if (enable) { 3761 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3762 if (!adev->enable_mes_kiq) 3763 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3764 MEC_ME2_HALT, 0); 3765 } else { 3766 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3767 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3768 } 3769 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3770 } 3771 3772 udelay(50); 3773 } 3774 3775 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3776 { 3777 const struct gfx_firmware_header_v1_0 *mec_hdr; 3778 const __le32 *fw_data; 3779 unsigned i, fw_size; 3780 u32 *fw = NULL; 3781 int r; 3782 3783 if (!adev->gfx.mec_fw) 3784 return -EINVAL; 3785 3786 gfx_v11_0_cp_compute_enable(adev, false); 3787 3788 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3789 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3790 3791 fw_data = (const __le32 *) 3792 (adev->gfx.mec_fw->data + 3793 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3794 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3795 3796 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3797 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3798 &adev->gfx.mec.mec_fw_obj, 3799 &adev->gfx.mec.mec_fw_gpu_addr, 3800 (void **)&fw); 3801 if (r) { 3802 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3803 gfx_v11_0_mec_fini(adev); 3804 return r; 3805 } 3806 3807 memcpy(fw, fw_data, fw_size); 3808 3809 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3810 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3811 3812 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3813 3814 /* MEC1 */ 3815 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3816 3817 for (i = 0; i < mec_hdr->jt_size; i++) 3818 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3819 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3820 3821 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3822 3823 return 0; 3824 } 3825 3826 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3827 { 3828 const struct gfx_firmware_header_v2_0 *mec_hdr; 3829 const __le32 *fw_ucode, *fw_data; 3830 u32 tmp, fw_ucode_size, fw_data_size; 3831 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3832 u32 *fw_ucode_ptr, *fw_data_ptr; 3833 int r; 3834 3835 if (!adev->gfx.mec_fw) 3836 return -EINVAL; 3837 3838 gfx_v11_0_cp_compute_enable(adev, false); 3839 3840 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3841 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3842 3843 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3844 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3845 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3846 3847 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3848 le32_to_cpu(mec_hdr->data_offset_bytes)); 3849 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3850 3851 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3852 64 * 1024, 3853 AMDGPU_GEM_DOMAIN_VRAM | 3854 AMDGPU_GEM_DOMAIN_GTT, 3855 &adev->gfx.mec.mec_fw_obj, 3856 
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* Trigger an invalidation of the L1 data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for the invalidation to complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for the invalidation to complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	return 0;
}

static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is the KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}

static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
{
	/* set graphics engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);

	/* set compute engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.kiq * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.userqueue_end * 2) << 2);
}

static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
					   struct v11_gfx_mqd *mqd,
					   struct amdgpu_mqd_prop *prop)
{
	bool priority = false;
	u32 tmp;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority
	 */
	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
		priority = true;

	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
	mqd->cp_gfx_hqd_queue_priority = tmp;
}

static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
				  struct amdgpu_mqd_prop *prop)
{
	struct v11_gfx_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set up mqd control */
	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
	tmp = regCP_GFX_HQD_VMID_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = 0;

	/* set up gfx queue priority */
	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);

	/* set up time quantum */
	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base.
this is similar as CP_RB_BASE */ 4040 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4041 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4042 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4043 4044 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4045 wb_gpu_addr = prop->rptr_gpu_addr; 4046 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4047 mqd->cp_gfx_hqd_rptr_addr_hi = 4048 upper_32_bits(wb_gpu_addr) & 0xffff; 4049 4050 /* set up rb_wptr_poll addr */ 4051 wb_gpu_addr = prop->wptr_gpu_addr; 4052 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4053 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4054 4055 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4056 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4057 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4058 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4059 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4060 #ifdef __BIG_ENDIAN 4061 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4062 #endif 4063 mqd->cp_gfx_hqd_cntl = tmp; 4064 4065 /* set up cp_doorbell_control */ 4066 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4067 if (prop->use_doorbell) { 4068 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4069 DOORBELL_OFFSET, prop->doorbell_index); 4070 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4071 DOORBELL_EN, 1); 4072 } else 4073 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4074 DOORBELL_EN, 0); 4075 mqd->cp_rb_doorbell_control = tmp; 4076 4077 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4078 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4079 4080 /* active the queue */ 4081 mqd->cp_gfx_hqd_active = 1; 4082 4083 return 0; 4084 } 4085 4086 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4087 { 4088 struct amdgpu_device *adev = ring->adev; 4089 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4090 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4091 4092 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4093 memset((void *)mqd, 0, sizeof(*mqd)); 4094 mutex_lock(&adev->srbm_mutex); 4095 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4096 amdgpu_ring_init_mqd(ring); 4097 soc21_grbm_select(adev, 0, 0, 0, 0); 4098 mutex_unlock(&adev->srbm_mutex); 4099 if (adev->gfx.me.mqd_backup[mqd_idx]) 4100 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4101 } else { 4102 /* restore mqd with the backup copy */ 4103 if (adev->gfx.me.mqd_backup[mqd_idx]) 4104 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4105 /* reset the ring */ 4106 ring->wptr = 0; 4107 *ring->wptr_cpu_addr = 0; 4108 amdgpu_ring_clear_ring(ring); 4109 } 4110 4111 return 0; 4112 } 4113 4114 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4115 { 4116 int r, i; 4117 4118 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4119 r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4120 if (r) 4121 return r; 4122 } 4123 4124 r = amdgpu_gfx_enable_kgq(adev, 0); 4125 if (r) 4126 return r; 4127 4128 return gfx_v11_0_cp_gfx_start(adev); 4129 } 4130 4131 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4132 struct amdgpu_mqd_prop *prop) 4133 { 4134 struct v11_compute_mqd *mqd = m; 4135 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4136 uint32_t tmp; 4137 4138 mqd->header = 0xC0310800; 4139 mqd->compute_pipelinestat_enable = 0x00000001; 4140 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4141 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 
4142 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4143 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4144 mqd->compute_misc_reserved = 0x00000007; 4145 4146 eop_base_addr = prop->eop_gpu_addr >> 8; 4147 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4148 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4149 4150 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4151 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4152 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4153 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4154 4155 mqd->cp_hqd_eop_control = tmp; 4156 4157 /* enable doorbell? */ 4158 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4159 4160 if (prop->use_doorbell) { 4161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4162 DOORBELL_OFFSET, prop->doorbell_index); 4163 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4164 DOORBELL_EN, 1); 4165 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4166 DOORBELL_SOURCE, 0); 4167 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4168 DOORBELL_HIT, 0); 4169 } else { 4170 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4171 DOORBELL_EN, 0); 4172 } 4173 4174 mqd->cp_hqd_pq_doorbell_control = tmp; 4175 4176 /* disable the queue if it's active */ 4177 mqd->cp_hqd_dequeue_request = 0; 4178 mqd->cp_hqd_pq_rptr = 0; 4179 mqd->cp_hqd_pq_wptr_lo = 0; 4180 mqd->cp_hqd_pq_wptr_hi = 0; 4181 4182 /* set the pointer to the MQD */ 4183 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4184 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4185 4186 /* set MQD vmid to 0 */ 4187 tmp = regCP_MQD_CONTROL_DEFAULT; 4188 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4189 mqd->cp_mqd_control = tmp; 4190 4191 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4192 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4193 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4194 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4195 4196 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4197 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4198 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4199 (order_base_2(prop->queue_size / 4) - 1)); 4200 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4201 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4202 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4203 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4204 prop->allow_tunneling); 4205 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4206 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4207 mqd->cp_hqd_pq_control = tmp; 4208 4209 /* set the wb address whether it's enabled or not */ 4210 wb_gpu_addr = prop->rptr_gpu_addr; 4211 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4212 mqd->cp_hqd_pq_rptr_report_addr_hi = 4213 upper_32_bits(wb_gpu_addr) & 0xffff; 4214 4215 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4216 wb_gpu_addr = prop->wptr_gpu_addr; 4217 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4218 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4219 4220 tmp = 0; 4221 /* enable the doorbell if requested */ 4222 if (prop->use_doorbell) { 4223 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4224 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4225 DOORBELL_OFFSET, prop->doorbell_index); 4226 4227 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4228 DOORBELL_EN, 1); 4229 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4230 DOORBELL_SOURCE, 0); 4231 
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4232 DOORBELL_HIT, 0); 4233 } 4234 4235 mqd->cp_hqd_pq_doorbell_control = tmp; 4236 4237 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4238 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4239 4240 /* set the vmid for the queue */ 4241 mqd->cp_hqd_vmid = 0; 4242 4243 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4244 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4245 mqd->cp_hqd_persistent_state = tmp; 4246 4247 /* set MIN_IB_AVAIL_SIZE */ 4248 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4249 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4250 mqd->cp_hqd_ib_control = tmp; 4251 4252 /* set static priority for a compute queue/ring */ 4253 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4254 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4255 4256 mqd->cp_hqd_active = prop->hqd_active; 4257 4258 return 0; 4259 } 4260 4261 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4262 { 4263 struct amdgpu_device *adev = ring->adev; 4264 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4265 int j; 4266 4267 /* inactivate the queue */ 4268 if (amdgpu_sriov_vf(adev)) 4269 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4270 4271 /* disable wptr polling */ 4272 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4273 4274 /* write the EOP addr */ 4275 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4276 mqd->cp_hqd_eop_base_addr_lo); 4277 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4278 mqd->cp_hqd_eop_base_addr_hi); 4279 4280 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4281 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4282 mqd->cp_hqd_eop_control); 4283 4284 /* enable doorbell? */ 4285 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4286 mqd->cp_hqd_pq_doorbell_control); 4287 4288 /* disable the queue if it's active */ 4289 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4290 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4291 for (j = 0; j < adev->usec_timeout; j++) { 4292 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4293 break; 4294 udelay(1); 4295 } 4296 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4297 mqd->cp_hqd_dequeue_request); 4298 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4299 mqd->cp_hqd_pq_rptr); 4300 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4301 mqd->cp_hqd_pq_wptr_lo); 4302 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4303 mqd->cp_hqd_pq_wptr_hi); 4304 } 4305 4306 /* set the pointer to the MQD */ 4307 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4308 mqd->cp_mqd_base_addr_lo); 4309 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4310 mqd->cp_mqd_base_addr_hi); 4311 4312 /* set MQD vmid to 0 */ 4313 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4314 mqd->cp_mqd_control); 4315 4316 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4317 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4318 mqd->cp_hqd_pq_base_lo); 4319 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4320 mqd->cp_hqd_pq_base_hi); 4321 4322 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4323 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4324 mqd->cp_hqd_pq_control); 4325 4326 /* set the wb address whether it's enabled or not */ 4327 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4328 mqd->cp_hqd_pq_rptr_report_addr_lo); 4329 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4330 mqd->cp_hqd_pq_rptr_report_addr_hi); 4331 4332 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4333 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4334 
mqd->cp_hqd_pq_wptr_poll_addr_lo); 4335 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4336 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4337 4338 /* enable the doorbell if requested */ 4339 if (ring->use_doorbell) { 4340 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4341 (adev->doorbell_index.kiq * 2) << 2); 4342 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4343 (adev->doorbell_index.userqueue_end * 2) << 2); 4344 } 4345 4346 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4347 mqd->cp_hqd_pq_doorbell_control); 4348 4349 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4350 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4351 mqd->cp_hqd_pq_wptr_lo); 4352 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4353 mqd->cp_hqd_pq_wptr_hi); 4354 4355 /* set the vmid for the queue */ 4356 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4357 4358 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4359 mqd->cp_hqd_persistent_state); 4360 4361 /* activate the queue */ 4362 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4363 mqd->cp_hqd_active); 4364 4365 if (ring->use_doorbell) 4366 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4367 4368 return 0; 4369 } 4370 4371 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4372 { 4373 struct amdgpu_device *adev = ring->adev; 4374 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4375 4376 gfx_v11_0_kiq_setting(ring); 4377 4378 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4379 /* reset MQD to a clean status */ 4380 if (adev->gfx.kiq[0].mqd_backup) 4381 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4382 4383 /* reset ring buffer */ 4384 ring->wptr = 0; 4385 amdgpu_ring_clear_ring(ring); 4386 4387 mutex_lock(&adev->srbm_mutex); 4388 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4389 gfx_v11_0_kiq_init_register(ring); 4390 soc21_grbm_select(adev, 0, 0, 0, 0); 4391 mutex_unlock(&adev->srbm_mutex); 4392 } else { 4393 memset((void *)mqd, 0, sizeof(*mqd)); 4394 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4395 amdgpu_ring_clear_ring(ring); 4396 mutex_lock(&adev->srbm_mutex); 4397 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4398 amdgpu_ring_init_mqd(ring); 4399 gfx_v11_0_kiq_init_register(ring); 4400 soc21_grbm_select(adev, 0, 0, 0, 0); 4401 mutex_unlock(&adev->srbm_mutex); 4402 4403 if (adev->gfx.kiq[0].mqd_backup) 4404 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4405 } 4406 4407 return 0; 4408 } 4409 4410 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4411 { 4412 struct amdgpu_device *adev = ring->adev; 4413 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4414 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4415 4416 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4417 memset((void *)mqd, 0, sizeof(*mqd)); 4418 mutex_lock(&adev->srbm_mutex); 4419 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4420 amdgpu_ring_init_mqd(ring); 4421 soc21_grbm_select(adev, 0, 0, 0, 0); 4422 mutex_unlock(&adev->srbm_mutex); 4423 4424 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4425 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4426 } else { 4427 /* restore MQD to a clean status */ 4428 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4429 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4430 /* reset ring buffer */ 4431 ring->wptr = 0; 4432 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4433 amdgpu_ring_clear_ring(ring); 4434 } 4435 4436 return 0; 4437 } 4438 4439 static int 
gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4440 { 4441 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4442 return 0; 4443 } 4444 4445 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4446 { 4447 int i, r; 4448 4449 if (!amdgpu_async_gfx_ring) 4450 gfx_v11_0_cp_compute_enable(adev, true); 4451 4452 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4453 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4454 if (r) 4455 return r; 4456 } 4457 4458 return amdgpu_gfx_enable_kcq(adev, 0); 4459 } 4460 4461 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4462 { 4463 int r, i; 4464 struct amdgpu_ring *ring; 4465 4466 if (!(adev->flags & AMD_IS_APU)) 4467 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4468 4469 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4470 /* legacy firmware loading */ 4471 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4472 if (r) 4473 return r; 4474 4475 if (adev->gfx.rs64_enable) 4476 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4477 else 4478 r = gfx_v11_0_cp_compute_load_microcode(adev); 4479 if (r) 4480 return r; 4481 } 4482 4483 gfx_v11_0_cp_set_doorbell_range(adev); 4484 4485 if (amdgpu_async_gfx_ring) { 4486 gfx_v11_0_cp_compute_enable(adev, true); 4487 gfx_v11_0_cp_gfx_enable(adev, true); 4488 } 4489 4490 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4491 r = amdgpu_mes_kiq_hw_init(adev); 4492 else 4493 r = gfx_v11_0_kiq_resume(adev); 4494 if (r) 4495 return r; 4496 4497 r = gfx_v11_0_kcq_resume(adev); 4498 if (r) 4499 return r; 4500 4501 if (!amdgpu_async_gfx_ring) { 4502 r = gfx_v11_0_cp_gfx_resume(adev); 4503 if (r) 4504 return r; 4505 } else { 4506 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4507 if (r) 4508 return r; 4509 } 4510 4511 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4512 ring = &adev->gfx.gfx_ring[i]; 4513 r = amdgpu_ring_test_helper(ring); 4514 if (r) 4515 return r; 4516 } 4517 4518 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4519 ring = &adev->gfx.compute_ring[i]; 4520 r = amdgpu_ring_test_helper(ring); 4521 if (r) 4522 return r; 4523 } 4524 4525 return 0; 4526 } 4527 4528 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4529 { 4530 gfx_v11_0_cp_gfx_enable(adev, enable); 4531 gfx_v11_0_cp_compute_enable(adev, enable); 4532 } 4533 4534 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4535 { 4536 int r; 4537 bool value; 4538 4539 r = adev->gfxhub.funcs->gart_enable(adev); 4540 if (r) 4541 return r; 4542 4543 adev->hdp.funcs->flush_hdp(adev, NULL); 4544 4545 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4546 false : true; 4547 4548 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4549 /* TODO investigate why this and the hdp flush above is needed, 4550 * are we missing a flush somewhere else? 
*/ 4551 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4552 4553 return 0; 4554 } 4555 4556 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4557 { 4558 u32 tmp; 4559 4560 /* select RS64 */ 4561 if (adev->gfx.rs64_enable) { 4562 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4563 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4564 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4565 4566 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4567 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4568 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4569 } 4570 4571 if (amdgpu_emu_mode == 1) 4572 msleep(100); 4573 } 4574 4575 static int get_gb_addr_config(struct amdgpu_device * adev) 4576 { 4577 u32 gb_addr_config; 4578 4579 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4580 if (gb_addr_config == 0) 4581 return -EINVAL; 4582 4583 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4584 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4585 4586 adev->gfx.config.gb_addr_config = gb_addr_config; 4587 4588 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4589 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4590 GB_ADDR_CONFIG, NUM_PIPES); 4591 4592 adev->gfx.config.max_tile_pipes = 4593 adev->gfx.config.gb_addr_config_fields.num_pipes; 4594 4595 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4596 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4597 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4598 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4599 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4600 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4601 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4602 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4603 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4604 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4605 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4606 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4607 4608 return 0; 4609 } 4610 4611 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4612 { 4613 uint32_t data; 4614 4615 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4616 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4617 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4618 4619 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4620 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4621 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4622 } 4623 4624 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4625 { 4626 int r; 4627 struct amdgpu_device *adev = ip_block->adev; 4628 4629 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4630 adev->gfx.cleaner_shader_ptr); 4631 4632 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4633 if (adev->gfx.imu.funcs) { 4634 /* RLC autoload sequence 1: Program rlc ram */ 4635 if (adev->gfx.imu.funcs->program_rlc_ram) 4636 adev->gfx.imu.funcs->program_rlc_ram(adev); 4637 /* rlc autoload firmware */ 4638 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4639 if (r) 4640 return r; 4641 } 4642 } else { 4643 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4644 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4645 if (adev->gfx.imu.funcs->load_microcode) 4646 adev->gfx.imu.funcs->load_microcode(adev); 4647 if (adev->gfx.imu.funcs->setup_imu) 4648 adev->gfx.imu.funcs->setup_imu(adev); 4649 if (adev->gfx.imu.funcs->start_imu) 4650 adev->gfx.imu.funcs->start_imu(adev); 4651 } 4652 4653 /* disable gpa mode in backdoor loading */ 4654 gfx_v11_0_disable_gpa_mode(adev); 4655 } 4656 } 4657 4658 
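	/*
	 * For both the RLC backdoor autoload and the PSP loading types the
	 * RLC bootstraps itself from the firmware handed to it, so wait
	 * until it reports that its bootload sequence has completed before
	 * programming the rest of the gfx block.
	 */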
	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait for rlc autoload complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so for the direct loading type the SMC
		 * ucode has to be loaded here before the RLC.
		 */
		r = amdgpu_pm_load_smu_firmware(adev, NULL);
		if (r)
			return r;
	}

	gfx_v11_0_constants_init(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		gfx_v11_0_select_cp_fw_arch(adev);

	if (adev->nbio.funcs->gc_doorbell_init)
		adev->nbio.funcs->gc_doorbell_init(adev);

	r = gfx_v11_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * Golden register init and RLC resume may override some registers,
	 * so reconfigure them here.
	 */
	gfx_v11_0_tcp_harvest(adev);

	r = gfx_v11_0_cp_resume(adev);
	if (r)
		return r;

	/* get the IMU version from HW if it's not set */
	if (!adev->gfx.imu_fw_version)
		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);

	return r;
}

static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	cancel_delayed_work_sync(&adev->gfx.idle_work);

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);

	if (!adev->no_hw_access) {
		if (amdgpu_async_gfx_ring) {
			if (amdgpu_gfx_disable_kgq(adev, 0))
				DRM_ERROR("KGQ disable failed\n");
		}

		if (amdgpu_gfx_disable_kcq(adev, 0))
			DRM_ERROR("KCQ disable failed\n");

		amdgpu_mes_kiq_hw_fini(adev);
	}

	if (amdgpu_sriov_vf(adev))
		/* Remove the steps disabling CPG and clearing KIQ position,
		 * so that CP could perform IDLE-SAVE during switch. Those
		 * steps are necessary to avoid a DMAR error in gfx9, but the
		 * error is not reproduced on gfx11.
4751 */ 4752 return 0; 4753 4754 gfx_v11_0_cp_enable(adev, false); 4755 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4756 4757 adev->gfxhub.funcs->gart_disable(adev); 4758 4759 adev->gfx.is_poweron = false; 4760 4761 return 0; 4762 } 4763 4764 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4765 { 4766 return gfx_v11_0_hw_fini(ip_block); 4767 } 4768 4769 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4770 { 4771 return gfx_v11_0_hw_init(ip_block); 4772 } 4773 4774 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 4775 { 4776 struct amdgpu_device *adev = ip_block->adev; 4777 4778 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4779 GRBM_STATUS, GUI_ACTIVE)) 4780 return false; 4781 else 4782 return true; 4783 } 4784 4785 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4786 { 4787 unsigned i; 4788 u32 tmp; 4789 struct amdgpu_device *adev = ip_block->adev; 4790 4791 for (i = 0; i < adev->usec_timeout; i++) { 4792 /* read MC_STATUS */ 4793 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4794 GRBM_STATUS__GUI_ACTIVE_MASK; 4795 4796 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4797 return 0; 4798 udelay(1); 4799 } 4800 return -ETIMEDOUT; 4801 } 4802 4803 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4804 bool req) 4805 { 4806 u32 i, tmp, val; 4807 4808 for (i = 0; i < adev->usec_timeout; i++) { 4809 /* Request with MeId=2, PipeId=0 */ 4810 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4811 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4812 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4813 4814 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4815 if (req) { 4816 if (val == tmp) 4817 break; 4818 } else { 4819 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4820 REQUEST, 1); 4821 4822 /* unlocked or locked by firmware */ 4823 if (val != tmp) 4824 break; 4825 } 4826 udelay(1); 4827 } 4828 4829 if (i >= adev->usec_timeout) 4830 return -EINVAL; 4831 4832 return 0; 4833 } 4834 4835 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4836 { 4837 u32 grbm_soft_reset = 0; 4838 u32 tmp; 4839 int r, i, j, k; 4840 struct amdgpu_device *adev = ip_block->adev; 4841 4842 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4843 4844 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4845 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4846 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4847 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4848 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4849 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4850 4851 mutex_lock(&adev->srbm_mutex); 4852 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4853 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4854 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4855 soc21_grbm_select(adev, i, k, j, 0); 4856 4857 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4858 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4859 } 4860 } 4861 } 4862 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4863 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4864 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4865 soc21_grbm_select(adev, i, k, j, 0); 4866 4867 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 4868 } 4869 } 4870 } 4871 soc21_grbm_select(adev, 0, 0, 0, 0); 4872 mutex_unlock(&adev->srbm_mutex); 4873 4874 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 4875 mutex_lock(&adev->gfx.reset_sem_mutex); 4876 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 4877 if (r) { 4878 mutex_unlock(&adev->gfx.reset_sem_mutex); 4879 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 4880 return r; 4881 } 4882 4883 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 4884 4885 // Read CP_VMID_RESET register three times. 4886 // to get sufficient time for GFX_HQD_ACTIVE reach 0 4887 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4888 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4889 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4890 4891 /* release the gfx mutex */ 4892 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 4893 mutex_unlock(&adev->gfx.reset_sem_mutex); 4894 if (r) { 4895 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 4896 return r; 4897 } 4898 4899 for (i = 0; i < adev->usec_timeout; i++) { 4900 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 4901 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 4902 break; 4903 udelay(1); 4904 } 4905 if (i >= adev->usec_timeout) { 4906 printk("Failed to wait all pipes clean\n"); 4907 return -EINVAL; 4908 } 4909 4910 /********** trigger soft reset ***********/ 4911 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4912 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4913 SOFT_RESET_CP, 1); 4914 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4915 SOFT_RESET_GFX, 1); 4916 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4917 SOFT_RESET_CPF, 1); 4918 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4919 SOFT_RESET_CPC, 1); 4920 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4921 SOFT_RESET_CPG, 1); 4922 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4923 /********** exit soft reset ***********/ 4924 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4925 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4926 SOFT_RESET_CP, 0); 4927 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4928 SOFT_RESET_GFX, 0); 4929 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4930 SOFT_RESET_CPF, 0); 4931 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4932 SOFT_RESET_CPC, 0); 4933 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4934 SOFT_RESET_CPG, 0); 4935 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4936 4937 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 4938 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 4939 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 4940 4941 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 4942 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 4943 4944 for (i = 0; i < adev->usec_timeout; i++) { 4945 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 4946 break; 4947 udelay(1); 4948 } 4949 if (i >= adev->usec_timeout) { 4950 printk("Failed to wait CP_VMID_RESET to 0\n"); 4951 return -EINVAL; 4952 } 4953 4954 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4955 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4956 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4957 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4958 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4959 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4960 4961 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4962 4963 return gfx_v11_0_cp_resume(adev); 4964 } 4965 4966 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 4967 { 4968 int i, r; 4969 struct amdgpu_device *adev = ip_block->adev; 4970 
struct amdgpu_ring *ring; 4971 long tmo = msecs_to_jiffies(1000); 4972 4973 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4974 ring = &adev->gfx.gfx_ring[i]; 4975 r = amdgpu_ring_test_ib(ring, tmo); 4976 if (r) 4977 return true; 4978 } 4979 4980 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4981 ring = &adev->gfx.compute_ring[i]; 4982 r = amdgpu_ring_test_ib(ring, tmo); 4983 if (r) 4984 return true; 4985 } 4986 4987 return false; 4988 } 4989 4990 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 4991 { 4992 struct amdgpu_device *adev = ip_block->adev; 4993 /** 4994 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 4995 */ 4996 return amdgpu_mes_resume(adev); 4997 } 4998 4999 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5000 { 5001 uint64_t clock; 5002 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5003 5004 if (amdgpu_sriov_vf(adev)) { 5005 amdgpu_gfx_off_ctrl(adev, false); 5006 mutex_lock(&adev->gfx.gpu_clock_mutex); 5007 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5008 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5009 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5010 if (clock_counter_hi_pre != clock_counter_hi_after) 5011 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5012 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5013 amdgpu_gfx_off_ctrl(adev, true); 5014 } else { 5015 preempt_disable(); 5016 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5017 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5018 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5019 if (clock_counter_hi_pre != clock_counter_hi_after) 5020 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5021 preempt_enable(); 5022 } 5023 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5024 5025 return clock; 5026 } 5027 5028 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5029 uint32_t vmid, 5030 uint32_t gds_base, uint32_t gds_size, 5031 uint32_t gws_base, uint32_t gws_size, 5032 uint32_t oa_base, uint32_t oa_size) 5033 { 5034 struct amdgpu_device *adev = ring->adev; 5035 5036 /* GDS Base */ 5037 gfx_v11_0_write_data_to_reg(ring, 0, false, 5038 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5039 gds_base); 5040 5041 /* GDS Size */ 5042 gfx_v11_0_write_data_to_reg(ring, 0, false, 5043 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5044 gds_size); 5045 5046 /* GWS */ 5047 gfx_v11_0_write_data_to_reg(ring, 0, false, 5048 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5049 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5050 5051 /* OA */ 5052 gfx_v11_0_write_data_to_reg(ring, 0, false, 5053 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5054 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5055 } 5056 5057 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5058 { 5059 struct amdgpu_device *adev = ip_block->adev; 5060 5061 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5062 5063 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5064 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5065 AMDGPU_MAX_COMPUTE_RINGS); 5066 5067 gfx_v11_0_set_kiq_pm4_funcs(adev); 5068 gfx_v11_0_set_ring_funcs(adev); 5069 gfx_v11_0_set_irq_funcs(adev); 5070 gfx_v11_0_set_gds_init(adev); 5071 gfx_v11_0_set_rlc_funcs(adev); 5072 
gfx_v11_0_set_mqd_funcs(adev); 5073 gfx_v11_0_set_imu_funcs(adev); 5074 5075 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5076 5077 return gfx_v11_0_init_microcode(adev); 5078 } 5079 5080 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5081 { 5082 struct amdgpu_device *adev = ip_block->adev; 5083 int r; 5084 5085 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5086 if (r) 5087 return r; 5088 5089 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5090 if (r) 5091 return r; 5092 5093 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5094 if (r) 5095 return r; 5096 return 0; 5097 } 5098 5099 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5100 { 5101 uint32_t rlc_cntl; 5102 5103 /* if RLC is not enabled, do nothing */ 5104 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5105 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5106 } 5107 5108 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5109 { 5110 uint32_t data; 5111 unsigned i; 5112 5113 data = RLC_SAFE_MODE__CMD_MASK; 5114 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5115 5116 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5117 5118 /* wait for RLC_SAFE_MODE */ 5119 for (i = 0; i < adev->usec_timeout; i++) { 5120 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5121 RLC_SAFE_MODE, CMD)) 5122 break; 5123 udelay(1); 5124 } 5125 } 5126 5127 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5128 { 5129 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5130 } 5131 5132 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5133 bool enable) 5134 { 5135 uint32_t def, data; 5136 5137 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5138 return; 5139 5140 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5141 5142 if (enable) 5143 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5144 else 5145 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5146 5147 if (def != data) 5148 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5149 } 5150 5151 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5152 bool enable) 5153 { 5154 uint32_t def, data; 5155 5156 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5157 return; 5158 5159 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5160 5161 if (enable) 5162 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5163 else 5164 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5165 5166 if (def != data) 5167 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5168 } 5169 5170 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5171 bool enable) 5172 { 5173 uint32_t def, data; 5174 5175 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5176 return; 5177 5178 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5179 5180 if (enable) 5181 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5182 else 5183 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5184 5185 if (def != data) 5186 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5187 } 5188 5189 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5190 bool enable) 5191 { 5192 uint32_t data, def; 5193 5194 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5195 return; 5196 5197 /* It is disabled by HW by default */ 5198 if (enable) { 5199 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5200 /* 1 - 
RLC_CGTT_MGCG_OVERRIDE */ 5201 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5202 5203 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5204 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5205 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5206 5207 if (def != data) 5208 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5209 } 5210 } else { 5211 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5212 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5213 5214 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5215 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5216 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5217 5218 if (def != data) 5219 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5220 } 5221 } 5222 } 5223 5224 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5225 bool enable) 5226 { 5227 uint32_t def, data; 5228 5229 if (!(adev->cg_flags & 5230 (AMD_CG_SUPPORT_GFX_CGCG | 5231 AMD_CG_SUPPORT_GFX_CGLS | 5232 AMD_CG_SUPPORT_GFX_3D_CGCG | 5233 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5234 return; 5235 5236 if (enable) { 5237 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5238 5239 /* unset CGCG override */ 5240 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5241 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5242 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5243 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5244 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5245 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5246 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5247 5248 /* update CGCG override bits */ 5249 if (def != data) 5250 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5251 5252 /* enable cgcg FSM(0x0000363F) */ 5253 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5254 5255 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5256 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5257 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5258 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5259 } 5260 5261 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5262 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5263 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5264 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5265 } 5266 5267 if (def != data) 5268 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5269 5270 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5271 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5272 5273 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5274 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5275 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5276 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5277 } 5278 5279 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5280 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5281 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5282 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5283 } 5284 5285 if (def != data) 5286 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5287 5288 /* set IDLE_POLL_COUNT(0x00900100) */ 5289 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5290 5291 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5292 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5293 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5294 5295 if (def != data) 5296 
WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5297 5298 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5299 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5300 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5301 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5302 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5303 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5304 5305 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5306 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5307 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5308 5309 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5310 if (adev->sdma.num_instances > 1) { 5311 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5312 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5313 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5314 } 5315 } else { 5316 /* Program RLC_CGCG_CGLS_CTRL */ 5317 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5318 5319 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5320 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5321 5322 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5323 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5324 5325 if (def != data) 5326 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5327 5328 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5329 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5330 5331 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5332 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5333 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5334 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5335 5336 if (def != data) 5337 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5338 5339 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5340 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5341 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5342 5343 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5344 if (adev->sdma.num_instances > 1) { 5345 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5346 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5347 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5348 } 5349 } 5350 } 5351 5352 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5353 bool enable) 5354 { 5355 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5356 5357 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5358 5359 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5360 5361 gfx_v11_0_update_repeater_fgcg(adev, enable); 5362 5363 gfx_v11_0_update_sram_fgcg(adev, enable); 5364 5365 gfx_v11_0_update_perf_clk(adev, enable); 5366 5367 if (adev->cg_flags & 5368 (AMD_CG_SUPPORT_GFX_MGCG | 5369 AMD_CG_SUPPORT_GFX_CGLS | 5370 AMD_CG_SUPPORT_GFX_CGCG | 5371 AMD_CG_SUPPORT_GFX_3D_CGCG | 5372 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5373 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5374 5375 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5376 5377 return 0; 5378 } 5379 5380 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5381 { 5382 u32 reg, pre_data, data; 5383 5384 amdgpu_gfx_off_ctrl(adev, false); 5385 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5386 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5387 pre_data = RREG32_NO_KIQ(reg); 5388 else 5389 pre_data = RREG32(reg); 5390 5391 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5392 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) 
<< RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5393 5394 if (pre_data != data) { 5395 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5396 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5397 } else 5398 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5399 } 5400 amdgpu_gfx_off_ctrl(adev, true); 5401 5402 if (ring 5403 && amdgpu_sriov_is_pp_one_vf(adev) 5404 && (pre_data != data) 5405 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5406 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5407 amdgpu_ring_emit_wreg(ring, reg, data); 5408 } 5409 } 5410 5411 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5412 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5413 .set_safe_mode = gfx_v11_0_set_safe_mode, 5414 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5415 .init = gfx_v11_0_rlc_init, 5416 .get_csb_size = gfx_v11_0_get_csb_size, 5417 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5418 .resume = gfx_v11_0_rlc_resume, 5419 .stop = gfx_v11_0_rlc_stop, 5420 .reset = gfx_v11_0_rlc_reset, 5421 .start = gfx_v11_0_rlc_start, 5422 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5423 }; 5424 5425 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5426 { 5427 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5428 5429 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5430 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5431 else 5432 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5433 5434 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5435 5436 // Program RLC_PG_DELAY3 for CGPG hysteresis 5437 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5438 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5439 case IP_VERSION(11, 0, 1): 5440 case IP_VERSION(11, 0, 4): 5441 case IP_VERSION(11, 5, 0): 5442 case IP_VERSION(11, 5, 1): 5443 case IP_VERSION(11, 5, 2): 5444 case IP_VERSION(11, 5, 3): 5445 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5446 break; 5447 default: 5448 break; 5449 } 5450 } 5451 } 5452 5453 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5454 { 5455 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5456 5457 gfx_v11_cntl_power_gating(adev, enable); 5458 5459 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5460 } 5461 5462 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5463 enum amd_powergating_state state) 5464 { 5465 struct amdgpu_device *adev = ip_block->adev; 5466 bool enable = (state == AMD_PG_STATE_GATE); 5467 5468 if (amdgpu_sriov_vf(adev)) 5469 return 0; 5470 5471 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5472 case IP_VERSION(11, 0, 0): 5473 case IP_VERSION(11, 0, 2): 5474 case IP_VERSION(11, 0, 3): 5475 amdgpu_gfx_off_ctrl(adev, enable); 5476 break; 5477 case IP_VERSION(11, 0, 1): 5478 case IP_VERSION(11, 0, 4): 5479 case IP_VERSION(11, 5, 0): 5480 case IP_VERSION(11, 5, 1): 5481 case IP_VERSION(11, 5, 2): 5482 case IP_VERSION(11, 5, 3): 5483 if (!enable) 5484 amdgpu_gfx_off_ctrl(adev, false); 5485 5486 gfx_v11_cntl_pg(adev, enable); 5487 5488 if (enable) 5489 amdgpu_gfx_off_ctrl(adev, true); 5490 5491 break; 5492 default: 5493 break; 5494 } 5495 5496 return 0; 5497 } 5498 5499 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5500 enum amd_clockgating_state state) 5501 { 5502 struct amdgpu_device *adev = ip_block->adev; 5503 5504 if (amdgpu_sriov_vf(adev)) 5505 return 0; 5506 5507 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5508 case IP_VERSION(11, 0, 0): 5509 case IP_VERSION(11, 0, 1): 5510 case IP_VERSION(11, 0, 2): 
5511 case IP_VERSION(11, 0, 3): 5512 case IP_VERSION(11, 0, 4): 5513 case IP_VERSION(11, 5, 0): 5514 case IP_VERSION(11, 5, 1): 5515 case IP_VERSION(11, 5, 2): 5516 case IP_VERSION(11, 5, 3): 5517 gfx_v11_0_update_gfx_clock_gating(adev, 5518 state == AMD_CG_STATE_GATE); 5519 break; 5520 default: 5521 break; 5522 } 5523 5524 return 0; 5525 } 5526 5527 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5528 { 5529 struct amdgpu_device *adev = ip_block->adev; 5530 int data; 5531 5532 /* AMD_CG_SUPPORT_GFX_MGCG */ 5533 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5534 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5535 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5536 5537 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5538 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5539 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5540 5541 /* AMD_CG_SUPPORT_GFX_FGCG */ 5542 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5543 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5544 5545 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5546 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5547 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5548 5549 /* AMD_CG_SUPPORT_GFX_CGCG */ 5550 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5551 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5552 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5553 5554 /* AMD_CG_SUPPORT_GFX_CGLS */ 5555 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5556 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5557 5558 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5559 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5560 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5561 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5562 5563 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5564 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5565 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5566 } 5567 5568 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5569 { 5570 /* gfx11 is 32bit rptr*/ 5571 return *(uint32_t *)ring->rptr_cpu_addr; 5572 } 5573 5574 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5575 { 5576 struct amdgpu_device *adev = ring->adev; 5577 u64 wptr; 5578 5579 /* XXX check if swapping is necessary on BE */ 5580 if (ring->use_doorbell) { 5581 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5582 } else { 5583 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5584 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5585 } 5586 5587 return wptr; 5588 } 5589 5590 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5591 { 5592 struct amdgpu_device *adev = ring->adev; 5593 5594 if (ring->use_doorbell) { 5595 /* XXX check if swapping is necessary on BE */ 5596 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5597 ring->wptr); 5598 WDOORBELL64(ring->doorbell_index, ring->wptr); 5599 } else { 5600 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5601 lower_32_bits(ring->wptr)); 5602 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5603 upper_32_bits(ring->wptr)); 5604 } 5605 } 5606 5607 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5608 { 5609 /* gfx11 hardware is 32bit rptr */ 5610 return *(uint32_t *)ring->rptr_cpu_addr; 5611 } 5612 5613 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5614 { 5615 u64 wptr; 5616 5617 /* XXX check if swapping is necessary on BE */ 5618 if (ring->use_doorbell) 5619 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5620 else 5621 BUG(); 5622 return wptr; 5623 } 5624 5625 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 
5626 { 5627 struct amdgpu_device *adev = ring->adev; 5628 5629 /* XXX check if swapping is necessary on BE */ 5630 if (ring->use_doorbell) { 5631 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5632 ring->wptr); 5633 WDOORBELL64(ring->doorbell_index, ring->wptr); 5634 } else { 5635 BUG(); /* only DOORBELL method supported on gfx11 now */ 5636 } 5637 } 5638 5639 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5640 { 5641 struct amdgpu_device *adev = ring->adev; 5642 u32 ref_and_mask, reg_mem_engine; 5643 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5644 5645 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5646 switch (ring->me) { 5647 case 1: 5648 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5649 break; 5650 case 2: 5651 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5652 break; 5653 default: 5654 return; 5655 } 5656 reg_mem_engine = 0; 5657 } else { 5658 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5659 reg_mem_engine = 1; /* pfp */ 5660 } 5661 5662 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5663 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5664 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5665 ref_and_mask, ref_and_mask, 0x20); 5666 } 5667 5668 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5669 struct amdgpu_job *job, 5670 struct amdgpu_ib *ib, 5671 uint32_t flags) 5672 { 5673 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5674 u32 header, control = 0; 5675 5676 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5677 5678 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5679 5680 control |= ib->length_dw | (vmid << 24); 5681 5682 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5683 control |= INDIRECT_BUFFER_PRE_ENB(1); 5684 5685 if (flags & AMDGPU_IB_PREEMPTED) 5686 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5687 5688 if (vmid) 5689 gfx_v11_0_ring_emit_de_meta(ring, 5690 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5691 } 5692 5693 if (ring->is_mes_queue) 5694 /* inherit vmid from mqd */ 5695 control |= 0x400000; 5696 5697 amdgpu_ring_write(ring, header); 5698 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5699 amdgpu_ring_write(ring, 5700 #ifdef __BIG_ENDIAN 5701 (2 << 0) | 5702 #endif 5703 lower_32_bits(ib->gpu_addr)); 5704 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5705 amdgpu_ring_write(ring, control); 5706 } 5707 5708 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5709 struct amdgpu_job *job, 5710 struct amdgpu_ib *ib, 5711 uint32_t flags) 5712 { 5713 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5714 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5715 5716 if (ring->is_mes_queue) 5717 /* inherit vmid from mqd */ 5718 control |= 0x40000000; 5719 5720 /* Currently, there is a high possibility to get wave ID mismatch 5721 * between ME and GDS, leading to a hw deadlock, because ME generates 5722 * different wave IDs than the GDS expects. This situation happens 5723 * randomly when at least 5 compute pipes use GDS ordered append. 5724 * The wave IDs generated by ME are also wrong after suspend/resume. 5725 * Those are probably bugs somewhere else in the kernel driver. 5726 * 5727 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5728 * GDS to 0 for this ring (me/pipe). 
5729 */ 5730 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5731 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5732 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5733 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5734 } 5735 5736 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5737 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5738 amdgpu_ring_write(ring, 5739 #ifdef __BIG_ENDIAN 5740 (2 << 0) | 5741 #endif 5742 lower_32_bits(ib->gpu_addr)); 5743 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5744 amdgpu_ring_write(ring, control); 5745 } 5746 5747 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5748 u64 seq, unsigned flags) 5749 { 5750 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5751 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5752 5753 /* RELEASE_MEM - flush caches, send int */ 5754 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5755 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5756 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5757 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5758 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5759 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5760 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5761 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5762 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5763 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 5764 5765 /* 5766 * the address should be Qword aligned if 64bit write, Dword 5767 * aligned if only send 32bit data low (discard data high) 5768 */ 5769 if (write64bit) 5770 BUG_ON(addr & 0x7); 5771 else 5772 BUG_ON(addr & 0x3); 5773 amdgpu_ring_write(ring, lower_32_bits(addr)); 5774 amdgpu_ring_write(ring, upper_32_bits(addr)); 5775 amdgpu_ring_write(ring, lower_32_bits(seq)); 5776 amdgpu_ring_write(ring, upper_32_bits(seq)); 5777 amdgpu_ring_write(ring, ring->is_mes_queue ? 5778 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); 5779 } 5780 5781 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5782 { 5783 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5784 uint32_t seq = ring->fence_drv.sync_seq; 5785 uint64_t addr = ring->fence_drv.gpu_addr; 5786 5787 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5788 upper_32_bits(addr), seq, 0xffffffff, 4); 5789 } 5790 5791 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5792 uint16_t pasid, uint32_t flush_type, 5793 bool all_hub, uint8_t dst_sel) 5794 { 5795 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5796 amdgpu_ring_write(ring, 5797 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5798 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5799 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5800 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5801 } 5802 5803 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5804 unsigned vmid, uint64_t pd_addr) 5805 { 5806 if (ring->is_mes_queue) 5807 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); 5808 else 5809 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5810 5811 /* compute doesn't have PFP */ 5812 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5813 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5814 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5815 amdgpu_ring_write(ring, 0x0); 5816 } 5817 5818 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5819 * changed in any way. 
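	 * Clearing the cached offset and pointer below forces the next call to
	 * gfx_v11_0_ring_emit_gfx_shadow() to emit a fresh SET_Q_MODE packet
	 * instead of skipping or patching the previous one.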
	 */
	ring->set_q_mode_offs = 0;
	ring->set_q_mode_ptr = NULL;
}

static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
					 uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						   uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
					   u64 shadow_va, u64 csa_va,
					   u64 gds_va, bool init_shadow,
					   int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offs, end;

	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
		return;

	/*
	 * The logic here isn't easy to understand because we need to keep state
	 * across multiple executions of the function as well as between the
	 * CPU and GPU. The general idea is that the newly written GPU command
	 * is conditioned on the previous one and is only executed if really
	 * necessary.
	 */

	/*
	 * The dw in the NOP controls whether the next SET_Q_MODE packet should
	 * be executed or not. Reserve 64 bits just to be on the safe side.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
	offs = ring->wptr & ring->buf_mask;

	/*
	 * We start with skipping the prefix SET_Q_MODE and always executing
	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command once the postfix has executed.
	 */
	amdgpu_ring_write(ring, shadow_va ?
1 : 0); 5921 amdgpu_ring_write(ring, 0); 5922 5923 if (ring->set_q_mode_offs) { 5924 uint64_t addr; 5925 5926 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5927 addr += ring->set_q_mode_offs << 2; 5928 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 5929 } 5930 5931 /* 5932 * When the postfix SET_Q_MODE packet executes we need to make sure that the 5933 * next prefix SET_Q_MODE packet executes as well. 5934 */ 5935 if (!shadow_va) { 5936 uint64_t addr; 5937 5938 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5939 addr += offs << 2; 5940 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5941 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5942 amdgpu_ring_write(ring, lower_32_bits(addr)); 5943 amdgpu_ring_write(ring, upper_32_bits(addr)); 5944 amdgpu_ring_write(ring, 0x1); 5945 } 5946 5947 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 5948 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 5949 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 5950 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 5951 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 5952 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 5953 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 5954 amdgpu_ring_write(ring, shadow_va ? 5955 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 5956 amdgpu_ring_write(ring, init_shadow ? 5957 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 5958 5959 if (ring->set_q_mode_offs) 5960 amdgpu_ring_patch_cond_exec(ring, end); 5961 5962 if (shadow_va) { 5963 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 5964 5965 /* 5966 * If the tokens match try to skip the last postfix SET_Q_MODE 5967 * packet to avoid saving/restoring the state all the time. 5968 */ 5969 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 5970 *ring->set_q_mode_ptr = 0; 5971 5972 ring->set_q_mode_token = token; 5973 } else { 5974 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 5975 } 5976 5977 ring->set_q_mode_offs = offs; 5978 } 5979 5980 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 5981 { 5982 int i, r = 0; 5983 struct amdgpu_device *adev = ring->adev; 5984 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5985 struct amdgpu_ring *kiq_ring = &kiq->ring; 5986 unsigned long flags; 5987 5988 if (adev->enable_mes) 5989 return -EINVAL; 5990 5991 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5992 return -EINVAL; 5993 5994 spin_lock_irqsave(&kiq->ring_lock, flags); 5995 5996 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5997 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5998 return -ENOMEM; 5999 } 6000 6001 /* assert preemption condition */ 6002 amdgpu_ring_set_preempt_cond_exec(ring, false); 6003 6004 /* assert IB preemption, emit the trailing fence */ 6005 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6006 ring->trail_fence_gpu_addr, 6007 ++ring->trail_seq); 6008 amdgpu_ring_commit(kiq_ring); 6009 6010 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6011 6012 /* poll the trailing fence */ 6013 for (i = 0; i < adev->usec_timeout; i++) { 6014 if (ring->trail_seq == 6015 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6016 break; 6017 udelay(1); 6018 } 6019 6020 if (i >= adev->usec_timeout) { 6021 r = -EINVAL; 6022 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6023 } 6024 6025 /* deassert preemption condition */ 6026 amdgpu_ring_set_preempt_cond_exec(ring, true); 6027 return r; 6028 } 6029 6030 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6031 { 6032 struct 
amdgpu_device *adev = ring->adev; 6033 struct v10_de_ib_state de_payload = {0}; 6034 uint64_t offset, gds_addr, de_payload_gpu_addr; 6035 void *de_payload_cpu_addr; 6036 int cnt; 6037 6038 if (ring->is_mes_queue) { 6039 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6040 gfx[0].gfx_meta_data) + 6041 offsetof(struct v10_gfx_meta_data, de_payload); 6042 de_payload_gpu_addr = 6043 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6044 de_payload_cpu_addr = 6045 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 6046 6047 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6048 gfx[0].gds_backup) + 6049 offsetof(struct v10_gfx_meta_data, de_payload); 6050 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6051 } else { 6052 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6053 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6054 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6055 6056 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6057 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6058 PAGE_SIZE); 6059 } 6060 6061 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6062 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6063 6064 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6065 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6066 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6067 WRITE_DATA_DST_SEL(8) | 6068 WR_CONFIRM) | 6069 WRITE_DATA_CACHE_POLICY(0)); 6070 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6071 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6072 6073 if (resume) 6074 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6075 sizeof(de_payload) >> 2); 6076 else 6077 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6078 sizeof(de_payload) >> 2); 6079 } 6080 6081 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6082 bool secure) 6083 { 6084 uint32_t v = secure ? FRAME_TMZ : 0; 6085 6086 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6087 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 6088 } 6089 6090 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6091 uint32_t reg_val_offs) 6092 { 6093 struct amdgpu_device *adev = ring->adev; 6094 6095 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6096 amdgpu_ring_write(ring, 0 | /* src: register*/ 6097 (5 << 8) | /* dst: memory */ 6098 (1 << 20)); /* write confirm */ 6099 amdgpu_ring_write(ring, reg); 6100 amdgpu_ring_write(ring, 0); 6101 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6102 reg_val_offs * 4)); 6103 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6104 reg_val_offs * 4)); 6105 } 6106 6107 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6108 uint32_t val) 6109 { 6110 uint32_t cmd = 0; 6111 6112 switch (ring->funcs->type) { 6113 case AMDGPU_RING_TYPE_GFX: 6114 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6115 break; 6116 case AMDGPU_RING_TYPE_KIQ: 6117 cmd = (1 << 16); /* no inc addr */ 6118 break; 6119 default: 6120 cmd = WR_CONFIRM; 6121 break; 6122 } 6123 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6124 amdgpu_ring_write(ring, cmd); 6125 amdgpu_ring_write(ring, reg); 6126 amdgpu_ring_write(ring, 0); 6127 amdgpu_ring_write(ring, val); 6128 } 6129 6130 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6131 uint32_t val, uint32_t mask) 6132 { 6133 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6134 } 6135 6136 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6137 uint32_t reg0, uint32_t reg1, 6138 uint32_t ref, uint32_t mask) 6139 { 6140 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6141 6142 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6143 ref, mask, 0x20); 6144 } 6145 6146 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 6147 unsigned vmid) 6148 { 6149 struct amdgpu_device *adev = ring->adev; 6150 uint32_t value = 0; 6151 6152 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6153 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 6154 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6155 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6156 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6157 WREG32_SOC15(GC, 0, regSQ_CMD, value); 6158 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6159 } 6160 6161 static void 6162 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6163 uint32_t me, uint32_t pipe, 6164 enum amdgpu_interrupt_state state) 6165 { 6166 uint32_t cp_int_cntl, cp_int_cntl_reg; 6167 6168 if (!me) { 6169 switch (pipe) { 6170 case 0: 6171 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6172 break; 6173 case 1: 6174 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6175 break; 6176 default: 6177 DRM_DEBUG("invalid pipe %d\n", pipe); 6178 return; 6179 } 6180 } else { 6181 DRM_DEBUG("invalid me %d\n", me); 6182 return; 6183 } 6184 6185 switch (state) { 6186 case AMDGPU_IRQ_STATE_DISABLE: 6187 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6188 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6189 TIME_STAMP_INT_ENABLE, 0); 6190 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6191 GENERIC0_INT_ENABLE, 0); 6192 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6193 break; 6194 case AMDGPU_IRQ_STATE_ENABLE: 6195 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6196 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6197 TIME_STAMP_INT_ENABLE, 1); 6198 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6199 
GENERIC0_INT_ENABLE, 1); 6200 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6201 break; 6202 default: 6203 break; 6204 } 6205 } 6206 6207 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6208 int me, int pipe, 6209 enum amdgpu_interrupt_state state) 6210 { 6211 u32 mec_int_cntl, mec_int_cntl_reg; 6212 6213 /* 6214 * amdgpu controls only the first MEC. That's why this function only 6215 * handles the setting of interrupts for this specific MEC. All other 6216 * pipes' interrupts are set by amdkfd. 6217 */ 6218 6219 if (me == 1) { 6220 switch (pipe) { 6221 case 0: 6222 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6223 break; 6224 case 1: 6225 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6226 break; 6227 case 2: 6228 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6229 break; 6230 case 3: 6231 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6232 break; 6233 default: 6234 DRM_DEBUG("invalid pipe %d\n", pipe); 6235 return; 6236 } 6237 } else { 6238 DRM_DEBUG("invalid me %d\n", me); 6239 return; 6240 } 6241 6242 switch (state) { 6243 case AMDGPU_IRQ_STATE_DISABLE: 6244 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6245 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6246 TIME_STAMP_INT_ENABLE, 0); 6247 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6248 GENERIC0_INT_ENABLE, 0); 6249 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6250 break; 6251 case AMDGPU_IRQ_STATE_ENABLE: 6252 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6253 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6254 TIME_STAMP_INT_ENABLE, 1); 6255 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6256 GENERIC0_INT_ENABLE, 1); 6257 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6258 break; 6259 default: 6260 break; 6261 } 6262 } 6263 6264 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6265 struct amdgpu_irq_src *src, 6266 unsigned type, 6267 enum amdgpu_interrupt_state state) 6268 { 6269 switch (type) { 6270 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6271 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6272 break; 6273 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6274 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6275 break; 6276 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6277 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6278 break; 6279 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6280 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6281 break; 6282 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6283 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6284 break; 6285 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6286 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6287 break; 6288 default: 6289 break; 6290 } 6291 return 0; 6292 } 6293 6294 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6295 struct amdgpu_irq_src *source, 6296 struct amdgpu_iv_entry *entry) 6297 { 6298 int i; 6299 u8 me_id, pipe_id, queue_id; 6300 struct amdgpu_ring *ring; 6301 uint32_t mes_queue_id = entry->src_data[0]; 6302 6303 DRM_DEBUG("IH: CP EOP\n"); 6304 6305 if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { 6306 struct amdgpu_mes_queue *queue; 6307 6308 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; 6309 6310 spin_lock(&adev->mes.queue_id_lock); 6311 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); 6312 if 
(queue) { 6313 DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); 6314 amdgpu_fence_process(queue->ring); 6315 } 6316 spin_unlock(&adev->mes.queue_id_lock); 6317 } else { 6318 me_id = (entry->ring_id & 0x0c) >> 2; 6319 pipe_id = (entry->ring_id & 0x03) >> 0; 6320 queue_id = (entry->ring_id & 0x70) >> 4; 6321 6322 switch (me_id) { 6323 case 0: 6324 if (pipe_id == 0) 6325 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6326 else 6327 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6328 break; 6329 case 1: 6330 case 2: 6331 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6332 ring = &adev->gfx.compute_ring[i]; 6333 /* Per-queue interrupt is supported for MEC starting from VI. 6334 * The interrupt can only be enabled/disabled per pipe instead 6335 * of per queue. 6336 */ 6337 if ((ring->me == me_id) && 6338 (ring->pipe == pipe_id) && 6339 (ring->queue == queue_id)) 6340 amdgpu_fence_process(ring); 6341 } 6342 break; 6343 } 6344 } 6345 6346 return 0; 6347 } 6348 6349 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6350 struct amdgpu_irq_src *source, 6351 unsigned int type, 6352 enum amdgpu_interrupt_state state) 6353 { 6354 u32 cp_int_cntl_reg, cp_int_cntl; 6355 int i, j; 6356 6357 switch (state) { 6358 case AMDGPU_IRQ_STATE_DISABLE: 6359 case AMDGPU_IRQ_STATE_ENABLE: 6360 for (i = 0; i < adev->gfx.me.num_me; i++) { 6361 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6362 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6363 6364 if (cp_int_cntl_reg) { 6365 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6366 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6367 PRIV_REG_INT_ENABLE, 6368 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6369 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6370 } 6371 } 6372 } 6373 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6374 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6375 /* MECs start at 1 */ 6376 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6377 6378 if (cp_int_cntl_reg) { 6379 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6380 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6381 PRIV_REG_INT_ENABLE, 6382 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6383 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6384 } 6385 } 6386 } 6387 break; 6388 default: 6389 break; 6390 } 6391 6392 return 0; 6393 } 6394 6395 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6396 struct amdgpu_irq_src *source, 6397 unsigned type, 6398 enum amdgpu_interrupt_state state) 6399 { 6400 u32 cp_int_cntl_reg, cp_int_cntl; 6401 int i, j; 6402 6403 switch (state) { 6404 case AMDGPU_IRQ_STATE_DISABLE: 6405 case AMDGPU_IRQ_STATE_ENABLE: 6406 for (i = 0; i < adev->gfx.me.num_me; i++) { 6407 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6408 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6409 6410 if (cp_int_cntl_reg) { 6411 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6412 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6413 OPCODE_ERROR_INT_ENABLE, 6414 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6415 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6416 } 6417 } 6418 } 6419 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6420 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6421 /* MECs start at 1 */ 6422 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6423 6424 if (cp_int_cntl_reg) { 6425 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6426 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6427 OPCODE_ERROR_INT_ENABLE, 6428 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6429 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6430 } 6431 } 6432 } 6433 break; 6434 default: 6435 break; 6436 } 6437 return 0; 6438 } 6439 6440 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6441 struct amdgpu_irq_src *source, 6442 unsigned int type, 6443 enum amdgpu_interrupt_state state) 6444 { 6445 u32 cp_int_cntl_reg, cp_int_cntl; 6446 int i, j; 6447 6448 switch (state) { 6449 case AMDGPU_IRQ_STATE_DISABLE: 6450 case AMDGPU_IRQ_STATE_ENABLE: 6451 for (i = 0; i < adev->gfx.me.num_me; i++) { 6452 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6453 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6454 6455 if (cp_int_cntl_reg) { 6456 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6457 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6458 PRIV_INSTR_INT_ENABLE, 6459 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6460 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6461 } 6462 } 6463 } 6464 break; 6465 default: 6466 break; 6467 } 6468 6469 return 0; 6470 } 6471 6472 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6473 struct amdgpu_iv_entry *entry) 6474 { 6475 u8 me_id, pipe_id, queue_id; 6476 struct amdgpu_ring *ring; 6477 int i; 6478 6479 me_id = (entry->ring_id & 0x0c) >> 2; 6480 pipe_id = (entry->ring_id & 0x03) >> 0; 6481 queue_id = (entry->ring_id & 0x70) >> 4; 6482 6483 switch (me_id) { 6484 case 0: 6485 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6486 ring = &adev->gfx.gfx_ring[i]; 6487 if (ring->me == me_id && ring->pipe == pipe_id && 6488 ring->queue == queue_id) 6489 drm_sched_fault(&ring->sched); 6490 } 6491 break; 6492 case 1: 6493 case 2: 6494 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6495 ring = &adev->gfx.compute_ring[i]; 6496 if (ring->me == me_id && ring->pipe == pipe_id && 6497 ring->queue == queue_id) 6498 drm_sched_fault(&ring->sched); 6499 } 6500 break; 6501 default: 6502 BUG(); 6503 break; 6504 } 6505 } 6506 6507 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6508 struct amdgpu_irq_src *source, 6509 struct amdgpu_iv_entry *entry) 6510 { 6511 DRM_ERROR("Illegal register access in command stream\n"); 6512 gfx_v11_0_handle_priv_fault(adev, entry); 6513 return 0; 6514 } 6515 6516 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6517 struct amdgpu_irq_src *source, 6518 struct amdgpu_iv_entry *entry) 6519 { 6520 DRM_ERROR("Illegal opcode in command stream \n"); 6521 gfx_v11_0_handle_priv_fault(adev, entry); 6522 return 0; 6523 } 6524 6525 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6526 struct amdgpu_irq_src *source, 6527 struct amdgpu_iv_entry *entry) 6528 { 6529 DRM_ERROR("Illegal instruction in command stream\n"); 6530 gfx_v11_0_handle_priv_fault(adev, entry); 6531 return 0; 6532 } 6533 6534 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6535 struct amdgpu_irq_src *source, 6536 struct amdgpu_iv_entry *entry) 6537 { 6538 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6539 return 
adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6540 6541 return 0; 6542 } 6543 6544 #if 0 6545 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6546 struct amdgpu_irq_src *src, 6547 unsigned int type, 6548 enum amdgpu_interrupt_state state) 6549 { 6550 uint32_t tmp, target; 6551 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6552 6553 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6554 target += ring->pipe; 6555 6556 switch (type) { 6557 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6558 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6559 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6560 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6561 GENERIC2_INT_ENABLE, 0); 6562 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6563 6564 tmp = RREG32_SOC15_IP(GC, target); 6565 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6566 GENERIC2_INT_ENABLE, 0); 6567 WREG32_SOC15_IP(GC, target, tmp); 6568 } else { 6569 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6570 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6571 GENERIC2_INT_ENABLE, 1); 6572 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6573 6574 tmp = RREG32_SOC15_IP(GC, target); 6575 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6576 GENERIC2_INT_ENABLE, 1); 6577 WREG32_SOC15_IP(GC, target, tmp); 6578 } 6579 break; 6580 default: 6581 BUG(); /* kiq only support GENERIC2_INT now */ 6582 break; 6583 } 6584 return 0; 6585 } 6586 #endif 6587 6588 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6589 { 6590 const unsigned int gcr_cntl = 6591 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6592 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6593 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6594 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6595 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6596 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6597 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6598 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6599 6600 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6601 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6602 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6603 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6604 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6605 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6606 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6607 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6608 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6609 } 6610 6611 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6612 { 6613 /* Disable the pipe reset until the CPFW fully support it.*/ 6614 dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); 6615 return false; 6616 } 6617 6618 6619 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6620 { 6621 struct amdgpu_device *adev = ring->adev; 6622 uint32_t reset_pipe = 0, clean_pipe = 0; 6623 int r; 6624 6625 if (!gfx_v11_pipe_reset_support(adev)) 6626 return -EOPNOTSUPP; 6627 6628 gfx_v11_0_set_safe_mode(adev, 0); 6629 mutex_lock(&adev->srbm_mutex); 6630 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6631 6632 switch (ring->pipe) { 6633 case 0: 6634 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6635 PFP_PIPE0_RESET, 1); 6636 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6637 ME_PIPE0_RESET, 1); 6638 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6639 PFP_PIPE0_RESET, 0); 6640 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6641 ME_PIPE0_RESET, 0); 6642 break; 6643 case 1: 6644 reset_pipe = 
REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6645 PFP_PIPE1_RESET, 1); 6646 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6647 ME_PIPE1_RESET, 1); 6648 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6649 PFP_PIPE1_RESET, 0); 6650 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6651 ME_PIPE1_RESET, 0); 6652 break; 6653 default: 6654 break; 6655 } 6656 6657 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6658 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6659 6660 r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6661 RS64_FW_UC_START_ADDR_LO; 6662 soc21_grbm_select(adev, 0, 0, 0, 0); 6663 mutex_unlock(&adev->srbm_mutex); 6664 gfx_v11_0_unset_safe_mode(adev, 0); 6665 6666 dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6667 r == 0 ? "successfully" : "failed"); 6668 /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, 6669 * so the pipe reset status relies on the later gfx ring test result. 6670 */ 6671 return 0; 6672 } 6673 6674 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 6675 { 6676 struct amdgpu_device *adev = ring->adev; 6677 int r; 6678 6679 if (amdgpu_sriov_vf(adev)) 6680 return -EINVAL; 6681 6682 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); 6683 if (r) { 6684 6685 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6686 r = gfx_v11_reset_gfx_pipe(ring); 6687 if (r) 6688 return r; 6689 } 6690 6691 r = gfx_v11_0_kgq_init_queue(ring, true); 6692 if (r) { 6693 dev_err(adev->dev, "failed to init kgq\n"); 6694 return r; 6695 } 6696 6697 r = amdgpu_mes_map_legacy_queue(adev, ring); 6698 if (r) { 6699 dev_err(adev->dev, "failed to remap kgq\n"); 6700 return r; 6701 } 6702 6703 return amdgpu_ring_test_ring(ring); 6704 } 6705 6706 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6707 { 6708 6709 struct amdgpu_device *adev = ring->adev; 6710 uint32_t reset_pipe = 0, clean_pipe = 0; 6711 int r; 6712 6713 if (!gfx_v11_pipe_reset_support(adev)) 6714 return -EOPNOTSUPP; 6715 6716 gfx_v11_0_set_safe_mode(adev, 0); 6717 mutex_lock(&adev->srbm_mutex); 6718 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6719 6720 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6721 clean_pipe = reset_pipe; 6722 6723 if (adev->gfx.rs64_enable) { 6724 6725 switch (ring->pipe) { 6726 case 0: 6727 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6728 MEC_PIPE0_RESET, 1); 6729 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6730 MEC_PIPE0_RESET, 0); 6731 break; 6732 case 1: 6733 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6734 MEC_PIPE1_RESET, 1); 6735 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6736 MEC_PIPE1_RESET, 0); 6737 break; 6738 case 2: 6739 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6740 MEC_PIPE2_RESET, 1); 6741 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6742 MEC_PIPE2_RESET, 0); 6743 break; 6744 case 3: 6745 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6746 MEC_PIPE3_RESET, 1); 6747 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6748 MEC_PIPE3_RESET, 0); 6749 break; 6750 default: 6751 break; 6752 } 6753 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6754 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6755 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6756 RS64_FW_UC_START_ADDR_LO; 6757 } else { 6758 if (ring->me == 1) { 6759 switch (ring->pipe) { 6760 case 0: 6761 reset_pipe = 
REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6762 MEC_ME1_PIPE0_RESET, 1); 6763 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6764 MEC_ME1_PIPE0_RESET, 0); 6765 break; 6766 case 1: 6767 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6768 MEC_ME1_PIPE1_RESET, 1); 6769 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6770 MEC_ME1_PIPE1_RESET, 0); 6771 break; 6772 case 2: 6773 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6774 MEC_ME1_PIPE2_RESET, 1); 6775 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6776 MEC_ME1_PIPE2_RESET, 0); 6777 break; 6778 case 3: 6779 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6780 MEC_ME1_PIPE3_RESET, 1); 6781 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6782 MEC_ME1_PIPE3_RESET, 0); 6783 break; 6784 default: 6785 break; 6786 } 6787 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 6788 } else { 6789 switch (ring->pipe) { 6790 case 0: 6791 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6792 MEC_ME2_PIPE0_RESET, 1); 6793 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6794 MEC_ME2_PIPE0_RESET, 0); 6795 break; 6796 case 1: 6797 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6798 MEC_ME2_PIPE1_RESET, 1); 6799 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6800 MEC_ME2_PIPE1_RESET, 0); 6801 break; 6802 case 2: 6803 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6804 MEC_ME2_PIPE2_RESET, 1); 6805 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6806 MEC_ME2_PIPE2_RESET, 0); 6807 break; 6808 case 3: 6809 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6810 MEC_ME2_PIPE3_RESET, 1); 6811 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6812 MEC_ME2_PIPE3_RESET, 0); 6813 break; 6814 default: 6815 break; 6816 } 6817 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ 6818 } 6819 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); 6820 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); 6821 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); 6822 } 6823 6824 soc21_grbm_select(adev, 0, 0, 0, 0); 6825 mutex_unlock(&adev->srbm_mutex); 6826 gfx_v11_0_unset_safe_mode(adev, 0); 6827 6828 dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, 6829 r == 0 ? "successfully" : "failed"); 6830 /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe 6831 * reset status relies on the compute ring test result. 
	 */
	return 0;
}

/* Reset a legacy compute queue: try a MES queue reset first and fall back to
 * a MEC pipe reset, then reinitialize and remap the queue.
 */
static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
	if (r) {
		dev_warn(adev->dev, "failed (%d) to reset kcq via MES, trying pipe reset\n", r);
		r = gfx_v11_0_reset_compute_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kcq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kcq\n");
		return r;
	}
	r = amdgpu_mes_map_legacy_queue(adev, ring);
	if (r) {
		dev_err(adev->dev, "failed to remap kcq\n");
		return r;
	}

	return amdgpu_ring_test_ring(ring);
}

/* Print the GC register state previously captured by gfx_v11_ip_dump(). */
static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	for (i = 0; i < reg_count; i++)
		drm_printf(p, "%-50s \t 0x%08x\n",
			   gc_reg_list_11_0[i].reg_name,
			   adev->gfx.ip_dump_core[i]);

	/* print compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.mec.num_mec,
		   adev->gfx.mec.num_pipe_per_mec,
		   adev->gfx.mec.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_cp_reg_list_11[reg].reg_name,
						   adev->gfx.ip_dump_compute_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}

	/* print gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.me.num_me,
		   adev->gfx.me.num_pipe_per_me,
		   adev->gfx.me.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_gfx_queue_reg_list_11[reg].reg_name,
						   adev->gfx.ip_dump_gfx_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}
}

/* Capture the GC core, compute queue, and gfx queue register state into the
 * ip_dump buffers for later printing via gfx_v11_ip_print().
 */
static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < reg_count; i++)
		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
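	/* Keep GFXOFF disabled and hold the SRBM mutex while each compute queue
	 * is selected through GRBM so its registers can be read back safely.
	 */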
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				/* ME0 is for GFX so start from 1 for CP */
				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_compute_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_cp_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				soc21_grbm_select(adev, i, j, k, 0);

				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_gfx_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_gfx_queue_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);
}

static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
	/* Emit the cleaner shader */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
}

static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_begin_use(ring);

	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
}

static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_end_use(ring);

	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
}

static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
	.name = "gfx_v11_0",
	.early_init = gfx_v11_0_early_init,
	.late_init = gfx_v11_0_late_init,
	.sw_init = gfx_v11_0_sw_init,
	.sw_fini = gfx_v11_0_sw_fini,
	.hw_init = gfx_v11_0_hw_init,
	.hw_fini = gfx_v11_0_hw_fini,
	.suspend = gfx_v11_0_suspend,
	.resume = gfx_v11_0_resume,
	.is_idle = gfx_v11_0_is_idle,
	.wait_for_idle = gfx_v11_0_wait_for_idle,
	.soft_reset = gfx_v11_0_soft_reset,
	.check_soft_reset = gfx_v11_0_check_soft_reset,
	.post_soft_reset = gfx_v11_0_post_soft_reset,
	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
	.set_powergating_state = gfx_v11_0_set_powergating_state,
	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
	.dump_ip_state = gfx_v11_ip_dump,
	.print_ip_state = gfx_v11_ip_print,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 247 maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in the cu_info struct is 4x4 and the ioctl interface struct
			 * drm_amdgpu_info_device should keep stable.
			 * So we use the last two columns of the bitmap to store the
			 * cu mask for SEs 4 to 7; the layout of the bitmap is as below:
			 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};