/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "amdgpu_atomfirmware.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "soc15d.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"

#define GFX11_NUM_GFX_RINGS		1
#define GFX11_MEC_HPD_SIZE		2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL				0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX			1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1		0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1				0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX			1

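/* Firmware binaries that may be requested for the supported GC 11.x variants */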
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 100 101 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 102 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 103 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 104 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 105 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 106 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 107 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 108 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 109 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 110 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 111 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 112 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 113 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 114 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 115 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 116 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 117 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 118 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 119 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 120 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 121 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 122 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 123 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 124 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 125 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 126 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 127 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 135 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 136 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 137 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 138 SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 139 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 140 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), 141 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 142 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 143 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 144 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 145 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 146 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 147 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 148 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 149 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 157 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 158 SOC15_REG_ENTRY_STR(GC, 0, 
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

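/*
 * KIQ PM4 helpers: build the SET_RESOURCES, MAP_QUEUES, UNMAP_QUEUES,
 * QUERY_STATUS and TLB-invalidate packets submitted on the KIQ ring;
 * they are wired up in gfx_v11_0_kiq_pm4_funcs below.
 */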
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

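/* Unmap a queue through the KIQ, or via MES when the KIQ ring is not ready */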
static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}

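/* Emit a WRITE_DATA packet that writes @val to the register at offset @reg */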
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw does not support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 583 cpu_ptr = &adev->wb.wb[index]; 584 585 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 586 if (r) { 587 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 588 goto err1; 589 } 590 } 591 592 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 593 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 594 ib.ptr[2] = lower_32_bits(gpu_addr); 595 ib.ptr[3] = upper_32_bits(gpu_addr); 596 ib.ptr[4] = 0xDEADBEEF; 597 ib.length_dw = 5; 598 599 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 600 if (r) 601 goto err2; 602 603 r = dma_fence_wait_timeout(f, false, timeout); 604 if (r == 0) { 605 r = -ETIMEDOUT; 606 goto err2; 607 } else if (r < 0) { 608 goto err2; 609 } 610 611 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 612 r = 0; 613 else 614 r = -EINVAL; 615 err2: 616 if (!ring->is_mes_queue) 617 amdgpu_ib_free(adev, &ib, NULL); 618 dma_fence_put(f); 619 err1: 620 if (!ring->is_mes_queue) 621 amdgpu_device_wb_free(adev, index); 622 return r; 623 } 624 625 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 626 { 627 amdgpu_ucode_release(&adev->gfx.pfp_fw); 628 amdgpu_ucode_release(&adev->gfx.me_fw); 629 amdgpu_ucode_release(&adev->gfx.rlc_fw); 630 amdgpu_ucode_release(&adev->gfx.mec_fw); 631 632 kfree(adev->gfx.rlc.register_list_format); 633 } 634 635 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 636 { 637 const struct psp_firmware_header_v1_0 *toc_hdr; 638 int err = 0; 639 640 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 641 "amdgpu/%s_toc.bin", ucode_prefix); 642 if (err) 643 goto out; 644 645 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 646 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 647 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 648 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 649 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 650 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 651 return 0; 652 out: 653 amdgpu_ucode_release(&adev->psp.toc_fw); 654 return err; 655 } 656 657 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 658 { 659 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 660 case IP_VERSION(11, 0, 0): 661 case IP_VERSION(11, 0, 2): 662 case IP_VERSION(11, 0, 3): 663 if ((adev->gfx.me_fw_version >= 1505) && 664 (adev->gfx.pfp_fw_version >= 1600) && 665 (adev->gfx.mec_fw_version >= 512)) { 666 if (amdgpu_sriov_vf(adev)) 667 adev->gfx.cp_gfx_shadow = true; 668 else 669 adev->gfx.cp_gfx_shadow = false; 670 } 671 break; 672 default: 673 adev->gfx.cp_gfx_shadow = false; 674 break; 675 } 676 } 677 678 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 679 { 680 char ucode_prefix[25]; 681 int err; 682 const struct rlc_firmware_header_v2_0 *rlc_hdr; 683 uint16_t version_major; 684 uint16_t version_minor; 685 686 DRM_DEBUG("\n"); 687 688 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 689 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 690 "amdgpu/%s_pfp.bin", ucode_prefix); 691 if (err) 692 goto out; 693 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 694 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 695 (union amdgpu_firmware_header *) 696 adev->gfx.pfp_fw->data, 2, 0); 697 if (adev->gfx.rs64_enable) { 698 dev_info(adev->dev, "CP RS64 enable\n"); 699 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 700 amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 701 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 702 } else { 703 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 704 } 705 706 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 707 "amdgpu/%s_me.bin", ucode_prefix); 708 if (err) 709 goto out; 710 if (adev->gfx.rs64_enable) { 711 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 712 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 713 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 714 } else { 715 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 716 } 717 718 if (!amdgpu_sriov_vf(adev)) { 719 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 720 adev->pdev->revision == 0xCE) 721 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 722 "amdgpu/gc_11_0_0_rlc_1.bin"); 723 else 724 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 725 "amdgpu/%s_rlc.bin", ucode_prefix); 726 if (err) 727 goto out; 728 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 729 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 730 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 731 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 732 if (err) 733 goto out; 734 } 735 736 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 737 "amdgpu/%s_mec.bin", ucode_prefix); 738 if (err) 739 goto out; 740 if (adev->gfx.rs64_enable) { 741 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 742 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 743 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 744 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 745 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 746 } else { 747 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 748 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 749 } 750 751 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 752 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 753 754 /* only one MEC for gfx 11.0.0. 
*/ 755 adev->gfx.mec2_fw = NULL; 756 757 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 758 759 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 760 err = adev->gfx.imu.funcs->init_microcode(adev); 761 if (err) 762 DRM_ERROR("Failed to init imu firmware!\n"); 763 return err; 764 } 765 766 out: 767 if (err) { 768 amdgpu_ucode_release(&adev->gfx.pfp_fw); 769 amdgpu_ucode_release(&adev->gfx.me_fw); 770 amdgpu_ucode_release(&adev->gfx.rlc_fw); 771 amdgpu_ucode_release(&adev->gfx.mec_fw); 772 } 773 774 return err; 775 } 776 777 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 778 { 779 u32 count = 0; 780 const struct cs_section_def *sect = NULL; 781 const struct cs_extent_def *ext = NULL; 782 783 /* begin clear state */ 784 count += 2; 785 /* context control state */ 786 count += 3; 787 788 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 789 for (ext = sect->section; ext->extent != NULL; ++ext) { 790 if (sect->id == SECT_CONTEXT) 791 count += 2 + ext->reg_count; 792 else 793 return 0; 794 } 795 } 796 797 /* set PA_SC_TILE_STEERING_OVERRIDE */ 798 count += 3; 799 /* end clear state */ 800 count += 2; 801 /* clear state */ 802 count += 2; 803 804 return count; 805 } 806 807 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 808 volatile u32 *buffer) 809 { 810 u32 count = 0, i; 811 const struct cs_section_def *sect = NULL; 812 const struct cs_extent_def *ext = NULL; 813 int ctx_reg_offset; 814 815 if (adev->gfx.rlc.cs_data == NULL) 816 return; 817 if (buffer == NULL) 818 return; 819 820 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 821 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 822 823 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 824 buffer[count++] = cpu_to_le32(0x80000000); 825 buffer[count++] = cpu_to_le32(0x80000000); 826 827 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 828 for (ext = sect->section; ext->extent != NULL; ++ext) { 829 if (sect->id == SECT_CONTEXT) { 830 buffer[count++] = 831 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 832 buffer[count++] = cpu_to_le32(ext->reg_index - 833 PACKET3_SET_CONTEXT_REG_START); 834 for (i = 0; i < ext->reg_count; i++) 835 buffer[count++] = cpu_to_le32(ext->extent[i]); 836 } else { 837 return; 838 } 839 } 840 } 841 842 ctx_reg_offset = 843 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 844 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 845 buffer[count++] = cpu_to_le32(ctx_reg_offset); 846 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 847 848 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 849 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 850 851 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 852 buffer[count++] = cpu_to_le32(0); 853 } 854 855 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 856 { 857 /* clear state block */ 858 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 859 &adev->gfx.rlc.clear_state_gpu_addr, 860 (void **)&adev->gfx.rlc.cs_ptr); 861 862 /* jump table block */ 863 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 864 &adev->gfx.rlc.cp_table_gpu_addr, 865 (void **)&adev->gfx.rlc.cp_table_ptr); 866 } 867 868 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 869 { 870 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 871 872 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 873 
reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 874 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 875 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 876 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 877 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 878 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 879 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 880 adev->gfx.rlc.rlcg_reg_access_supported = true; 881 } 882 883 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 884 { 885 const struct cs_section_def *cs_data; 886 int r; 887 888 adev->gfx.rlc.cs_data = gfx11_cs_data; 889 890 cs_data = adev->gfx.rlc.cs_data; 891 892 if (cs_data) { 893 /* init clear state block */ 894 r = amdgpu_gfx_rlc_init_csb(adev); 895 if (r) 896 return r; 897 } 898 899 /* init spm vmid with 0xf */ 900 if (adev->gfx.rlc.funcs->update_spm_vmid) 901 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); 902 903 return 0; 904 } 905 906 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 907 { 908 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 909 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 910 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 911 } 912 913 static void gfx_v11_0_me_init(struct amdgpu_device *adev) 914 { 915 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 916 917 amdgpu_gfx_graphics_queue_acquire(adev); 918 } 919 920 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 921 { 922 int r; 923 u32 *hpd; 924 size_t mec_hpd_size; 925 926 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 927 928 /* take ownership of the relevant compute queues */ 929 amdgpu_gfx_compute_queue_acquire(adev); 930 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 931 932 if (mec_hpd_size) { 933 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 934 AMDGPU_GEM_DOMAIN_GTT, 935 &adev->gfx.mec.hpd_eop_obj, 936 &adev->gfx.mec.hpd_eop_gpu_addr, 937 (void **)&hpd); 938 if (r) { 939 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 940 gfx_v11_0_mec_fini(adev); 941 return r; 942 } 943 944 memset(hpd, 0, mec_hpd_size); 945 946 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 947 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 948 } 949 950 return 0; 951 } 952 953 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 954 { 955 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 956 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 957 (address << SQ_IND_INDEX__INDEX__SHIFT)); 958 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 959 } 960 961 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 962 uint32_t thread, uint32_t regno, 963 uint32_t num, uint32_t *out) 964 { 965 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 966 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 967 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 968 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 969 (SQ_IND_INDEX__AUTO_INCR_MASK)); 970 while (num--) 971 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 972 } 973 974 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 975 { 976 /* in gfx11 the SIMD_ID is specified as part of the INSTANCE 977 * field when performing a select_se_sh so it should be 978 * zero here */ 979 WARN_ON(simd != 0); 980 981 /* type 3 wave 
data */ 982 dst[(*no_fields)++] = 3; 983 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 984 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 985 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 986 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 987 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 988 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 989 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 990 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 991 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 992 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 993 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 994 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 995 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 996 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 997 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 998 } 999 1000 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1001 uint32_t wave, uint32_t start, 1002 uint32_t size, uint32_t *dst) 1003 { 1004 WARN_ON(simd != 0); 1005 1006 wave_read_regs( 1007 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1008 dst); 1009 } 1010 1011 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1012 uint32_t wave, uint32_t thread, 1013 uint32_t start, uint32_t size, 1014 uint32_t *dst) 1015 { 1016 wave_read_regs( 1017 adev, wave, thread, 1018 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1019 } 1020 1021 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1022 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1023 { 1024 soc21_grbm_select(adev, me, pipe, q, vm); 1025 } 1026 1027 /* all sizes are in bytes */ 1028 #define MQD_SHADOW_BASE_SIZE 73728 1029 #define MQD_SHADOW_BASE_ALIGNMENT 256 1030 #define MQD_FWWORKAREA_SIZE 484 1031 #define MQD_FWWORKAREA_ALIGNMENT 256 1032 1033 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1034 struct amdgpu_gfx_shadow_info *shadow_info) 1035 { 1036 if (adev->gfx.cp_gfx_shadow) { 1037 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1038 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1039 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1040 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1041 return 0; 1042 } else { 1043 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1044 return -ENOTSUPP; 1045 } 1046 } 1047 1048 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1049 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1050 .select_se_sh = &gfx_v11_0_select_se_sh, 1051 .read_wave_data = &gfx_v11_0_read_wave_data, 1052 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1053 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1054 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1055 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1056 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1057 }; 1058 1059 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1060 { 1061 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1062 case IP_VERSION(11, 0, 0): 1063 case IP_VERSION(11, 0, 2): 1064 adev->gfx.config.max_hw_contexts = 8; 1065 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1066 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1067 
adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1068 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1069 break; 1070 case IP_VERSION(11, 0, 3): 1071 adev->gfx.ras = &gfx_v11_0_3_ras; 1072 adev->gfx.config.max_hw_contexts = 8; 1073 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1074 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1075 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1076 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1077 break; 1078 case IP_VERSION(11, 0, 1): 1079 case IP_VERSION(11, 0, 4): 1080 case IP_VERSION(11, 5, 0): 1081 case IP_VERSION(11, 5, 1): 1082 case IP_VERSION(11, 5, 2): 1083 adev->gfx.config.max_hw_contexts = 8; 1084 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1085 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1086 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1087 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1088 break; 1089 default: 1090 BUG(); 1091 break; 1092 } 1093 1094 return 0; 1095 } 1096 1097 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1098 int me, int pipe, int queue) 1099 { 1100 struct amdgpu_ring *ring; 1101 unsigned int irq_type; 1102 unsigned int hw_prio; 1103 1104 ring = &adev->gfx.gfx_ring[ring_id]; 1105 1106 ring->me = me; 1107 ring->pipe = pipe; 1108 ring->queue = queue; 1109 1110 ring->ring_obj = NULL; 1111 ring->use_doorbell = true; 1112 1113 if (!ring_id) 1114 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1115 else 1116 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1117 ring->vm_hub = AMDGPU_GFXHUB(0); 1118 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1119 1120 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1121 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1122 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1123 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1124 hw_prio, NULL); 1125 } 1126 1127 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1128 int mec, int pipe, int queue) 1129 { 1130 int r; 1131 unsigned irq_type; 1132 struct amdgpu_ring *ring; 1133 unsigned int hw_prio; 1134 1135 ring = &adev->gfx.compute_ring[ring_id]; 1136 1137 /* mec0 is me1 */ 1138 ring->me = mec + 1; 1139 ring->pipe = pipe; 1140 ring->queue = queue; 1141 1142 ring->ring_obj = NULL; 1143 ring->use_doorbell = true; 1144 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1145 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1146 + (ring_id * GFX11_MEC_HPD_SIZE); 1147 ring->vm_hub = AMDGPU_GFXHUB(0); 1148 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1149 1150 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1151 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1152 + ring->pipe; 1153 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1154 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1155 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1156 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1157 hw_prio, NULL); 1158 if (r) 1159 return r; 1160 1161 return 0; 1162 } 1163 1164 static struct { 1165 SOC21_FIRMWARE_ID id; 1166 unsigned int offset; 1167 unsigned int size; 1168 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1169 1170 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1171 { 1172 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1173 1174 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1175 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1176 rlc_autoload_info[ucode->id].id = ucode->id; 1177 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1178 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1179 1180 ucode++; 1181 } 1182 } 1183 1184 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1185 { 1186 uint32_t total_size = 0; 1187 SOC21_FIRMWARE_ID id; 1188 1189 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1190 1191 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1192 total_size += rlc_autoload_info[id].size; 1193 1194 /* In case the offset in rlc toc ucode is aligned */ 1195 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1196 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1197 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1198 1199 return total_size; 1200 } 1201 1202 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1203 { 1204 int r; 1205 uint32_t total_size; 1206 1207 total_size = gfx_v11_0_calc_toc_total_size(adev); 1208 1209 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1210 AMDGPU_GEM_DOMAIN_VRAM | 1211 AMDGPU_GEM_DOMAIN_GTT, 1212 &adev->gfx.rlc.rlc_autoload_bo, 1213 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1214 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1215 1216 if (r) { 1217 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1218 return r; 1219 } 1220 1221 return 0; 1222 } 1223 1224 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1225 SOC21_FIRMWARE_ID id, 1226 const void *fw_data, 1227 uint32_t fw_size, 1228 uint32_t *fw_autoload_mask) 1229 { 1230 uint32_t toc_offset; 1231 uint32_t toc_fw_size; 1232 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1233 1234 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1235 return; 1236 1237 toc_offset = rlc_autoload_info[id].offset; 1238 toc_fw_size = rlc_autoload_info[id].size; 1239 1240 if (fw_size == 0) 1241 fw_size = toc_fw_size; 1242 1243 if (fw_size > toc_fw_size) 1244 fw_size = toc_fw_size; 1245 1246 memcpy(ptr + toc_offset, fw_data, fw_size); 1247 1248 if (fw_size < toc_fw_size) 1249 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1250 1251 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1252 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1253 } 1254 1255 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1256 uint32_t *fw_autoload_mask) 1257 { 1258 void *data; 1259 uint32_t size; 1260 uint64_t *toc_ptr; 1261 1262 *(uint64_t *)fw_autoload_mask |= 0x1; 1263 1264 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1265 1266 data = adev->psp.toc.start_addr; 1267 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1268 1269 toc_ptr = (uint64_t *)data + size / 8 - 1; 1270 *toc_ptr = 
*(uint64_t *)fw_autoload_mask; 1271 1272 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1273 data, size, fw_autoload_mask); 1274 } 1275 1276 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1277 uint32_t *fw_autoload_mask) 1278 { 1279 const __le32 *fw_data; 1280 uint32_t fw_size; 1281 const struct gfx_firmware_header_v1_0 *cp_hdr; 1282 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1283 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1284 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1285 uint16_t version_major, version_minor; 1286 1287 if (adev->gfx.rs64_enable) { 1288 /* pfp ucode */ 1289 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1290 adev->gfx.pfp_fw->data; 1291 /* instruction */ 1292 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1293 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1294 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1295 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1296 fw_data, fw_size, fw_autoload_mask); 1297 /* data */ 1298 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1299 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1300 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1301 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1302 fw_data, fw_size, fw_autoload_mask); 1303 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1304 fw_data, fw_size, fw_autoload_mask); 1305 /* me ucode */ 1306 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1307 adev->gfx.me_fw->data; 1308 /* instruction */ 1309 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1310 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1311 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1312 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1313 fw_data, fw_size, fw_autoload_mask); 1314 /* data */ 1315 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1316 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1317 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1318 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1319 fw_data, fw_size, fw_autoload_mask); 1320 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1321 fw_data, fw_size, fw_autoload_mask); 1322 /* mec ucode */ 1323 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1324 adev->gfx.mec_fw->data; 1325 /* instruction */ 1326 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1327 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1328 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1329 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1330 fw_data, fw_size, fw_autoload_mask); 1331 /* data */ 1332 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1333 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1334 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1335 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1336 fw_data, fw_size, fw_autoload_mask); 1337 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1338 fw_data, fw_size, fw_autoload_mask); 1339 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1340 fw_data, fw_size, fw_autoload_mask); 1341 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1342 fw_data, fw_size, fw_autoload_mask); 1343 } else { 1344 /* pfp ucode */ 1345 cp_hdr = (const struct 
gfx_firmware_header_v1_0 *) 1346 adev->gfx.pfp_fw->data; 1347 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1348 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1349 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1350 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1351 fw_data, fw_size, fw_autoload_mask); 1352 1353 /* me ucode */ 1354 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1355 adev->gfx.me_fw->data; 1356 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1357 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1358 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1359 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1360 fw_data, fw_size, fw_autoload_mask); 1361 1362 /* mec ucode */ 1363 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1364 adev->gfx.mec_fw->data; 1365 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1366 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1367 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1368 cp_hdr->jt_size * 4; 1369 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1370 fw_data, fw_size, fw_autoload_mask); 1371 } 1372 1373 /* rlc ucode */ 1374 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1375 adev->gfx.rlc_fw->data; 1376 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1377 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1378 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1379 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1380 fw_data, fw_size, fw_autoload_mask); 1381 1382 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1383 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1384 if (version_major == 2) { 1385 if (version_minor >= 2) { 1386 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1387 1388 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1389 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1390 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1391 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1392 fw_data, fw_size, fw_autoload_mask); 1393 1394 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1395 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1396 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1397 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1398 fw_data, fw_size, fw_autoload_mask); 1399 } 1400 } 1401 } 1402 1403 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1404 uint32_t *fw_autoload_mask) 1405 { 1406 const __le32 *fw_data; 1407 uint32_t fw_size; 1408 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1409 1410 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1411 adev->sdma.instance[0].fw->data; 1412 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1413 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1414 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1415 1416 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1417 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1418 1419 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1420 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1421 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1422 1423 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1424 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, 
fw_autoload_mask); 1425 } 1426 1427 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1428 uint32_t *fw_autoload_mask) 1429 { 1430 const __le32 *fw_data; 1431 unsigned fw_size; 1432 const struct mes_firmware_header_v1_0 *mes_hdr; 1433 int pipe, ucode_id, data_id; 1434 1435 for (pipe = 0; pipe < 2; pipe++) { 1436 if (pipe==0) { 1437 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1438 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1439 } else { 1440 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1441 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1442 } 1443 1444 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1445 adev->mes.fw[pipe]->data; 1446 1447 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1448 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1449 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1450 1451 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1452 ucode_id, fw_data, fw_size, fw_autoload_mask); 1453 1454 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1455 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1456 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1457 1458 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1459 data_id, fw_data, fw_size, fw_autoload_mask); 1460 } 1461 } 1462 1463 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1464 { 1465 uint32_t rlc_g_offset, rlc_g_size; 1466 uint64_t gpu_addr; 1467 uint32_t autoload_fw_id[2]; 1468 1469 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1470 1471 /* RLC autoload sequence 2: copy ucode */ 1472 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1473 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1474 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1475 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1476 1477 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1478 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1479 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1480 1481 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1482 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1483 1484 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1485 1486 /* RLC autoload sequence 3: load IMU fw */ 1487 if (adev->gfx.imu.funcs->load_microcode) 1488 adev->gfx.imu.funcs->load_microcode(adev); 1489 /* RLC autoload sequence 4 init IMU fw */ 1490 if (adev->gfx.imu.funcs->setup_imu) 1491 adev->gfx.imu.funcs->setup_imu(adev); 1492 if (adev->gfx.imu.funcs->start_imu) 1493 adev->gfx.imu.funcs->start_imu(adev); 1494 1495 /* RLC autoload sequence 5 disable gpa mode */ 1496 gfx_v11_0_disable_gpa_mode(adev); 1497 1498 return 0; 1499 } 1500 1501 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1502 { 1503 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1504 uint32_t *ptr; 1505 uint32_t inst; 1506 1507 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1508 if (!ptr) { 1509 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1510 adev->gfx.ip_dump_core = NULL; 1511 } else { 1512 adev->gfx.ip_dump_core = ptr; 1513 } 1514 1515 /* Allocate memory for compute queue registers for all the instances */ 1516 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1517 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1518 adev->gfx.mec.num_queue_per_pipe; 1519 1520 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), 
GFP_KERNEL); 1521 if (!ptr) { 1522 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1523 adev->gfx.ip_dump_compute_queues = NULL; 1524 } else { 1525 adev->gfx.ip_dump_compute_queues = ptr; 1526 } 1527 1528 /* Allocate memory for gfx queue registers for all the instances */ 1529 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1530 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1531 adev->gfx.me.num_queue_per_pipe; 1532 1533 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1534 if (!ptr) { 1535 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1536 adev->gfx.ip_dump_gfx_queues = NULL; 1537 } else { 1538 adev->gfx.ip_dump_gfx_queues = ptr; 1539 } 1540 } 1541 1542 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1543 { 1544 int i, j, k, r, ring_id = 0; 1545 int xcc_id = 0; 1546 struct amdgpu_device *adev = ip_block->adev; 1547 1548 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1549 case IP_VERSION(11, 0, 0): 1550 case IP_VERSION(11, 0, 2): 1551 case IP_VERSION(11, 0, 3): 1552 adev->gfx.me.num_me = 1; 1553 adev->gfx.me.num_pipe_per_me = 1; 1554 adev->gfx.me.num_queue_per_pipe = 1; 1555 adev->gfx.mec.num_mec = 2; 1556 adev->gfx.mec.num_pipe_per_mec = 4; 1557 adev->gfx.mec.num_queue_per_pipe = 4; 1558 break; 1559 case IP_VERSION(11, 0, 1): 1560 case IP_VERSION(11, 0, 4): 1561 case IP_VERSION(11, 5, 0): 1562 case IP_VERSION(11, 5, 1): 1563 case IP_VERSION(11, 5, 2): 1564 adev->gfx.me.num_me = 1; 1565 adev->gfx.me.num_pipe_per_me = 1; 1566 adev->gfx.me.num_queue_per_pipe = 1; 1567 adev->gfx.mec.num_mec = 1; 1568 adev->gfx.mec.num_pipe_per_mec = 4; 1569 adev->gfx.mec.num_queue_per_pipe = 4; 1570 break; 1571 default: 1572 adev->gfx.me.num_me = 1; 1573 adev->gfx.me.num_pipe_per_me = 1; 1574 adev->gfx.me.num_queue_per_pipe = 1; 1575 adev->gfx.mec.num_mec = 1; 1576 adev->gfx.mec.num_pipe_per_mec = 4; 1577 adev->gfx.mec.num_queue_per_pipe = 8; 1578 break; 1579 } 1580 1581 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1582 default: 1583 adev->gfx.enable_cleaner_shader = false; 1584 } 1585 1586 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1587 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1588 amdgpu_sriov_is_pp_one_vf(adev)) 1589 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1590 1591 /* EOP Event */ 1592 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1593 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1594 &adev->gfx.eop_irq); 1595 if (r) 1596 return r; 1597 1598 /* Bad opcode Event */ 1599 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1600 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1601 &adev->gfx.bad_op_irq); 1602 if (r) 1603 return r; 1604 1605 /* Privileged reg */ 1606 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1607 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1608 &adev->gfx.priv_reg_irq); 1609 if (r) 1610 return r; 1611 1612 /* Privileged inst */ 1613 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1614 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1615 &adev->gfx.priv_inst_irq); 1616 if (r) 1617 return r; 1618 1619 /* FED error */ 1620 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1621 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1622 &adev->gfx.rlc_gc_fed_irq); 1623 if (r) 1624 return r; 1625 1626 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1627 1628 gfx_v11_0_me_init(adev); 1629 1630 r = gfx_v11_0_rlc_init(adev); 1631 if (r) { 1632 DRM_ERROR("Failed to init rlc BOs!\n"); 1633 return r; 1634 } 1635 1636 r = gfx_v11_0_mec_init(adev); 1637 if (r) { 1638 
DRM_ERROR("Failed to init MEC BOs!\n"); 1639 return r; 1640 } 1641 1642 /* set up the gfx ring */ 1643 for (i = 0; i < adev->gfx.me.num_me; i++) { 1644 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 1645 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1646 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1647 continue; 1648 1649 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1650 i, k, j); 1651 if (r) 1652 return r; 1653 ring_id++; 1654 } 1655 } 1656 } 1657 1658 ring_id = 0; 1659 /* set up the compute queues - allocate horizontally across pipes */ 1660 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1661 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1662 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1663 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1664 k, j)) 1665 continue; 1666 1667 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1668 i, k, j); 1669 if (r) 1670 return r; 1671 1672 ring_id++; 1673 } 1674 } 1675 } 1676 1677 if (!adev->enable_mes_kiq) { 1678 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1679 if (r) { 1680 DRM_ERROR("Failed to init KIQ BOs!\n"); 1681 return r; 1682 } 1683 1684 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1685 if (r) 1686 return r; 1687 } 1688 1689 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1690 if (r) 1691 return r; 1692 1693 /* allocate visible FB for rlc auto-loading fw */ 1694 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1695 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1696 if (r) 1697 return r; 1698 } 1699 1700 r = gfx_v11_0_gpu_early_init(adev); 1701 if (r) 1702 return r; 1703 1704 if (amdgpu_gfx_ras_sw_init(adev)) { 1705 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1706 return -EINVAL; 1707 } 1708 1709 gfx_v11_0_alloc_ip_dump(adev); 1710 1711 r = amdgpu_gfx_sysfs_isolation_shader_init(adev); 1712 if (r) 1713 return r; 1714 1715 return 0; 1716 } 1717 1718 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1719 { 1720 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1721 &adev->gfx.pfp.pfp_fw_gpu_addr, 1722 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1723 1724 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1725 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1726 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1727 } 1728 1729 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1730 { 1731 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1732 &adev->gfx.me.me_fw_gpu_addr, 1733 (void **)&adev->gfx.me.me_fw_ptr); 1734 1735 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1736 &adev->gfx.me.me_fw_data_gpu_addr, 1737 (void **)&adev->gfx.me.me_fw_data_ptr); 1738 } 1739 1740 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1741 { 1742 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1743 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1744 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1745 } 1746 1747 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1748 { 1749 int i; 1750 struct amdgpu_device *adev = ip_block->adev; 1751 1752 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1753 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1754 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1755 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1756 1757 amdgpu_gfx_mqd_sw_fini(adev, 0); 1758 1759 if (!adev->enable_mes_kiq) { 1760 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1761 amdgpu_gfx_kiq_fini(adev, 0); 1762 } 1763 1764 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1765 1766 gfx_v11_0_pfp_fini(adev); 1767 gfx_v11_0_me_fini(adev); 1768 
gfx_v11_0_rlc_fini(adev); 1769 gfx_v11_0_mec_fini(adev); 1770 1771 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1772 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1773 1774 gfx_v11_0_free_microcode(adev); 1775 1776 amdgpu_gfx_sysfs_isolation_shader_fini(adev); 1777 1778 kfree(adev->gfx.ip_dump_core); 1779 kfree(adev->gfx.ip_dump_compute_queues); 1780 kfree(adev->gfx.ip_dump_gfx_queues); 1781 1782 return 0; 1783 } 1784 1785 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1786 u32 sh_num, u32 instance, int xcc_id) 1787 { 1788 u32 data; 1789 1790 if (instance == 0xffffffff) 1791 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1792 INSTANCE_BROADCAST_WRITES, 1); 1793 else 1794 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1795 instance); 1796 1797 if (se_num == 0xffffffff) 1798 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1799 1); 1800 else 1801 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1802 1803 if (sh_num == 0xffffffff) 1804 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1805 1); 1806 else 1807 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1808 1809 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1810 } 1811 1812 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1813 { 1814 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1815 1816 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1817 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1818 CC_GC_SA_UNIT_DISABLE, 1819 SA_DISABLE); 1820 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1821 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1822 GC_USER_SA_UNIT_DISABLE, 1823 SA_DISABLE); 1824 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1825 adev->gfx.config.max_shader_engines); 1826 1827 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1828 } 1829 1830 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1831 { 1832 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1833 u32 rb_mask; 1834 1835 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1836 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1837 CC_RB_BACKEND_DISABLE, 1838 BACKEND_DISABLE); 1839 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1840 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1841 GC_USER_RB_BACKEND_DISABLE, 1842 BACKEND_DISABLE); 1843 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1844 adev->gfx.config.max_shader_engines); 1845 1846 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1847 } 1848 1849 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1850 { 1851 u32 rb_bitmap_width_per_sa; 1852 u32 max_sa; 1853 u32 active_sa_bitmap; 1854 u32 global_active_rb_bitmap; 1855 u32 active_rb_bitmap = 0; 1856 u32 i; 1857 1858 /* query sa bitmap from SA_UNIT_DISABLE registers */ 1859 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 1860 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 1861 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 1862 1863 /* generate active rb bitmap according to active sa bitmap */ 1864 max_sa = adev->gfx.config.max_shader_engines * 1865 adev->gfx.config.max_sh_per_se; 1866 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1867 adev->gfx.config.max_sh_per_se; 1868 for (i = 0; i < max_sa; i++) { 1869 if 
(active_sa_bitmap & (1 << i)) 1870 active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); 1871 } 1872 1873 active_rb_bitmap &= global_active_rb_bitmap; 1874 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 1875 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 1876 } 1877 1878 #define DEFAULT_SH_MEM_BASES (0x6000) 1879 #define LDS_APP_BASE 0x1 1880 #define SCRATCH_APP_BASE 0x2 1881 1882 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 1883 { 1884 int i; 1885 uint32_t sh_mem_bases; 1886 uint32_t data; 1887 1888 /* 1889 * Configure apertures: 1890 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1891 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1892 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1893 */ 1894 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 1895 SCRATCH_APP_BASE; 1896 1897 mutex_lock(&adev->srbm_mutex); 1898 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1899 soc21_grbm_select(adev, 0, 0, 0, i); 1900 /* CP and shaders */ 1901 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1902 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 1903 1904 /* Enable trap for each kfd vmid. */ 1905 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 1906 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 1907 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 1908 } 1909 soc21_grbm_select(adev, 0, 0, 0, 0); 1910 mutex_unlock(&adev->srbm_mutex); 1911 1912 /* 1913 * Initialize all compute VMIDs to have no GDS, GWS, or OA 1914 * access. These should be enabled by FW for target VMIDs. 1915 */ 1916 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1917 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 1918 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 1919 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 1920 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 1921 } 1922 } 1923 1924 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 1925 { 1926 int vmid; 1927 1928 /* 1929 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 1930 * access. Compute VMIDs should be enabled by FW for target VMIDs, 1931 * the driver can enable them for graphics. VMID0 should maintain 1932 * access so that HWS firmware can save/restore entries. 1933 */ 1934 for (vmid = 1; vmid < 16; vmid++) { 1935 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 1936 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 1937 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 1938 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 1939 } 1940 } 1941 1942 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 1943 { 1944 /* TODO: harvest feature to be added later. */ 1945 } 1946 1947 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 1948 { 1949 /* TCCs are global (not instanced). 
*/ 1950 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 1951 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 1952 1953 adev->gfx.config.tcc_disabled_mask = 1954 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 1955 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 1956 } 1957 1958 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 1959 { 1960 u32 tmp; 1961 int i; 1962 1963 if (!amdgpu_sriov_vf(adev)) 1964 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1965 1966 gfx_v11_0_setup_rb(adev); 1967 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 1968 gfx_v11_0_get_tcc_info(adev); 1969 adev->gfx.config.pa_sc_tile_steering_override = 0; 1970 1971 /* Set whether texture coordinate truncation is conformant. */ 1972 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 1973 adev->gfx.config.ta_cntl2_truncate_coord_mode = 1974 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 1975 1976 /* XXX SH_MEM regs */ 1977 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1978 mutex_lock(&adev->srbm_mutex); 1979 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 1980 soc21_grbm_select(adev, 0, 0, 0, i); 1981 /* CP and shaders */ 1982 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1983 if (i != 0) { 1984 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1985 (adev->gmc.private_aperture_start >> 48)); 1986 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1987 (adev->gmc.shared_aperture_start >> 48)); 1988 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 1989 } 1990 } 1991 soc21_grbm_select(adev, 0, 0, 0, 0); 1992 1993 mutex_unlock(&adev->srbm_mutex); 1994 1995 gfx_v11_0_init_compute_vmid(adev); 1996 gfx_v11_0_init_gds_vmid(adev); 1997 } 1998 1999 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2000 int me, int pipe) 2001 { 2002 if (me != 0) 2003 return 0; 2004 2005 switch (pipe) { 2006 case 0: 2007 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2008 case 1: 2009 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2010 default: 2011 return 0; 2012 } 2013 } 2014 2015 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2016 int me, int pipe) 2017 { 2018 /* 2019 * amdgpu controls only the first MEC. That's why this function only 2020 * handles the setting of interrupts for this specific MEC. All other 2021 * pipes' interrupts are set by amdkfd. 2022 */ 2023 if (me != 1) 2024 return 0; 2025 2026 switch (pipe) { 2027 case 0: 2028 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2029 case 1: 2030 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2031 case 2: 2032 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2033 case 3: 2034 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2035 default: 2036 return 0; 2037 } 2038 } 2039 2040 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2041 bool enable) 2042 { 2043 u32 tmp, cp_int_cntl_reg; 2044 int i, j; 2045 2046 if (amdgpu_sriov_vf(adev)) 2047 return; 2048 2049 for (i = 0; i < adev->gfx.me.num_me; i++) { 2050 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2051 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2052 2053 if (cp_int_cntl_reg) { 2054 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2055 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2056 enable ? 1 : 0); 2057 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2058 enable ? 1 : 0); 2059 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2060 enable ? 
1 : 0); 2061 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2062 enable ? 1 : 0); 2063 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2064 } 2065 } 2066 } 2067 } 2068 2069 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2070 { 2071 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2072 2073 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2074 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2075 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2076 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2077 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2078 2079 return 0; 2080 } 2081 2082 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2083 { 2084 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2085 2086 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2087 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2088 } 2089 2090 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2091 { 2092 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2093 udelay(50); 2094 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2095 udelay(50); 2096 } 2097 2098 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2099 bool enable) 2100 { 2101 uint32_t rlc_pg_cntl; 2102 2103 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2104 2105 if (!enable) { 2106 /* RLC_PG_CNTL[23] = 0 (default) 2107 * RLC will wait for handshake acks with SMU 2108 * GFXOFF will be enabled 2109 * RLC_PG_CNTL[23] = 1 2110 * RLC will not issue any message to SMU 2111 * hence no handshake between SMU & RLC 2112 * GFXOFF will be disabled 2113 */ 2114 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2115 } else 2116 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2117 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2118 } 2119 2120 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2121 { 2122 /* TODO: enable rlc & smu handshake until smu 2123 * and gfxoff feature works as expected */ 2124 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2125 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2126 2127 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2128 udelay(50); 2129 } 2130 2131 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2132 { 2133 uint32_t tmp; 2134 2135 /* enable Save Restore Machine */ 2136 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2137 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2138 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2139 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2140 } 2141 2142 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2143 { 2144 const struct rlc_firmware_header_v2_0 *hdr; 2145 const __le32 *fw_data; 2146 unsigned i, fw_size; 2147 2148 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2149 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2150 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2151 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2152 2153 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2154 RLCG_UCODE_LOADING_START_ADDRESS); 2155 2156 for (i = 0; i < fw_size; i++) 2157 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2158 le32_to_cpup(fw_data++)); 2159 2160 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2161 } 2162 2163 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2164 { 2165 const struct rlc_firmware_header_v2_2 *hdr; 2166 const __le32 *fw_data; 2167 unsigned i, fw_size; 2168 u32 tmp; 2169 2170 hdr = (const struct 
rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2171 2172 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2173 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2174 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2175 2176 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2177 2178 for (i = 0; i < fw_size; i++) { 2179 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2180 msleep(1); 2181 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2182 le32_to_cpup(fw_data++)); 2183 } 2184 2185 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2186 2187 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2188 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2189 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2190 2191 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2192 for (i = 0; i < fw_size; i++) { 2193 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2194 msleep(1); 2195 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2196 le32_to_cpup(fw_data++)); 2197 } 2198 2199 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2200 2201 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2202 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2203 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2204 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2205 } 2206 2207 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2208 { 2209 const struct rlc_firmware_header_v2_3 *hdr; 2210 const __le32 *fw_data; 2211 unsigned i, fw_size; 2212 u32 tmp; 2213 2214 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2215 2216 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2217 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2218 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2219 2220 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2221 2222 for (i = 0; i < fw_size; i++) { 2223 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2224 msleep(1); 2225 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2226 le32_to_cpup(fw_data++)); 2227 } 2228 2229 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2230 2231 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2232 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2233 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2234 2235 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2236 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2237 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2238 2239 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2240 2241 for (i = 0; i < fw_size; i++) { 2242 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2243 msleep(1); 2244 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2245 le32_to_cpup(fw_data++)); 2246 } 2247 2248 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2249 2250 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2251 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2252 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2253 } 2254 2255 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2256 { 2257 const struct rlc_firmware_header_v2_0 *hdr; 2258 uint16_t version_major; 2259 uint16_t version_minor; 2260 2261 if (!adev->gfx.rlc_fw) 2262 return -EINVAL; 2263 2264 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2265 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2266 2267 version_major = le16_to_cpu(hdr->header.header_version_major); 2268 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2269 2270 if (version_major == 2) { 2271 
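		/* Header v2.x layout: the RLCG image is always present; v2.2 adds
		 * the LX6 IRAM/DRAM images and v2.3 additionally carries the
		 * RLCP/RLCV images. Both optional sets are only loaded below when
		 * amdgpu_dpm is enabled.
		 */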
gfx_v11_0_load_rlcg_microcode(adev); 2272 if (amdgpu_dpm == 1) { 2273 if (version_minor >= 2) 2274 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2275 if (version_minor == 3) 2276 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2277 } 2278 2279 return 0; 2280 } 2281 2282 return -EINVAL; 2283 } 2284 2285 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2286 { 2287 int r; 2288 2289 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2290 gfx_v11_0_init_csb(adev); 2291 2292 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2293 gfx_v11_0_rlc_enable_srm(adev); 2294 } else { 2295 if (amdgpu_sriov_vf(adev)) { 2296 gfx_v11_0_init_csb(adev); 2297 return 0; 2298 } 2299 2300 adev->gfx.rlc.funcs->stop(adev); 2301 2302 /* disable CG */ 2303 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2304 2305 /* disable PG */ 2306 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2307 2308 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2309 /* legacy rlc firmware loading */ 2310 r = gfx_v11_0_rlc_load_microcode(adev); 2311 if (r) 2312 return r; 2313 } 2314 2315 gfx_v11_0_init_csb(adev); 2316 2317 adev->gfx.rlc.funcs->start(adev); 2318 } 2319 return 0; 2320 } 2321 2322 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2323 { 2324 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2325 uint32_t tmp; 2326 int i; 2327 2328 /* Trigger an invalidation of the L1 instruction caches */ 2329 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2330 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2331 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2332 2333 /* Wait for invalidation complete */ 2334 for (i = 0; i < usec_timeout; i++) { 2335 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2336 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2337 INVALIDATE_CACHE_COMPLETE)) 2338 break; 2339 udelay(1); 2340 } 2341 2342 if (i >= usec_timeout) { 2343 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2344 return -EINVAL; 2345 } 2346 2347 if (amdgpu_emu_mode == 1) 2348 adev->hdp.funcs->flush_hdp(adev, NULL); 2349 2350 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2351 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2352 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2353 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2354 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2355 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2356 2357 /* Program me ucode address into intruction cache address register */ 2358 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2359 lower_32_bits(addr) & 0xFFFFF000); 2360 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2361 upper_32_bits(addr)); 2362 2363 return 0; 2364 } 2365 2366 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2367 { 2368 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2369 uint32_t tmp; 2370 int i; 2371 2372 /* Trigger an invalidation of the L1 instruction caches */ 2373 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2374 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2375 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2376 2377 /* Wait for invalidation complete */ 2378 for (i = 0; i < usec_timeout; i++) { 2379 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2380 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2381 INVALIDATE_CACHE_COMPLETE)) 2382 break; 2383 udelay(1); 2384 } 2385 2386 if (i >= usec_timeout) { 2387 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2388 return -EINVAL; 2389 } 2390 2391 if 
(amdgpu_emu_mode == 1) 2392 adev->hdp.funcs->flush_hdp(adev, NULL); 2393 2394 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2395 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2396 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2397 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2398 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2399 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2400 2401 /* Program pfp ucode address into intruction cache address register */ 2402 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2403 lower_32_bits(addr) & 0xFFFFF000); 2404 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2405 upper_32_bits(addr)); 2406 2407 return 0; 2408 } 2409 2410 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2411 { 2412 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2413 uint32_t tmp; 2414 int i; 2415 2416 /* Trigger an invalidation of the L1 instruction caches */ 2417 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2418 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2419 2420 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2421 2422 /* Wait for invalidation complete */ 2423 for (i = 0; i < usec_timeout; i++) { 2424 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2425 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2426 INVALIDATE_CACHE_COMPLETE)) 2427 break; 2428 udelay(1); 2429 } 2430 2431 if (i >= usec_timeout) { 2432 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2433 return -EINVAL; 2434 } 2435 2436 if (amdgpu_emu_mode == 1) 2437 adev->hdp.funcs->flush_hdp(adev, NULL); 2438 2439 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2440 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2441 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2442 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2443 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2444 2445 /* Program mec1 ucode address into intruction cache address register */ 2446 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2447 lower_32_bits(addr) & 0xFFFFF000); 2448 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2449 upper_32_bits(addr)); 2450 2451 return 0; 2452 } 2453 2454 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2455 { 2456 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2457 uint32_t tmp; 2458 unsigned i, pipe_id; 2459 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2460 2461 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2462 adev->gfx.pfp_fw->data; 2463 2464 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2465 lower_32_bits(addr)); 2466 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2467 upper_32_bits(addr)); 2468 2469 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2470 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2471 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2472 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2473 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2474 2475 /* 2476 * Programming any of the CP_PFP_IC_BASE registers 2477 * forces invalidation of the ME L1 I$. 
Wait for the 2478 * invalidation complete 2479 */ 2480 for (i = 0; i < usec_timeout; i++) { 2481 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2482 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2483 INVALIDATE_CACHE_COMPLETE)) 2484 break; 2485 udelay(1); 2486 } 2487 2488 if (i >= usec_timeout) { 2489 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2490 return -EINVAL; 2491 } 2492 2493 /* Prime the L1 instruction caches */ 2494 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2495 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2496 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2497 /* Waiting for cache primed*/ 2498 for (i = 0; i < usec_timeout; i++) { 2499 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2500 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2501 ICACHE_PRIMED)) 2502 break; 2503 udelay(1); 2504 } 2505 2506 if (i >= usec_timeout) { 2507 dev_err(adev->dev, "failed to prime instruction cache\n"); 2508 return -EINVAL; 2509 } 2510 2511 mutex_lock(&adev->srbm_mutex); 2512 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2513 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2514 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2515 (pfp_hdr->ucode_start_addr_hi << 30) | 2516 (pfp_hdr->ucode_start_addr_lo >> 2)); 2517 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2518 pfp_hdr->ucode_start_addr_hi >> 2); 2519 2520 /* 2521 * Program CP_ME_CNTL to reset given PIPE to take 2522 * effect of CP_PFP_PRGRM_CNTR_START. 2523 */ 2524 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2525 if (pipe_id == 0) 2526 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2527 PFP_PIPE0_RESET, 1); 2528 else 2529 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2530 PFP_PIPE1_RESET, 1); 2531 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2532 2533 /* Clear pfp pipe0 reset bit. 
*/ 2534 if (pipe_id == 0) 2535 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2536 PFP_PIPE0_RESET, 0); 2537 else 2538 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2539 PFP_PIPE1_RESET, 0); 2540 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2541 2542 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2543 lower_32_bits(addr2)); 2544 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2545 upper_32_bits(addr2)); 2546 } 2547 soc21_grbm_select(adev, 0, 0, 0, 0); 2548 mutex_unlock(&adev->srbm_mutex); 2549 2550 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2551 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2552 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2553 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2554 2555 /* Invalidate the data caches */ 2556 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2557 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2558 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2559 2560 for (i = 0; i < usec_timeout; i++) { 2561 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2562 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2563 INVALIDATE_DCACHE_COMPLETE)) 2564 break; 2565 udelay(1); 2566 } 2567 2568 if (i >= usec_timeout) { 2569 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2570 return -EINVAL; 2571 } 2572 2573 return 0; 2574 } 2575 2576 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2577 { 2578 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2579 uint32_t tmp; 2580 unsigned i, pipe_id; 2581 const struct gfx_firmware_header_v2_0 *me_hdr; 2582 2583 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2584 adev->gfx.me_fw->data; 2585 2586 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2587 lower_32_bits(addr)); 2588 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2589 upper_32_bits(addr)); 2590 2591 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2592 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2593 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2594 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2595 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2596 2597 /* 2598 * Programming any of the CP_ME_IC_BASE registers 2599 * forces invalidation of the ME L1 I$. 
Wait for the
	 * invalidation complete
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Waiting for instruction cache primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear the ME pipe reset bit.
*/ 2657 if (pipe_id == 0) 2658 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2659 ME_PIPE0_RESET, 0); 2660 else 2661 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2662 ME_PIPE1_RESET, 0); 2663 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2664 2665 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2666 lower_32_bits(addr2)); 2667 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2668 upper_32_bits(addr2)); 2669 } 2670 soc21_grbm_select(adev, 0, 0, 0, 0); 2671 mutex_unlock(&adev->srbm_mutex); 2672 2673 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2674 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2675 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2676 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2677 2678 /* Invalidate the data caches */ 2679 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2680 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2681 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2682 2683 for (i = 0; i < usec_timeout; i++) { 2684 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2685 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2686 INVALIDATE_DCACHE_COMPLETE)) 2687 break; 2688 udelay(1); 2689 } 2690 2691 if (i >= usec_timeout) { 2692 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2693 return -EINVAL; 2694 } 2695 2696 return 0; 2697 } 2698 2699 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2700 { 2701 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2702 uint32_t tmp; 2703 unsigned i; 2704 const struct gfx_firmware_header_v2_0 *mec_hdr; 2705 2706 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2707 adev->gfx.mec_fw->data; 2708 2709 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2710 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2711 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2712 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2713 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2714 2715 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2716 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2717 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2718 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2719 2720 mutex_lock(&adev->srbm_mutex); 2721 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2722 soc21_grbm_select(adev, 1, i, 0, 0); 2723 2724 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2725 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2726 upper_32_bits(addr2)); 2727 2728 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2729 mec_hdr->ucode_start_addr_lo >> 2 | 2730 mec_hdr->ucode_start_addr_hi << 30); 2731 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2732 mec_hdr->ucode_start_addr_hi >> 2); 2733 2734 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2735 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2736 upper_32_bits(addr)); 2737 } 2738 mutex_unlock(&adev->srbm_mutex); 2739 soc21_grbm_select(adev, 0, 0, 0, 0); 2740 2741 /* Trigger an invalidation of the L1 instruction caches */ 2742 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2743 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2744 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2745 2746 /* Wait for invalidation complete */ 2747 for (i = 0; i < usec_timeout; i++) { 2748 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2749 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2750 INVALIDATE_DCACHE_COMPLETE)) 2751 break; 2752 udelay(1); 2753 } 2754 2755 if (i >= 
usec_timeout) { 2756 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2757 return -EINVAL; 2758 } 2759 2760 /* Trigger an invalidation of the L1 instruction caches */ 2761 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2762 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2763 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2764 2765 /* Wait for invalidation complete */ 2766 for (i = 0; i < usec_timeout; i++) { 2767 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2768 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2769 INVALIDATE_CACHE_COMPLETE)) 2770 break; 2771 udelay(1); 2772 } 2773 2774 if (i >= usec_timeout) { 2775 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2776 return -EINVAL; 2777 } 2778 2779 return 0; 2780 } 2781 2782 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2783 { 2784 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2785 const struct gfx_firmware_header_v2_0 *me_hdr; 2786 const struct gfx_firmware_header_v2_0 *mec_hdr; 2787 uint32_t pipe_id, tmp; 2788 2789 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2790 adev->gfx.mec_fw->data; 2791 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2792 adev->gfx.me_fw->data; 2793 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2794 adev->gfx.pfp_fw->data; 2795 2796 /* config pfp program start addr */ 2797 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2798 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2799 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2800 (pfp_hdr->ucode_start_addr_hi << 30) | 2801 (pfp_hdr->ucode_start_addr_lo >> 2)); 2802 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2803 pfp_hdr->ucode_start_addr_hi >> 2); 2804 } 2805 soc21_grbm_select(adev, 0, 0, 0, 0); 2806 2807 /* reset pfp pipe */ 2808 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2809 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2810 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2811 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2812 2813 /* clear pfp pipe reset */ 2814 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2815 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2816 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2817 2818 /* config me program start addr */ 2819 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2820 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2821 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2822 (me_hdr->ucode_start_addr_hi << 30) | 2823 (me_hdr->ucode_start_addr_lo >> 2) ); 2824 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2825 me_hdr->ucode_start_addr_hi>>2); 2826 } 2827 soc21_grbm_select(adev, 0, 0, 0, 0); 2828 2829 /* reset me pipe */ 2830 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2831 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2832 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2833 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2834 2835 /* clear me pipe reset */ 2836 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2837 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2838 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2839 2840 /* config mec program start addr */ 2841 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2842 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2843 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2844 mec_hdr->ucode_start_addr_lo >> 2 | 2845 mec_hdr->ucode_start_addr_hi << 30); 2846 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2847 mec_hdr->ucode_start_addr_hi >> 2); 2848 } 2849 soc21_grbm_select(adev, 0, 0, 0, 0); 2850 2851 /* reset mec pipe */ 2852 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2853 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2854 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2855 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 2856 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 2857 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2858 2859 /* clear mec pipe reset */ 2860 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 2861 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 2862 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 2863 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 2864 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2865 } 2866 2867 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 2868 { 2869 uint32_t cp_status; 2870 uint32_t bootload_status; 2871 int i, r; 2872 uint64_t addr, addr2; 2873 2874 for (i = 0; i < adev->usec_timeout; i++) { 2875 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 2876 2877 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 2878 IP_VERSION(11, 0, 1) || 2879 amdgpu_ip_version(adev, GC_HWIP, 0) == 2880 IP_VERSION(11, 0, 4) || 2881 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 2882 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 2883 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2)) 2884 bootload_status = RREG32_SOC15(GC, 0, 2885 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 2886 else 2887 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 2888 2889 if ((cp_status == 0) && 2890 (REG_GET_FIELD(bootload_status, 2891 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 2892 break; 2893 } 2894 udelay(1); 2895 } 2896 2897 if (i >= adev->usec_timeout) { 2898 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 2899 return -ETIMEDOUT; 2900 } 2901 2902 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2903 if (adev->gfx.rs64_enable) { 2904 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2905 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 2906 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2907 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 2908 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 2909 if (r) 2910 return r; 2911 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2912 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 2913 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2914 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 2915 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 2916 if (r) 2917 return r; 2918 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2919 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 2920 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 2921 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 2922 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 2923 if (r) 2924 return r; 2925 } else { 2926 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2927 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 2928 r = gfx_v11_0_config_me_cache(adev, addr); 2929 if (r) 2930 return r; 2931 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2932 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 2933 r = gfx_v11_0_config_pfp_cache(adev, addr); 2934 if (r) 2935 return r; 2936 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 2937 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 2938 r = gfx_v11_0_config_mec_cache(adev, addr); 2939 if (r) 2940 return r; 2941 } 2942 } 2943 2944 return 0; 2945 } 2946 2947 
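/* Halt (enable == false) or un-halt (enable == true) the gfx command processor:
 * toggles the ME/PFP halt bits in CP_ME_CNTL, then polls CP_STAT until the CP
 * reports idle or the timeout expires.
 */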
static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2948 { 2949 int i; 2950 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2951 2952 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2953 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2954 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2955 2956 for (i = 0; i < adev->usec_timeout; i++) { 2957 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 2958 break; 2959 udelay(1); 2960 } 2961 2962 if (i >= adev->usec_timeout) 2963 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 2964 2965 return 0; 2966 } 2967 2968 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 2969 { 2970 int r; 2971 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2972 const __le32 *fw_data; 2973 unsigned i, fw_size; 2974 2975 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2976 adev->gfx.pfp_fw->data; 2977 2978 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2979 2980 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 2981 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2982 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 2983 2984 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 2985 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 2986 &adev->gfx.pfp.pfp_fw_obj, 2987 &adev->gfx.pfp.pfp_fw_gpu_addr, 2988 (void **)&adev->gfx.pfp.pfp_fw_ptr); 2989 if (r) { 2990 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 2991 gfx_v11_0_pfp_fini(adev); 2992 return r; 2993 } 2994 2995 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 2996 2997 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 2998 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 2999 3000 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3001 3002 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3003 3004 for (i = 0; i < pfp_hdr->jt_size; i++) 3005 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3006 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3007 3008 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3009 3010 return 0; 3011 } 3012 3013 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3014 { 3015 int r; 3016 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3017 const __le32 *fw_ucode, *fw_data; 3018 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3019 uint32_t tmp; 3020 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3021 3022 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3023 adev->gfx.pfp_fw->data; 3024 3025 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3026 3027 /* instruction */ 3028 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3029 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3030 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3031 /* data */ 3032 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3033 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3034 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3035 3036 /* 64kb align */ 3037 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3038 64 * 1024, 3039 AMDGPU_GEM_DOMAIN_VRAM | 3040 AMDGPU_GEM_DOMAIN_GTT, 3041 &adev->gfx.pfp.pfp_fw_obj, 3042 &adev->gfx.pfp.pfp_fw_gpu_addr, 3043 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3044 if (r) { 3045 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3046 gfx_v11_0_pfp_fini(adev); 3047 return r; 3048 } 3049 3050 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3051 64 * 1024, 3052 AMDGPU_GEM_DOMAIN_VRAM | 3053 AMDGPU_GEM_DOMAIN_GTT, 3054 &adev->gfx.pfp.pfp_fw_data_obj, 3055 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3056 
(void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3057 if (r) { 3058 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3059 gfx_v11_0_pfp_fini(adev); 3060 return r; 3061 } 3062 3063 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3064 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3065 3066 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3067 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3068 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3069 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3070 3071 if (amdgpu_emu_mode == 1) 3072 adev->hdp.funcs->flush_hdp(adev, NULL); 3073 3074 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3075 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3076 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3077 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3078 3079 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3080 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3081 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3082 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3083 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3084 3085 /* 3086 * Programming any of the CP_PFP_IC_BASE registers 3087 * forces invalidation of the ME L1 I$. Wait for the 3088 * invalidation complete 3089 */ 3090 for (i = 0; i < usec_timeout; i++) { 3091 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3092 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3093 INVALIDATE_CACHE_COMPLETE)) 3094 break; 3095 udelay(1); 3096 } 3097 3098 if (i >= usec_timeout) { 3099 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3100 return -EINVAL; 3101 } 3102 3103 /* Prime the L1 instruction caches */ 3104 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3105 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3106 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3107 /* Waiting for cache primed*/ 3108 for (i = 0; i < usec_timeout; i++) { 3109 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3110 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3111 ICACHE_PRIMED)) 3112 break; 3113 udelay(1); 3114 } 3115 3116 if (i >= usec_timeout) { 3117 dev_err(adev->dev, "failed to prime instruction cache\n"); 3118 return -EINVAL; 3119 } 3120 3121 mutex_lock(&adev->srbm_mutex); 3122 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3123 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3124 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3125 (pfp_hdr->ucode_start_addr_hi << 30) | 3126 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3127 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3128 pfp_hdr->ucode_start_addr_hi>>2); 3129 3130 /* 3131 * Program CP_ME_CNTL to reset given PIPE to take 3132 * effect of CP_PFP_PRGRM_CNTR_START. 3133 */ 3134 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3135 if (pipe_id == 0) 3136 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3137 PFP_PIPE0_RESET, 1); 3138 else 3139 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3140 PFP_PIPE1_RESET, 1); 3141 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3142 3143 /* Clear pfp pipe0 reset bit. 
*/ 3144 if (pipe_id == 0) 3145 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3146 PFP_PIPE0_RESET, 0); 3147 else 3148 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3149 PFP_PIPE1_RESET, 0); 3150 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3151 3152 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3153 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3154 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3155 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3156 } 3157 soc21_grbm_select(adev, 0, 0, 0, 0); 3158 mutex_unlock(&adev->srbm_mutex); 3159 3160 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3161 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3162 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3163 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3164 3165 /* Invalidate the data caches */ 3166 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3167 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3168 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3169 3170 for (i = 0; i < usec_timeout; i++) { 3171 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3172 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3173 INVALIDATE_DCACHE_COMPLETE)) 3174 break; 3175 udelay(1); 3176 } 3177 3178 if (i >= usec_timeout) { 3179 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3180 return -EINVAL; 3181 } 3182 3183 return 0; 3184 } 3185 3186 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3187 { 3188 int r; 3189 const struct gfx_firmware_header_v1_0 *me_hdr; 3190 const __le32 *fw_data; 3191 unsigned i, fw_size; 3192 3193 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3194 adev->gfx.me_fw->data; 3195 3196 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3197 3198 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3199 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3200 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3201 3202 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3203 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3204 &adev->gfx.me.me_fw_obj, 3205 &adev->gfx.me.me_fw_gpu_addr, 3206 (void **)&adev->gfx.me.me_fw_ptr); 3207 if (r) { 3208 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3209 gfx_v11_0_me_fini(adev); 3210 return r; 3211 } 3212 3213 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3214 3215 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3216 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3217 3218 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3219 3220 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3221 3222 for (i = 0; i < me_hdr->jt_size; i++) 3223 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3224 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3225 3226 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3227 3228 return 0; 3229 } 3230 3231 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3232 { 3233 int r; 3234 const struct gfx_firmware_header_v2_0 *me_hdr; 3235 const __le32 *fw_ucode, *fw_data; 3236 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3237 uint32_t tmp; 3238 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3239 3240 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3241 adev->gfx.me_fw->data; 3242 3243 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3244 3245 /* instruction */ 3246 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3247 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3248 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3249 /* data */ 3250 
fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3251 le32_to_cpu(me_hdr->data_offset_bytes)); 3252 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 3253 3254 /* 64KB alignment */ 3255 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3256 64 * 1024, 3257 AMDGPU_GEM_DOMAIN_VRAM | 3258 AMDGPU_GEM_DOMAIN_GTT, 3259 &adev->gfx.me.me_fw_obj, 3260 &adev->gfx.me.me_fw_gpu_addr, 3261 (void **)&adev->gfx.me.me_fw_ptr); 3262 if (r) { 3263 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 3264 gfx_v11_0_me_fini(adev); 3265 return r; 3266 } 3267 3268 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3269 64 * 1024, 3270 AMDGPU_GEM_DOMAIN_VRAM | 3271 AMDGPU_GEM_DOMAIN_GTT, 3272 &adev->gfx.me.me_fw_data_obj, 3273 &adev->gfx.me.me_fw_data_gpu_addr, 3274 (void **)&adev->gfx.me.me_fw_data_ptr); 3275 if (r) { 3276 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 3277 gfx_v11_0_me_fini(adev); 3278 return r; 3279 } 3280 3281 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 3282 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 3283 3284 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3285 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 3286 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3287 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3288 3289 if (amdgpu_emu_mode == 1) 3290 adev->hdp.funcs->flush_hdp(adev, NULL); 3291 3292 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3293 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3294 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 3295 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3296 3297 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 3298 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 3299 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 3300 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 3301 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 3302 3303 /* 3304 * Programming any of the CP_ME_IC_BASE registers 3305 * forces invalidation of the ME L1 I$. Wait for the 3306 * invalidation to complete 3307 */ 3308 for (i = 0; i < usec_timeout; i++) { 3309 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3310 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3311 INVALIDATE_CACHE_COMPLETE)) 3312 break; 3313 udelay(1); 3314 } 3315 3316 if (i >= usec_timeout) { 3317 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3318 return -EINVAL; 3319 } 3320 3321 /* Prime the instruction caches */ 3322 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3323 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 3324 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 3325 3326 /* Wait for the instruction cache to be primed */ 3327 for (i = 0; i < usec_timeout; i++) { 3328 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3329 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3330 ICACHE_PRIMED)) 3331 break; 3332 udelay(1); 3333 } 3334 3335 if (i >= usec_timeout) { 3336 dev_err(adev->dev, "failed to prime instruction cache\n"); 3337 return -EINVAL; 3338 } 3339 3340 mutex_lock(&adev->srbm_mutex); 3341 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3342 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3343 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3344 (me_hdr->ucode_start_addr_hi << 30) | 3345 (me_hdr->ucode_start_addr_lo >> 2)); 3346 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3347 me_hdr->ucode_start_addr_hi >> 2); 3348 3349 /* 3350 * Program CP_ME_CNTL to reset the given PIPE so that 3351 * CP_ME_PRGRM_CNTR_START takes effect. 
3352 */ 3353 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3354 if (pipe_id == 0) 3355 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3356 ME_PIPE0_RESET, 1); 3357 else 3358 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3359 ME_PIPE1_RESET, 1); 3360 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3361 3362 /* Clear pfp pipe0 reset bit. */ 3363 if (pipe_id == 0) 3364 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3365 ME_PIPE0_RESET, 0); 3366 else 3367 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3368 ME_PIPE1_RESET, 0); 3369 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3370 3371 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3372 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3373 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3374 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3375 } 3376 soc21_grbm_select(adev, 0, 0, 0, 0); 3377 mutex_unlock(&adev->srbm_mutex); 3378 3379 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3380 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3381 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3382 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3383 3384 /* Invalidate the data caches */ 3385 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3386 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3387 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3388 3389 for (i = 0; i < usec_timeout; i++) { 3390 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3391 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3392 INVALIDATE_DCACHE_COMPLETE)) 3393 break; 3394 udelay(1); 3395 } 3396 3397 if (i >= usec_timeout) { 3398 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3399 return -EINVAL; 3400 } 3401 3402 return 0; 3403 } 3404 3405 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3406 { 3407 int r; 3408 3409 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3410 return -EINVAL; 3411 3412 gfx_v11_0_cp_gfx_enable(adev, false); 3413 3414 if (adev->gfx.rs64_enable) 3415 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3416 else 3417 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3418 if (r) { 3419 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3420 return r; 3421 } 3422 3423 if (adev->gfx.rs64_enable) 3424 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3425 else 3426 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3427 if (r) { 3428 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3429 return r; 3430 } 3431 3432 return 0; 3433 } 3434 3435 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3436 { 3437 struct amdgpu_ring *ring; 3438 const struct cs_section_def *sect = NULL; 3439 const struct cs_extent_def *ext = NULL; 3440 int r, i; 3441 int ctx_reg_offset; 3442 3443 /* init the CP */ 3444 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3445 adev->gfx.config.max_hw_contexts - 1); 3446 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3447 3448 if (!amdgpu_async_gfx_ring) 3449 gfx_v11_0_cp_gfx_enable(adev, true); 3450 3451 ring = &adev->gfx.gfx_ring[0]; 3452 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3453 if (r) { 3454 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3455 return r; 3456 } 3457 3458 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3459 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3460 3461 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3462 amdgpu_ring_write(ring, 0x80000000); 3463 amdgpu_ring_write(ring, 0x80000000); 3464 3465 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3466 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3467 if (sect->id == SECT_CONTEXT) { 3468 amdgpu_ring_write(ring, 3469 PACKET3(PACKET3_SET_CONTEXT_REG, 3470 ext->reg_count)); 3471 amdgpu_ring_write(ring, ext->reg_index - 3472 PACKET3_SET_CONTEXT_REG_START); 3473 for (i = 0; i < ext->reg_count; i++) 3474 amdgpu_ring_write(ring, ext->extent[i]); 3475 } 3476 } 3477 } 3478 3479 ctx_reg_offset = 3480 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3481 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3482 amdgpu_ring_write(ring, ctx_reg_offset); 3483 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3484 3485 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3486 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3487 3488 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3489 amdgpu_ring_write(ring, 0); 3490 3491 amdgpu_ring_commit(ring); 3492 3493 /* submit cs packet to copy state 0 to next available state */ 3494 if (adev->gfx.num_gfx_rings > 1) { 3495 /* maximum supported gfx ring is 2 */ 3496 ring = &adev->gfx.gfx_ring[1]; 3497 r = amdgpu_ring_alloc(ring, 2); 3498 if (r) { 3499 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3500 return r; 3501 } 3502 3503 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3504 amdgpu_ring_write(ring, 0); 3505 3506 amdgpu_ring_commit(ring); 3507 } 3508 return 0; 3509 } 3510 3511 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3512 CP_PIPE_ID pipe) 3513 { 3514 u32 tmp; 3515 3516 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3517 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3518 3519 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3520 } 3521 3522 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3523 struct amdgpu_ring *ring) 3524 { 3525 u32 tmp; 3526 3527 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3528 if (ring->use_doorbell) { 3529 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3530 DOORBELL_OFFSET, ring->doorbell_index); 3531 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3532 DOORBELL_EN, 1); 3533 } else { 3534 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3535 DOORBELL_EN, 0); 3536 } 3537 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3538 3539 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3540 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3541 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3542 3543 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3544 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3545 } 3546 3547 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3548 { 3549 struct amdgpu_ring *ring; 3550 u32 tmp; 3551 u32 rb_bufsz; 3552 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3553 3554 /* Set the write pointer delay */ 3555 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3556 3557 /* set the RB to use vmid 0 */ 3558 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3559 3560 /* Init gfx ring 0 for pipe 0 */ 3561 mutex_lock(&adev->srbm_mutex); 3562 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3563 3564 /* Set ring buffer size */ 3565 ring = &adev->gfx.gfx_ring[0]; 3566 rb_bufsz = order_base_2(ring->ring_size / 8); 3567 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3568 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3569 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3570 3571 /* Initialize the ring buffer's write pointers */ 3572 ring->wptr = 0; 3573 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3574 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3575 3576 /* set the wb address whether it's enabled or not */ 3577 rptr_addr = ring->rptr_gpu_addr; 3578 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3579 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3580 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3581 3582 wptr_gpu_addr = ring->wptr_gpu_addr; 3583 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3584 lower_32_bits(wptr_gpu_addr)); 3585 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3586 upper_32_bits(wptr_gpu_addr)); 3587 3588 mdelay(1); 3589 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3590 3591 rb_addr = ring->gpu_addr >> 8; 3592 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3593 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3594 3595 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3596 3597 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3598 mutex_unlock(&adev->srbm_mutex); 3599 3600 /* Init gfx ring 1 for pipe 1 */ 3601 if (adev->gfx.num_gfx_rings > 1) { 3602 mutex_lock(&adev->srbm_mutex); 3603 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3604 /* maximum supported gfx ring is 2 */ 3605 ring = &adev->gfx.gfx_ring[1]; 3606 rb_bufsz = order_base_2(ring->ring_size / 8); 3607 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3608 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3609 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3610 /* Initialize the ring buffer's write pointers */ 3611 ring->wptr = 0; 3612 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3613 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3614 /* Set the wb address whether it's enabled or not */ 3615 rptr_addr = ring->rptr_gpu_addr; 3616 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3617 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3618 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3619 wptr_gpu_addr = ring->wptr_gpu_addr; 3620 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3621 lower_32_bits(wptr_gpu_addr)); 3622 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3623 upper_32_bits(wptr_gpu_addr)); 3624 3625 mdelay(1); 3626 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3627 3628 rb_addr = ring->gpu_addr >> 8; 3629 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3630 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3631 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3632 3633 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3634 mutex_unlock(&adev->srbm_mutex); 3635 } 3636 /* Switch to pipe 0 */ 3637 mutex_lock(&adev->srbm_mutex); 3638 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3639 mutex_unlock(&adev->srbm_mutex); 3640 3641 /* start the ring */ 3642 gfx_v11_0_cp_gfx_start(adev); 3643 3644 return 0; 3645 } 3646 3647 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3648 { 3649 u32 data; 3650 3651 if (adev->gfx.rs64_enable) { 3652 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3653 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3654 enable ? 0 : 1); 3655 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3656 enable ? 0 : 1); 3657 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3658 enable ? 0 : 1); 3659 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3660 enable ? 0 : 1); 3661 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3662 enable ? 0 : 1); 3663 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3664 enable ? 
1 : 0); 3665 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3666 enable ? 1 : 0); 3667 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3668 enable ? 1 : 0); 3669 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3670 enable ? 1 : 0); 3671 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3672 enable ? 0 : 1); 3673 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3674 } else { 3675 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3676 3677 if (enable) { 3678 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3679 if (!adev->enable_mes_kiq) 3680 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3681 MEC_ME2_HALT, 0); 3682 } else { 3683 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3684 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3685 } 3686 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3687 } 3688 3689 udelay(50); 3690 } 3691 3692 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3693 { 3694 const struct gfx_firmware_header_v1_0 *mec_hdr; 3695 const __le32 *fw_data; 3696 unsigned i, fw_size; 3697 u32 *fw = NULL; 3698 int r; 3699 3700 if (!adev->gfx.mec_fw) 3701 return -EINVAL; 3702 3703 gfx_v11_0_cp_compute_enable(adev, false); 3704 3705 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3706 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3707 3708 fw_data = (const __le32 *) 3709 (adev->gfx.mec_fw->data + 3710 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3711 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3712 3713 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3714 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3715 &adev->gfx.mec.mec_fw_obj, 3716 &adev->gfx.mec.mec_fw_gpu_addr, 3717 (void **)&fw); 3718 if (r) { 3719 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3720 gfx_v11_0_mec_fini(adev); 3721 return r; 3722 } 3723 3724 memcpy(fw, fw_data, fw_size); 3725 3726 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3727 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3728 3729 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3730 3731 /* MEC1 */ 3732 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3733 3734 for (i = 0; i < mec_hdr->jt_size; i++) 3735 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3736 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3737 3738 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3739 3740 return 0; 3741 } 3742 3743 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3744 { 3745 const struct gfx_firmware_header_v2_0 *mec_hdr; 3746 const __le32 *fw_ucode, *fw_data; 3747 u32 tmp, fw_ucode_size, fw_data_size; 3748 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3749 u32 *fw_ucode_ptr, *fw_data_ptr; 3750 int r; 3751 3752 if (!adev->gfx.mec_fw) 3753 return -EINVAL; 3754 3755 gfx_v11_0_cp_compute_enable(adev, false); 3756 3757 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3758 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3759 3760 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3761 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3762 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3763 3764 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3765 le32_to_cpu(mec_hdr->data_offset_bytes)); 3766 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3767 3768 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3769 64 * 1024, 3770 AMDGPU_GEM_DOMAIN_VRAM | 3771 AMDGPU_GEM_DOMAIN_GTT, 3772 &adev->gfx.mec.mec_fw_obj, 3773 
&adev->gfx.mec.mec_fw_gpu_addr, 3774 (void **)&fw_ucode_ptr); 3775 if (r) { 3776 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3777 gfx_v11_0_mec_fini(adev); 3778 return r; 3779 } 3780 3781 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3782 64 * 1024, 3783 AMDGPU_GEM_DOMAIN_VRAM | 3784 AMDGPU_GEM_DOMAIN_GTT, 3785 &adev->gfx.mec.mec_fw_data_obj, 3786 &adev->gfx.mec.mec_fw_data_gpu_addr, 3787 (void **)&fw_data_ptr); 3788 if (r) { 3789 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r); 3790 gfx_v11_0_mec_fini(adev); 3791 return r; 3792 } 3793 3794 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 3795 memcpy(fw_data_ptr, fw_data, fw_data_size); 3796 3797 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3798 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 3799 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3800 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 3801 3802 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 3803 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3804 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 3805 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3806 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 3807 3808 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 3809 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 3810 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 3811 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 3812 3813 mutex_lock(&adev->srbm_mutex); 3814 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 3815 soc21_grbm_select(adev, 1, i, 0, 0); 3816 3817 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr); 3818 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 3819 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr)); 3820 3821 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3822 mec_hdr->ucode_start_addr_lo >> 2 | 3823 mec_hdr->ucode_start_addr_hi << 30); 3824 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3825 mec_hdr->ucode_start_addr_hi >> 2); 3826 3827 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr); 3828 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 3829 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3830 } 3831 mutex_unlock(&adev->srbm_mutex); 3832 soc21_grbm_select(adev, 0, 0, 0, 0); 3833 3834 /* Trigger an invalidation of the L1 data caches */ 3835 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3836 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3837 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 3838 3839 /* Wait for the invalidation to complete */ 3840 for (i = 0; i < usec_timeout; i++) { 3841 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3842 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 3843 INVALIDATE_DCACHE_COMPLETE)) 3844 break; 3845 udelay(1); 3846 } 3847 3848 if (i >= usec_timeout) { 3849 dev_err(adev->dev, "failed to invalidate data cache\n"); 3850 return -EINVAL; 3851 } 3852 3853 /* Trigger an invalidation of the L1 instruction caches */ 3854 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 3855 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 3856 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 3857 3858 /* Wait for the invalidation to complete */ 3859 for (i = 0; i < usec_timeout; i++) { 3860 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 3861 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 3862 INVALIDATE_CACHE_COMPLETE)) 3863 break; 3864 udelay(1); 3865 } 3866 3867 if (i >= usec_timeout) { 3868 dev_err(adev->dev, "failed to invalidate 
instruction cache\n"); 3869 return -EINVAL; 3870 } 3871 3872 return 0; 3873 } 3874 3875 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 3876 { 3877 uint32_t tmp; 3878 struct amdgpu_device *adev = ring->adev; 3879 3880 /* tell RLC which is KIQ queue */ 3881 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 3882 tmp &= 0xffffff00; 3883 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3884 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); 3885 tmp |= 0x80; 3886 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); 3887 } 3888 3889 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 3890 { 3891 /* set graphics engine doorbell range */ 3892 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 3893 (adev->doorbell_index.gfx_ring0 * 2) << 2); 3894 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3895 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 3896 3897 /* set compute engine doorbell range */ 3898 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 3899 (adev->doorbell_index.kiq * 2) << 2); 3900 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 3901 (adev->doorbell_index.userqueue_end * 2) << 2); 3902 } 3903 3904 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 3905 struct v11_gfx_mqd *mqd, 3906 struct amdgpu_mqd_prop *prop) 3907 { 3908 bool priority = 0; 3909 u32 tmp; 3910 3911 /* set up default queue priority level 3912 * 0x0 = low priority, 0x1 = high priority 3913 */ 3914 if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) 3915 priority = 1; 3916 3917 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); 3918 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 3919 mqd->cp_gfx_hqd_queue_priority = tmp; 3920 } 3921 3922 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 3923 struct amdgpu_mqd_prop *prop) 3924 { 3925 struct v11_gfx_mqd *mqd = m; 3926 uint64_t hqd_gpu_addr, wb_gpu_addr; 3927 uint32_t tmp; 3928 uint32_t rb_bufsz; 3929 3930 /* set up gfx hqd wptr */ 3931 mqd->cp_gfx_hqd_wptr = 0; 3932 mqd->cp_gfx_hqd_wptr_hi = 0; 3933 3934 /* set the pointer to the MQD */ 3935 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 3936 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 3937 3938 /* set up mqd control */ 3939 tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); 3940 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 3941 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 3942 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 3943 mqd->cp_gfx_mqd_control = tmp; 3944 3945 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 3946 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); 3947 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 3948 mqd->cp_gfx_hqd_vmid = 0; 3949 3950 /* set up gfx queue priority */ 3951 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 3952 3953 /* set up time quantum */ 3954 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); 3955 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 3956 mqd->cp_gfx_hqd_quantum = tmp; 3957 3958 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 3959 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 3960 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 3961 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 3962 3963 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 3964 wb_gpu_addr = prop->rptr_gpu_addr; 3965 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 3966 mqd->cp_gfx_hqd_rptr_addr_hi = 3967 upper_32_bits(wb_gpu_addr) & 0xffff; 3968 3969 /* set up rb_wptr_poll addr */ 3970 wb_gpu_addr = prop->wptr_gpu_addr; 3971 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 3972 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 3973 3974 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 3975 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 3976 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); 3977 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 3978 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 3979 #ifdef __BIG_ENDIAN 3980 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 3981 #endif 3982 mqd->cp_gfx_hqd_cntl = tmp; 3983 3984 /* set up cp_doorbell_control */ 3985 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3986 if (prop->use_doorbell) { 3987 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3988 DOORBELL_OFFSET, prop->doorbell_index); 3989 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3990 DOORBELL_EN, 1); 3991 } else 3992 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3993 DOORBELL_EN, 0); 3994 mqd->cp_rb_doorbell_control = tmp; 3995 3996 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3997 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); 3998 3999 /* active the queue */ 4000 mqd->cp_gfx_hqd_active = 1; 4001 4002 return 0; 4003 } 4004 4005 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4006 { 4007 struct amdgpu_device *adev = ring->adev; 4008 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4009 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4010 4011 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4012 memset((void *)mqd, 0, sizeof(*mqd)); 4013 mutex_lock(&adev->srbm_mutex); 4014 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4015 amdgpu_ring_init_mqd(ring); 4016 soc21_grbm_select(adev, 0, 0, 0, 0); 4017 mutex_unlock(&adev->srbm_mutex); 4018 if (adev->gfx.me.mqd_backup[mqd_idx]) 4019 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4020 } else { 4021 /* restore mqd with the backup copy */ 4022 if (adev->gfx.me.mqd_backup[mqd_idx]) 4023 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4024 /* reset the ring */ 4025 ring->wptr = 0; 4026 *ring->wptr_cpu_addr = 0; 4027 amdgpu_ring_clear_ring(ring); 4028 } 4029 4030 return 0; 4031 } 4032 4033 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4034 { 4035 int r, i; 4036 struct amdgpu_ring *ring; 4037 4038 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4039 ring = &adev->gfx.gfx_ring[i]; 4040 4041 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4042 if (unlikely(r != 0)) 4043 return r; 4044 4045 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4046 if (!r) { 4047 r = gfx_v11_0_kgq_init_queue(ring, false); 4048 amdgpu_bo_kunmap(ring->mqd_obj); 4049 ring->mqd_ptr = NULL; 4050 } 4051 amdgpu_bo_unreserve(ring->mqd_obj); 4052 if (r) 4053 return r; 4054 } 4055 4056 r = amdgpu_gfx_enable_kgq(adev, 0); 4057 if (r) 4058 return r; 4059 4060 return gfx_v11_0_cp_gfx_start(adev); 4061 } 4062 4063 static int gfx_v11_0_compute_mqd_init(struct 
amdgpu_device *adev, void *m, 4064 struct amdgpu_mqd_prop *prop) 4065 { 4066 struct v11_compute_mqd *mqd = m; 4067 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4068 uint32_t tmp; 4069 4070 mqd->header = 0xC0310800; 4071 mqd->compute_pipelinestat_enable = 0x00000001; 4072 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4073 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4074 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4075 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4076 mqd->compute_misc_reserved = 0x00000007; 4077 4078 eop_base_addr = prop->eop_gpu_addr >> 8; 4079 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4080 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4081 4082 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4083 tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); 4084 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4085 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4086 4087 mqd->cp_hqd_eop_control = tmp; 4088 4089 /* enable doorbell? */ 4090 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 4091 4092 if (prop->use_doorbell) { 4093 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4094 DOORBELL_OFFSET, prop->doorbell_index); 4095 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4096 DOORBELL_EN, 1); 4097 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4098 DOORBELL_SOURCE, 0); 4099 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4100 DOORBELL_HIT, 0); 4101 } else { 4102 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4103 DOORBELL_EN, 0); 4104 } 4105 4106 mqd->cp_hqd_pq_doorbell_control = tmp; 4107 4108 /* disable the queue if it's active */ 4109 mqd->cp_hqd_dequeue_request = 0; 4110 mqd->cp_hqd_pq_rptr = 0; 4111 mqd->cp_hqd_pq_wptr_lo = 0; 4112 mqd->cp_hqd_pq_wptr_hi = 0; 4113 4114 /* set the pointer to the MQD */ 4115 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4116 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4117 4118 /* set MQD vmid to 0 */ 4119 tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); 4120 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4121 mqd->cp_mqd_control = tmp; 4122 4123 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4124 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4125 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4126 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4127 4128 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4129 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); 4130 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4131 (order_base_2(prop->queue_size / 4) - 1)); 4132 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4133 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4134 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4135 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4136 prop->allow_tunneling); 4137 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4138 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4139 mqd->cp_hqd_pq_control = tmp; 4140 4141 /* set the wb address whether it's enabled or not */ 4142 wb_gpu_addr = prop->rptr_gpu_addr; 4143 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4144 mqd->cp_hqd_pq_rptr_report_addr_hi = 4145 upper_32_bits(wb_gpu_addr) & 0xffff; 4146 4147 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4148 wb_gpu_addr = prop->wptr_gpu_addr; 4149 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4150 mqd->cp_hqd_pq_wptr_poll_addr_hi = 
upper_32_bits(wb_gpu_addr) & 0xffff; 4151 4152 tmp = 0; 4153 /* enable the doorbell if requested */ 4154 if (prop->use_doorbell) { 4155 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 4156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4157 DOORBELL_OFFSET, prop->doorbell_index); 4158 4159 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4160 DOORBELL_EN, 1); 4161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4162 DOORBELL_SOURCE, 0); 4163 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4164 DOORBELL_HIT, 0); 4165 } 4166 4167 mqd->cp_hqd_pq_doorbell_control = tmp; 4168 4169 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4170 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); 4171 4172 /* set the vmid for the queue */ 4173 mqd->cp_hqd_vmid = 0; 4174 4175 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); 4176 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4177 mqd->cp_hqd_persistent_state = tmp; 4178 4179 /* set MIN_IB_AVAIL_SIZE */ 4180 tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); 4181 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4182 mqd->cp_hqd_ib_control = tmp; 4183 4184 /* set static priority for a compute queue/ring */ 4185 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4186 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4187 4188 mqd->cp_hqd_active = prop->hqd_active; 4189 4190 return 0; 4191 } 4192 4193 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4194 { 4195 struct amdgpu_device *adev = ring->adev; 4196 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4197 int j; 4198 4199 /* inactivate the queue */ 4200 if (amdgpu_sriov_vf(adev)) 4201 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4202 4203 /* disable wptr polling */ 4204 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4205 4206 /* write the EOP addr */ 4207 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4208 mqd->cp_hqd_eop_base_addr_lo); 4209 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4210 mqd->cp_hqd_eop_base_addr_hi); 4211 4212 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4213 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4214 mqd->cp_hqd_eop_control); 4215 4216 /* enable doorbell? 
*/ 4217 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4218 mqd->cp_hqd_pq_doorbell_control); 4219 4220 /* disable the queue if it's active */ 4221 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4222 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4223 for (j = 0; j < adev->usec_timeout; j++) { 4224 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4225 break; 4226 udelay(1); 4227 } 4228 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4229 mqd->cp_hqd_dequeue_request); 4230 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4231 mqd->cp_hqd_pq_rptr); 4232 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4233 mqd->cp_hqd_pq_wptr_lo); 4234 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4235 mqd->cp_hqd_pq_wptr_hi); 4236 } 4237 4238 /* set the pointer to the MQD */ 4239 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4240 mqd->cp_mqd_base_addr_lo); 4241 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4242 mqd->cp_mqd_base_addr_hi); 4243 4244 /* set MQD vmid to 0 */ 4245 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4246 mqd->cp_mqd_control); 4247 4248 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4249 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4250 mqd->cp_hqd_pq_base_lo); 4251 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4252 mqd->cp_hqd_pq_base_hi); 4253 4254 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4255 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4256 mqd->cp_hqd_pq_control); 4257 4258 /* set the wb address whether it's enabled or not */ 4259 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4260 mqd->cp_hqd_pq_rptr_report_addr_lo); 4261 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4262 mqd->cp_hqd_pq_rptr_report_addr_hi); 4263 4264 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4265 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4266 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4267 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4268 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4269 4270 /* enable the doorbell if requested */ 4271 if (ring->use_doorbell) { 4272 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4273 (adev->doorbell_index.kiq * 2) << 2); 4274 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4275 (adev->doorbell_index.userqueue_end * 2) << 2); 4276 } 4277 4278 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4279 mqd->cp_hqd_pq_doorbell_control); 4280 4281 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4282 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4283 mqd->cp_hqd_pq_wptr_lo); 4284 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4285 mqd->cp_hqd_pq_wptr_hi); 4286 4287 /* set the vmid for the queue */ 4288 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4289 4290 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4291 mqd->cp_hqd_persistent_state); 4292 4293 /* activate the queue */ 4294 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4295 mqd->cp_hqd_active); 4296 4297 if (ring->use_doorbell) 4298 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4299 4300 return 0; 4301 } 4302 4303 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4304 { 4305 struct amdgpu_device *adev = ring->adev; 4306 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4307 4308 gfx_v11_0_kiq_setting(ring); 4309 4310 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4311 /* reset MQD to a clean status */ 4312 if (adev->gfx.kiq[0].mqd_backup) 4313 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4314 4315 /* reset ring buffer */ 4316 ring->wptr = 0; 4317 amdgpu_ring_clear_ring(ring); 4318 4319 mutex_lock(&adev->srbm_mutex); 4320 
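/* re-program the HQD registers from the restored MQD while this KIQ queue is selected */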
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4321 gfx_v11_0_kiq_init_register(ring); 4322 soc21_grbm_select(adev, 0, 0, 0, 0); 4323 mutex_unlock(&adev->srbm_mutex); 4324 } else { 4325 memset((void *)mqd, 0, sizeof(*mqd)); 4326 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4327 amdgpu_ring_clear_ring(ring); 4328 mutex_lock(&adev->srbm_mutex); 4329 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4330 amdgpu_ring_init_mqd(ring); 4331 gfx_v11_0_kiq_init_register(ring); 4332 soc21_grbm_select(adev, 0, 0, 0, 0); 4333 mutex_unlock(&adev->srbm_mutex); 4334 4335 if (adev->gfx.kiq[0].mqd_backup) 4336 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4337 } 4338 4339 return 0; 4340 } 4341 4342 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4343 { 4344 struct amdgpu_device *adev = ring->adev; 4345 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4346 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4347 4348 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4349 memset((void *)mqd, 0, sizeof(*mqd)); 4350 mutex_lock(&adev->srbm_mutex); 4351 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4352 amdgpu_ring_init_mqd(ring); 4353 soc21_grbm_select(adev, 0, 0, 0, 0); 4354 mutex_unlock(&adev->srbm_mutex); 4355 4356 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4357 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4358 } else { 4359 /* restore MQD to a clean status */ 4360 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4361 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4362 /* reset ring buffer */ 4363 ring->wptr = 0; 4364 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4365 amdgpu_ring_clear_ring(ring); 4366 } 4367 4368 return 0; 4369 } 4370 4371 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4372 { 4373 struct amdgpu_ring *ring; 4374 int r; 4375 4376 ring = &adev->gfx.kiq[0].ring; 4377 4378 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4379 if (unlikely(r != 0)) 4380 return r; 4381 4382 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4383 if (unlikely(r != 0)) { 4384 amdgpu_bo_unreserve(ring->mqd_obj); 4385 return r; 4386 } 4387 4388 gfx_v11_0_kiq_init_queue(ring); 4389 amdgpu_bo_kunmap(ring->mqd_obj); 4390 ring->mqd_ptr = NULL; 4391 amdgpu_bo_unreserve(ring->mqd_obj); 4392 ring->sched.ready = true; 4393 return 0; 4394 } 4395 4396 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4397 { 4398 struct amdgpu_ring *ring = NULL; 4399 int r = 0, i; 4400 4401 if (!amdgpu_async_gfx_ring) 4402 gfx_v11_0_cp_compute_enable(adev, true); 4403 4404 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4405 ring = &adev->gfx.compute_ring[i]; 4406 4407 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4408 if (unlikely(r != 0)) 4409 goto done; 4410 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4411 if (!r) { 4412 r = gfx_v11_0_kcq_init_queue(ring, false); 4413 amdgpu_bo_kunmap(ring->mqd_obj); 4414 ring->mqd_ptr = NULL; 4415 } 4416 amdgpu_bo_unreserve(ring->mqd_obj); 4417 if (r) 4418 goto done; 4419 } 4420 4421 r = amdgpu_gfx_enable_kcq(adev, 0); 4422 done: 4423 return r; 4424 } 4425 4426 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4427 { 4428 int r, i; 4429 struct amdgpu_ring *ring; 4430 4431 if (!(adev->flags & AMD_IS_APU)) 4432 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4433 4434 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4435 /* legacy firmware loading */ 4436 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4437 if (r) 4438 
return r; 4439 4440 if (adev->gfx.rs64_enable) 4441 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4442 else 4443 r = gfx_v11_0_cp_compute_load_microcode(adev); 4444 if (r) 4445 return r; 4446 } 4447 4448 gfx_v11_0_cp_set_doorbell_range(adev); 4449 4450 if (amdgpu_async_gfx_ring) { 4451 gfx_v11_0_cp_compute_enable(adev, true); 4452 gfx_v11_0_cp_gfx_enable(adev, true); 4453 } 4454 4455 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4456 r = amdgpu_mes_kiq_hw_init(adev); 4457 else 4458 r = gfx_v11_0_kiq_resume(adev); 4459 if (r) 4460 return r; 4461 4462 r = gfx_v11_0_kcq_resume(adev); 4463 if (r) 4464 return r; 4465 4466 if (!amdgpu_async_gfx_ring) { 4467 r = gfx_v11_0_cp_gfx_resume(adev); 4468 if (r) 4469 return r; 4470 } else { 4471 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4472 if (r) 4473 return r; 4474 } 4475 4476 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4477 ring = &adev->gfx.gfx_ring[i]; 4478 r = amdgpu_ring_test_helper(ring); 4479 if (r) 4480 return r; 4481 } 4482 4483 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4484 ring = &adev->gfx.compute_ring[i]; 4485 r = amdgpu_ring_test_helper(ring); 4486 if (r) 4487 return r; 4488 } 4489 4490 return 0; 4491 } 4492 4493 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4494 { 4495 gfx_v11_0_cp_gfx_enable(adev, enable); 4496 gfx_v11_0_cp_compute_enable(adev, enable); 4497 } 4498 4499 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4500 { 4501 int r; 4502 bool value; 4503 4504 r = adev->gfxhub.funcs->gart_enable(adev); 4505 if (r) 4506 return r; 4507 4508 adev->hdp.funcs->flush_hdp(adev, NULL); 4509 4510 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4511 false : true; 4512 4513 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4514 /* TODO investigate why this and the hdp flush above is needed, 4515 * are we missing a flush somewhere else? 
*/ 4516 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4517 4518 return 0; 4519 } 4520 4521 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4522 { 4523 u32 tmp; 4524 4525 /* select RS64 */ 4526 if (adev->gfx.rs64_enable) { 4527 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4528 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4529 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4530 4531 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4532 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4533 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4534 } 4535 4536 if (amdgpu_emu_mode == 1) 4537 msleep(100); 4538 } 4539 4540 static int get_gb_addr_config(struct amdgpu_device * adev) 4541 { 4542 u32 gb_addr_config; 4543 4544 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4545 if (gb_addr_config == 0) 4546 return -EINVAL; 4547 4548 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4549 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4550 4551 adev->gfx.config.gb_addr_config = gb_addr_config; 4552 4553 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4554 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4555 GB_ADDR_CONFIG, NUM_PIPES); 4556 4557 adev->gfx.config.max_tile_pipes = 4558 adev->gfx.config.gb_addr_config_fields.num_pipes; 4559 4560 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4561 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4562 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4563 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4564 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4565 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4566 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4567 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4568 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4569 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4570 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4571 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4572 4573 return 0; 4574 } 4575 4576 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4577 { 4578 uint32_t data; 4579 4580 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4581 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4582 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4583 4584 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4585 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4586 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4587 } 4588 4589 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4590 { 4591 int r; 4592 struct amdgpu_device *adev = ip_block->adev; 4593 4594 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4595 adev->gfx.cleaner_shader_ptr); 4596 4597 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4598 if (adev->gfx.imu.funcs) { 4599 /* RLC autoload sequence 1: Program rlc ram */ 4600 if (adev->gfx.imu.funcs->program_rlc_ram) 4601 adev->gfx.imu.funcs->program_rlc_ram(adev); 4602 /* rlc autoload firmware */ 4603 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4604 if (r) 4605 return r; 4606 } 4607 } else { 4608 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4609 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4610 if (adev->gfx.imu.funcs->load_microcode) 4611 adev->gfx.imu.funcs->load_microcode(adev); 4612 if (adev->gfx.imu.funcs->setup_imu) 4613 adev->gfx.imu.funcs->setup_imu(adev); 4614 if (adev->gfx.imu.funcs->start_imu) 4615 adev->gfx.imu.funcs->start_imu(adev); 4616 } 4617 4618 /* disable gpa mode in backdoor loading */ 4619 gfx_v11_0_disable_gpa_mode(adev); 4620 } 4621 } 4622 4623 
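/* For the RLC backdoor autoload and PSP load types, wait for the RLC autoload to complete before programming the rest of the GC block. */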
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4624 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4625 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4626 if (r) { 4627 dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r); 4628 return r; 4629 } 4630 } 4631 4632 adev->gfx.is_poweron = true; 4633 4634 if (get_gb_addr_config(adev)) 4635 DRM_WARN("Invalid gb_addr_config!\n"); 4636 4637 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4638 adev->gfx.rs64_enable) 4639 gfx_v11_0_config_gfx_rs64(adev); 4640 4641 r = gfx_v11_0_gfxhub_enable(adev); 4642 if (r) 4643 return r; 4644 4645 if (!amdgpu_emu_mode) 4646 gfx_v11_0_init_golden_registers(adev); 4647 4648 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4649 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4650 /* 4651 * For gfx 11, RLC firmware loading relies on the SMU firmware 4652 * being loaded first, so for direct loading the SMC ucode has 4653 * to be loaded here before the RLC. 4654 */ 4655 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4656 if (r) 4657 return r; 4658 } 4659 4660 gfx_v11_0_constants_init(adev); 4661 4662 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4663 gfx_v11_0_select_cp_fw_arch(adev); 4664 4665 if (adev->nbio.funcs->gc_doorbell_init) 4666 adev->nbio.funcs->gc_doorbell_init(adev); 4667 4668 r = gfx_v11_0_rlc_resume(adev); 4669 if (r) 4670 return r; 4671 4672 /* 4673 * golden register init and rlc resume may override some registers, 4674 * so reconfigure them here 4675 */ 4676 gfx_v11_0_tcp_harvest(adev); 4677 4678 r = gfx_v11_0_cp_resume(adev); 4679 if (r) 4680 return r; 4681 4682 /* get IMU version from HW if it's not set */ 4683 if (!adev->gfx.imu_fw_version) 4684 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4685 4686 return r; 4687 } 4688 4689 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4690 { 4691 struct amdgpu_device *adev = ip_block->adev; 4692 4693 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4694 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4695 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4696 4697 if (!adev->no_hw_access) { 4698 if (amdgpu_async_gfx_ring) { 4699 if (amdgpu_gfx_disable_kgq(adev, 0)) 4700 DRM_ERROR("KGQ disable failed\n"); 4701 } 4702 4703 if (amdgpu_gfx_disable_kcq(adev, 0)) 4704 DRM_ERROR("KCQ disable failed\n"); 4705 4706 amdgpu_mes_kiq_hw_fini(adev); 4707 } 4708 4709 if (amdgpu_sriov_vf(adev)) 4710 /* Remove the steps that disable CPG and clear the KIQ position, 4711 * so that the CP can perform IDLE-SAVE during the switch. Those 4712 * steps are necessary to avoid a DMAR error in gfx9, but the issue 4713 * is not reproduced on gfx11. 
4714 */ 4715 return 0; 4716 4717 gfx_v11_0_cp_enable(adev, false); 4718 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4719 4720 adev->gfxhub.funcs->gart_disable(adev); 4721 4722 adev->gfx.is_poweron = false; 4723 4724 return 0; 4725 } 4726 4727 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4728 { 4729 return gfx_v11_0_hw_fini(ip_block); 4730 } 4731 4732 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4733 { 4734 return gfx_v11_0_hw_init(ip_block); 4735 } 4736 4737 static bool gfx_v11_0_is_idle(void *handle) 4738 { 4739 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4740 4741 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4742 GRBM_STATUS, GUI_ACTIVE)) 4743 return false; 4744 else 4745 return true; 4746 } 4747 4748 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4749 { 4750 unsigned i; 4751 u32 tmp; 4752 struct amdgpu_device *adev = ip_block->adev; 4753 4754 for (i = 0; i < adev->usec_timeout; i++) { 4755 /* read MC_STATUS */ 4756 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4757 GRBM_STATUS__GUI_ACTIVE_MASK; 4758 4759 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4760 return 0; 4761 udelay(1); 4762 } 4763 return -ETIMEDOUT; 4764 } 4765 4766 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4767 bool req) 4768 { 4769 u32 i, tmp, val; 4770 4771 for (i = 0; i < adev->usec_timeout; i++) { 4772 /* Request with MeId=2, PipeId=0 */ 4773 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4774 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4775 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4776 4777 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4778 if (req) { 4779 if (val == tmp) 4780 break; 4781 } else { 4782 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4783 REQUEST, 1); 4784 4785 /* unlocked or locked by firmware */ 4786 if (val != tmp) 4787 break; 4788 } 4789 udelay(1); 4790 } 4791 4792 if (i >= adev->usec_timeout) 4793 return -EINVAL; 4794 4795 return 0; 4796 } 4797 4798 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4799 { 4800 u32 grbm_soft_reset = 0; 4801 u32 tmp; 4802 int r, i, j, k; 4803 struct amdgpu_device *adev = ip_block->adev; 4804 4805 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4806 4807 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4808 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4809 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4810 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4811 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4812 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4813 4814 mutex_lock(&adev->srbm_mutex); 4815 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4816 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4817 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4818 soc21_grbm_select(adev, i, k, j, 0); 4819 4820 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4821 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4822 } 4823 } 4824 } 4825 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4826 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4827 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4828 soc21_grbm_select(adev, i, k, j, 0); 4829 4830 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 4831 } 4832 } 4833 } 4834 soc21_grbm_select(adev, 0, 0, 0, 0); 4835 mutex_unlock(&adev->srbm_mutex); 4836 4837 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 4838 mutex_lock(&adev->gfx.reset_sem_mutex); 4839 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 4840 if (r) { 4841 mutex_unlock(&adev->gfx.reset_sem_mutex); 4842 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 4843 return r; 4844 } 4845 4846 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 4847 4848 /* Read the CP_VMID_RESET register three times 4849 * to give GFX_HQD_ACTIVE sufficient time to reach 0 */ 4850 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4851 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4852 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4853 4854 /* release the gfx mutex */ 4855 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 4856 mutex_unlock(&adev->gfx.reset_sem_mutex); 4857 if (r) { 4858 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 4859 return r; 4860 } 4861 4862 for (i = 0; i < adev->usec_timeout; i++) { 4863 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 4864 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 4865 break; 4866 udelay(1); 4867 } 4868 if (i >= adev->usec_timeout) { 4869 dev_err(adev->dev, "failed to wait for all pipes to become idle\n"); 4870 return -EINVAL; 4871 } 4872 4873 /********** trigger soft reset ***********/ 4874 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4875 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4876 SOFT_RESET_CP, 1); 4877 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4878 SOFT_RESET_GFX, 1); 4879 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4880 SOFT_RESET_CPF, 1); 4881 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4882 SOFT_RESET_CPC, 1); 4883 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4884 SOFT_RESET_CPG, 1); 4885 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4886 /********** exit soft reset ***********/ 4887 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4888 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4889 SOFT_RESET_CP, 0); 4890 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4891 SOFT_RESET_GFX, 0); 4892 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4893 SOFT_RESET_CPF, 0); 4894 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4895 SOFT_RESET_CPC, 0); 4896 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4897 SOFT_RESET_CPG, 0); 4898 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4899 4900 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 4901 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 4902 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 4903 4904 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 4905 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 4906 4907 for (i = 0; i < adev->usec_timeout; i++) { 4908 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 4909 break; 4910 udelay(1); 4911 } 4912 if (i >= adev->usec_timeout) { 4913 dev_err(adev->dev, "failed to wait for CP_VMID_RESET to clear\n"); 4914 return -EINVAL; 4915 } 4916 4917 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4918 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4919 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4920 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4921 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4922 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4923 4924 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4925 4926 return gfx_v11_0_cp_resume(adev); 4927 } 4928 4929 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 4930 { 4931 int i, r; 4932 struct amdgpu_device *adev = ip_block->adev; 4933 
struct amdgpu_ring *ring; 4934 long tmo = msecs_to_jiffies(1000); 4935 4936 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4937 ring = &adev->gfx.gfx_ring[i]; 4938 r = amdgpu_ring_test_ib(ring, tmo); 4939 if (r) 4940 return true; 4941 } 4942 4943 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4944 ring = &adev->gfx.compute_ring[i]; 4945 r = amdgpu_ring_test_ib(ring, tmo); 4946 if (r) 4947 return true; 4948 } 4949 4950 return false; 4951 } 4952 4953 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 4954 { 4955 struct amdgpu_device *adev = ip_block->adev; 4956 /** 4957 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 4958 */ 4959 return amdgpu_mes_resume(adev); 4960 } 4961 4962 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4963 { 4964 uint64_t clock; 4965 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 4966 4967 if (amdgpu_sriov_vf(adev)) { 4968 amdgpu_gfx_off_ctrl(adev, false); 4969 mutex_lock(&adev->gfx.gpu_clock_mutex); 4970 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 4971 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 4972 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 4973 if (clock_counter_hi_pre != clock_counter_hi_after) 4974 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 4975 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4976 amdgpu_gfx_off_ctrl(adev, true); 4977 } else { 4978 preempt_disable(); 4979 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 4980 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 4981 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 4982 if (clock_counter_hi_pre != clock_counter_hi_after) 4983 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 4984 preempt_enable(); 4985 } 4986 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 4987 4988 return clock; 4989 } 4990 4991 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4992 uint32_t vmid, 4993 uint32_t gds_base, uint32_t gds_size, 4994 uint32_t gws_base, uint32_t gws_size, 4995 uint32_t oa_base, uint32_t oa_size) 4996 { 4997 struct amdgpu_device *adev = ring->adev; 4998 4999 /* GDS Base */ 5000 gfx_v11_0_write_data_to_reg(ring, 0, false, 5001 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5002 gds_base); 5003 5004 /* GDS Size */ 5005 gfx_v11_0_write_data_to_reg(ring, 0, false, 5006 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5007 gds_size); 5008 5009 /* GWS */ 5010 gfx_v11_0_write_data_to_reg(ring, 0, false, 5011 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5012 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5013 5014 /* OA */ 5015 gfx_v11_0_write_data_to_reg(ring, 0, false, 5016 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5017 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5018 } 5019 5020 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5021 { 5022 struct amdgpu_device *adev = ip_block->adev; 5023 5024 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5025 5026 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5027 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5028 AMDGPU_MAX_COMPUTE_RINGS); 5029 5030 gfx_v11_0_set_kiq_pm4_funcs(adev); 5031 gfx_v11_0_set_ring_funcs(adev); 5032 gfx_v11_0_set_irq_funcs(adev); 5033 gfx_v11_0_set_gds_init(adev); 5034 gfx_v11_0_set_rlc_funcs(adev); 5035 
gfx_v11_0_set_mqd_funcs(adev); 5036 gfx_v11_0_set_imu_funcs(adev); 5037 5038 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5039 5040 return gfx_v11_0_init_microcode(adev); 5041 } 5042 5043 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5044 { 5045 struct amdgpu_device *adev = ip_block->adev; 5046 int r; 5047 5048 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5049 if (r) 5050 return r; 5051 5052 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5053 if (r) 5054 return r; 5055 5056 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5057 if (r) 5058 return r; 5059 return 0; 5060 } 5061 5062 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5063 { 5064 uint32_t rlc_cntl; 5065 5066 /* if RLC is not enabled, do nothing */ 5067 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5068 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5069 } 5070 5071 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5072 { 5073 uint32_t data; 5074 unsigned i; 5075 5076 data = RLC_SAFE_MODE__CMD_MASK; 5077 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5078 5079 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5080 5081 /* wait for RLC_SAFE_MODE */ 5082 for (i = 0; i < adev->usec_timeout; i++) { 5083 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5084 RLC_SAFE_MODE, CMD)) 5085 break; 5086 udelay(1); 5087 } 5088 } 5089 5090 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5091 { 5092 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5093 } 5094 5095 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5096 bool enable) 5097 { 5098 uint32_t def, data; 5099 5100 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5101 return; 5102 5103 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5104 5105 if (enable) 5106 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5107 else 5108 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5109 5110 if (def != data) 5111 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5112 } 5113 5114 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5115 bool enable) 5116 { 5117 uint32_t def, data; 5118 5119 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5120 return; 5121 5122 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5123 5124 if (enable) 5125 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5126 else 5127 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5128 5129 if (def != data) 5130 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5131 } 5132 5133 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5134 bool enable) 5135 { 5136 uint32_t def, data; 5137 5138 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5139 return; 5140 5141 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5142 5143 if (enable) 5144 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5145 else 5146 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5147 5148 if (def != data) 5149 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5150 } 5151 5152 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5153 bool enable) 5154 { 5155 uint32_t data, def; 5156 5157 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5158 return; 5159 5160 /* It is disabled by HW by default */ 5161 if (enable) { 5162 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5163 /* 1 - 
RLC_CGTT_MGCG_OVERRIDE */ 5164 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5165 5166 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5167 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5168 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5169 5170 if (def != data) 5171 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5172 } 5173 } else { 5174 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5175 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5176 5177 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5178 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5179 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5180 5181 if (def != data) 5182 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5183 } 5184 } 5185 } 5186 5187 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5188 bool enable) 5189 { 5190 uint32_t def, data; 5191 5192 if (!(adev->cg_flags & 5193 (AMD_CG_SUPPORT_GFX_CGCG | 5194 AMD_CG_SUPPORT_GFX_CGLS | 5195 AMD_CG_SUPPORT_GFX_3D_CGCG | 5196 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5197 return; 5198 5199 if (enable) { 5200 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5201 5202 /* unset CGCG override */ 5203 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5204 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5205 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5206 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5207 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5208 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5209 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5210 5211 /* update CGCG override bits */ 5212 if (def != data) 5213 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5214 5215 /* enable cgcg FSM(0x0000363F) */ 5216 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5217 5218 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5219 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5220 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5221 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5222 } 5223 5224 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5225 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5226 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5227 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5228 } 5229 5230 if (def != data) 5231 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5232 5233 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5234 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5235 5236 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5237 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5238 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5239 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5240 } 5241 5242 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5243 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5244 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5245 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5246 } 5247 5248 if (def != data) 5249 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5250 5251 /* set IDLE_POLL_COUNT(0x00900100) */ 5252 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5253 5254 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5255 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5256 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5257 5258 if (def != data) 5259 
			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);

		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);

		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, so there is no need
		 * to configure SDMA1.
		 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	} else {
		/* Program RLC_CGCG_CGLS_CTRL */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, so there is no need
		 * to configure SDMA1.
		 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	}
}

static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					     bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);

	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);

	gfx_v11_0_update_repeater_fgcg(adev, enable);

	gfx_v11_0_update_sram_fgcg(adev, enable);

	gfx_v11_0_update_perf_clk(adev, enable);

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

	return 0;
}

static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
	u32 reg, pre_data, data;

	amdgpu_gfx_off_ctrl(adev, false);
	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
		pre_data = RREG32_NO_KIQ(reg);
	else
		pre_data = RREG32(reg);

	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK)
<< RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5356 5357 if (pre_data != data) { 5358 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5359 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5360 } else 5361 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5362 } 5363 amdgpu_gfx_off_ctrl(adev, true); 5364 5365 if (ring 5366 && amdgpu_sriov_is_pp_one_vf(adev) 5367 && (pre_data != data) 5368 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5369 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5370 amdgpu_ring_emit_wreg(ring, reg, data); 5371 } 5372 } 5373 5374 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5375 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5376 .set_safe_mode = gfx_v11_0_set_safe_mode, 5377 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5378 .init = gfx_v11_0_rlc_init, 5379 .get_csb_size = gfx_v11_0_get_csb_size, 5380 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5381 .resume = gfx_v11_0_rlc_resume, 5382 .stop = gfx_v11_0_rlc_stop, 5383 .reset = gfx_v11_0_rlc_reset, 5384 .start = gfx_v11_0_rlc_start, 5385 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5386 }; 5387 5388 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5389 { 5390 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5391 5392 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5393 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5394 else 5395 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5396 5397 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5398 5399 // Program RLC_PG_DELAY3 for CGPG hysteresis 5400 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5401 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5402 case IP_VERSION(11, 0, 1): 5403 case IP_VERSION(11, 0, 4): 5404 case IP_VERSION(11, 5, 0): 5405 case IP_VERSION(11, 5, 1): 5406 case IP_VERSION(11, 5, 2): 5407 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5408 break; 5409 default: 5410 break; 5411 } 5412 } 5413 } 5414 5415 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5416 { 5417 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5418 5419 gfx_v11_cntl_power_gating(adev, enable); 5420 5421 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5422 } 5423 5424 static int gfx_v11_0_set_powergating_state(void *handle, 5425 enum amd_powergating_state state) 5426 { 5427 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5428 bool enable = (state == AMD_PG_STATE_GATE); 5429 5430 if (amdgpu_sriov_vf(adev)) 5431 return 0; 5432 5433 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5434 case IP_VERSION(11, 0, 0): 5435 case IP_VERSION(11, 0, 2): 5436 case IP_VERSION(11, 0, 3): 5437 amdgpu_gfx_off_ctrl(adev, enable); 5438 break; 5439 case IP_VERSION(11, 0, 1): 5440 case IP_VERSION(11, 0, 4): 5441 case IP_VERSION(11, 5, 0): 5442 case IP_VERSION(11, 5, 1): 5443 case IP_VERSION(11, 5, 2): 5444 if (!enable) 5445 amdgpu_gfx_off_ctrl(adev, false); 5446 5447 gfx_v11_cntl_pg(adev, enable); 5448 5449 if (enable) 5450 amdgpu_gfx_off_ctrl(adev, true); 5451 5452 break; 5453 default: 5454 break; 5455 } 5456 5457 return 0; 5458 } 5459 5460 static int gfx_v11_0_set_clockgating_state(void *handle, 5461 enum amd_clockgating_state state) 5462 { 5463 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5464 5465 if (amdgpu_sriov_vf(adev)) 5466 return 0; 5467 5468 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5469 case IP_VERSION(11, 0, 0): 5470 case IP_VERSION(11, 0, 1): 5471 case IP_VERSION(11, 0, 2): 5472 case IP_VERSION(11, 0, 3): 5473 case IP_VERSION(11, 0, 4): 5474 
case IP_VERSION(11, 5, 0): 5475 case IP_VERSION(11, 5, 1): 5476 case IP_VERSION(11, 5, 2): 5477 gfx_v11_0_update_gfx_clock_gating(adev, 5478 state == AMD_CG_STATE_GATE); 5479 break; 5480 default: 5481 break; 5482 } 5483 5484 return 0; 5485 } 5486 5487 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) 5488 { 5489 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5490 int data; 5491 5492 /* AMD_CG_SUPPORT_GFX_MGCG */ 5493 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5494 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5495 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5496 5497 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5498 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5499 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5500 5501 /* AMD_CG_SUPPORT_GFX_FGCG */ 5502 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5503 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5504 5505 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5506 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5507 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5508 5509 /* AMD_CG_SUPPORT_GFX_CGCG */ 5510 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5511 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5512 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5513 5514 /* AMD_CG_SUPPORT_GFX_CGLS */ 5515 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5516 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5517 5518 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5519 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5520 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5521 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5522 5523 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5524 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5525 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5526 } 5527 5528 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5529 { 5530 /* gfx11 is 32bit rptr*/ 5531 return *(uint32_t *)ring->rptr_cpu_addr; 5532 } 5533 5534 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5535 { 5536 struct amdgpu_device *adev = ring->adev; 5537 u64 wptr; 5538 5539 /* XXX check if swapping is necessary on BE */ 5540 if (ring->use_doorbell) { 5541 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5542 } else { 5543 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5544 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5545 } 5546 5547 return wptr; 5548 } 5549 5550 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5551 { 5552 struct amdgpu_device *adev = ring->adev; 5553 5554 if (ring->use_doorbell) { 5555 /* XXX check if swapping is necessary on BE */ 5556 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5557 ring->wptr); 5558 WDOORBELL64(ring->doorbell_index, ring->wptr); 5559 } else { 5560 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5561 lower_32_bits(ring->wptr)); 5562 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5563 upper_32_bits(ring->wptr)); 5564 } 5565 } 5566 5567 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5568 { 5569 /* gfx11 hardware is 32bit rptr */ 5570 return *(uint32_t *)ring->rptr_cpu_addr; 5571 } 5572 5573 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5574 { 5575 u64 wptr; 5576 5577 /* XXX check if swapping is necessary on BE */ 5578 if (ring->use_doorbell) 5579 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5580 else 5581 BUG(); 5582 return wptr; 5583 } 5584 5585 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5586 { 5587 struct amdgpu_device *adev = ring->adev; 5588 5589 /* XXX check if swapping is necessary on 
BE */ 5590 if (ring->use_doorbell) { 5591 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5592 ring->wptr); 5593 WDOORBELL64(ring->doorbell_index, ring->wptr); 5594 } else { 5595 BUG(); /* only DOORBELL method supported on gfx11 now */ 5596 } 5597 } 5598 5599 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5600 { 5601 struct amdgpu_device *adev = ring->adev; 5602 u32 ref_and_mask, reg_mem_engine; 5603 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5604 5605 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5606 switch (ring->me) { 5607 case 1: 5608 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5609 break; 5610 case 2: 5611 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5612 break; 5613 default: 5614 return; 5615 } 5616 reg_mem_engine = 0; 5617 } else { 5618 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5619 reg_mem_engine = 1; /* pfp */ 5620 } 5621 5622 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5623 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5624 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5625 ref_and_mask, ref_and_mask, 0x20); 5626 } 5627 5628 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5629 struct amdgpu_job *job, 5630 struct amdgpu_ib *ib, 5631 uint32_t flags) 5632 { 5633 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5634 u32 header, control = 0; 5635 5636 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5637 5638 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5639 5640 control |= ib->length_dw | (vmid << 24); 5641 5642 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5643 control |= INDIRECT_BUFFER_PRE_ENB(1); 5644 5645 if (flags & AMDGPU_IB_PREEMPTED) 5646 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5647 5648 if (vmid) 5649 gfx_v11_0_ring_emit_de_meta(ring, 5650 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5651 } 5652 5653 if (ring->is_mes_queue) 5654 /* inherit vmid from mqd */ 5655 control |= 0x400000; 5656 5657 amdgpu_ring_write(ring, header); 5658 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5659 amdgpu_ring_write(ring, 5660 #ifdef __BIG_ENDIAN 5661 (2 << 0) | 5662 #endif 5663 lower_32_bits(ib->gpu_addr)); 5664 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5665 amdgpu_ring_write(ring, control); 5666 } 5667 5668 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5669 struct amdgpu_job *job, 5670 struct amdgpu_ib *ib, 5671 uint32_t flags) 5672 { 5673 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5674 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5675 5676 if (ring->is_mes_queue) 5677 /* inherit vmid from mqd */ 5678 control |= 0x40000000; 5679 5680 /* Currently, there is a high possibility to get wave ID mismatch 5681 * between ME and GDS, leading to a hw deadlock, because ME generates 5682 * different wave IDs than the GDS expects. This situation happens 5683 * randomly when at least 5 compute pipes use GDS ordered append. 5684 * The wave IDs generated by ME are also wrong after suspend/resume. 5685 * Those are probably bugs somewhere else in the kernel driver. 5686 * 5687 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5688 * GDS to 0 for this ring (me/pipe). 
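	 * The SET_CONFIG_REG write below is only emitted when the IB has
	 * AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID set.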
5689 */ 5690 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5691 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5692 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5693 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5694 } 5695 5696 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5697 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5698 amdgpu_ring_write(ring, 5699 #ifdef __BIG_ENDIAN 5700 (2 << 0) | 5701 #endif 5702 lower_32_bits(ib->gpu_addr)); 5703 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5704 amdgpu_ring_write(ring, control); 5705 } 5706 5707 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5708 u64 seq, unsigned flags) 5709 { 5710 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5711 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5712 5713 /* RELEASE_MEM - flush caches, send int */ 5714 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5715 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5716 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5717 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5718 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5719 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5720 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5721 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5722 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5723 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 5724 5725 /* 5726 * the address should be Qword aligned if 64bit write, Dword 5727 * aligned if only send 32bit data low (discard data high) 5728 */ 5729 if (write64bit) 5730 BUG_ON(addr & 0x7); 5731 else 5732 BUG_ON(addr & 0x3); 5733 amdgpu_ring_write(ring, lower_32_bits(addr)); 5734 amdgpu_ring_write(ring, upper_32_bits(addr)); 5735 amdgpu_ring_write(ring, lower_32_bits(seq)); 5736 amdgpu_ring_write(ring, upper_32_bits(seq)); 5737 amdgpu_ring_write(ring, ring->is_mes_queue ? 5738 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); 5739 } 5740 5741 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5742 { 5743 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5744 uint32_t seq = ring->fence_drv.sync_seq; 5745 uint64_t addr = ring->fence_drv.gpu_addr; 5746 5747 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5748 upper_32_bits(addr), seq, 0xffffffff, 4); 5749 } 5750 5751 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5752 uint16_t pasid, uint32_t flush_type, 5753 bool all_hub, uint8_t dst_sel) 5754 { 5755 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5756 amdgpu_ring_write(ring, 5757 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5758 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5759 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5760 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5761 } 5762 5763 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5764 unsigned vmid, uint64_t pd_addr) 5765 { 5766 if (ring->is_mes_queue) 5767 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); 5768 else 5769 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5770 5771 /* compute doesn't have PFP */ 5772 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5773 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5774 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5775 amdgpu_ring_write(ring, 0x0); 5776 } 5777 5778 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5779 * changed in any way. 
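	 * Clearing the cached offset and pointer below makes the next
	 * gfx_v11_0_ring_emit_gfx_shadow() call start from a clean state.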
	 */
	ring->set_q_mode_offs = 0;
	ring->set_q_mode_ptr = NULL;
}

static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
					 uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						   uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
					   u64 shadow_va, u64 csa_va,
					   u64 gds_va, bool init_shadow,
					   int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offs, end;

	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
		return;

	/*
	 * The logic here isn't easy to understand because we need to keep
	 * state across multiple executions of the function as well as between
	 * the CPU and GPU. The general idea is that the newly written GPU
	 * command has a condition on the previous one and is only executed if
	 * really necessary.
	 */

	/*
	 * The dw in the NOP controls whether the next SET_Q_MODE packet should
	 * be executed or not. Reserve 64 bits just to be on the safe side.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
	offs = ring->wptr & ring->buf_mask;

	/*
	 * We start with skipping the prefix SET_Q_MODE and always executing
	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command when the postfix has executed.
	 */
	amdgpu_ring_write(ring, shadow_va ?
1 : 0); 5881 amdgpu_ring_write(ring, 0); 5882 5883 if (ring->set_q_mode_offs) { 5884 uint64_t addr; 5885 5886 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5887 addr += ring->set_q_mode_offs << 2; 5888 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 5889 } 5890 5891 /* 5892 * When the postfix SET_Q_MODE packet executes we need to make sure that the 5893 * next prefix SET_Q_MODE packet executes as well. 5894 */ 5895 if (!shadow_va) { 5896 uint64_t addr; 5897 5898 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5899 addr += offs << 2; 5900 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5901 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5902 amdgpu_ring_write(ring, lower_32_bits(addr)); 5903 amdgpu_ring_write(ring, upper_32_bits(addr)); 5904 amdgpu_ring_write(ring, 0x1); 5905 } 5906 5907 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 5908 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 5909 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 5910 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 5911 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 5912 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 5913 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 5914 amdgpu_ring_write(ring, shadow_va ? 5915 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 5916 amdgpu_ring_write(ring, init_shadow ? 5917 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 5918 5919 if (ring->set_q_mode_offs) 5920 amdgpu_ring_patch_cond_exec(ring, end); 5921 5922 if (shadow_va) { 5923 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 5924 5925 /* 5926 * If the tokens match try to skip the last postfix SET_Q_MODE 5927 * packet to avoid saving/restoring the state all the time. 5928 */ 5929 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 5930 *ring->set_q_mode_ptr = 0; 5931 5932 ring->set_q_mode_token = token; 5933 } else { 5934 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 5935 } 5936 5937 ring->set_q_mode_offs = offs; 5938 } 5939 5940 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 5941 { 5942 int i, r = 0; 5943 struct amdgpu_device *adev = ring->adev; 5944 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 5945 struct amdgpu_ring *kiq_ring = &kiq->ring; 5946 unsigned long flags; 5947 5948 if (adev->enable_mes) 5949 return -EINVAL; 5950 5951 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 5952 return -EINVAL; 5953 5954 spin_lock_irqsave(&kiq->ring_lock, flags); 5955 5956 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 5957 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5958 return -ENOMEM; 5959 } 5960 5961 /* assert preemption condition */ 5962 amdgpu_ring_set_preempt_cond_exec(ring, false); 5963 5964 /* assert IB preemption, emit the trailing fence */ 5965 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 5966 ring->trail_fence_gpu_addr, 5967 ++ring->trail_seq); 5968 amdgpu_ring_commit(kiq_ring); 5969 5970 spin_unlock_irqrestore(&kiq->ring_lock, flags); 5971 5972 /* poll the trailing fence */ 5973 for (i = 0; i < adev->usec_timeout; i++) { 5974 if (ring->trail_seq == 5975 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 5976 break; 5977 udelay(1); 5978 } 5979 5980 if (i >= adev->usec_timeout) { 5981 r = -EINVAL; 5982 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 5983 } 5984 5985 /* deassert preemption condition */ 5986 amdgpu_ring_set_preempt_cond_exec(ring, true); 5987 return r; 5988 } 5989 5990 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 5991 { 5992 struct 
amdgpu_device *adev = ring->adev; 5993 struct v10_de_ib_state de_payload = {0}; 5994 uint64_t offset, gds_addr, de_payload_gpu_addr; 5995 void *de_payload_cpu_addr; 5996 int cnt; 5997 5998 if (ring->is_mes_queue) { 5999 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6000 gfx[0].gfx_meta_data) + 6001 offsetof(struct v10_gfx_meta_data, de_payload); 6002 de_payload_gpu_addr = 6003 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6004 de_payload_cpu_addr = 6005 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 6006 6007 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6008 gfx[0].gds_backup) + 6009 offsetof(struct v10_gfx_meta_data, de_payload); 6010 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6011 } else { 6012 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6013 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6014 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6015 6016 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6017 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6018 PAGE_SIZE); 6019 } 6020 6021 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6022 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6023 6024 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6025 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6026 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6027 WRITE_DATA_DST_SEL(8) | 6028 WR_CONFIRM) | 6029 WRITE_DATA_CACHE_POLICY(0)); 6030 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6031 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6032 6033 if (resume) 6034 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6035 sizeof(de_payload) >> 2); 6036 else 6037 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6038 sizeof(de_payload) >> 2); 6039 } 6040 6041 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6042 bool secure) 6043 { 6044 uint32_t v = secure ? FRAME_TMZ : 0; 6045 6046 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6047 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 6048 } 6049 6050 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6051 uint32_t reg_val_offs) 6052 { 6053 struct amdgpu_device *adev = ring->adev; 6054 6055 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6056 amdgpu_ring_write(ring, 0 | /* src: register*/ 6057 (5 << 8) | /* dst: memory */ 6058 (1 << 20)); /* write confirm */ 6059 amdgpu_ring_write(ring, reg); 6060 amdgpu_ring_write(ring, 0); 6061 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6062 reg_val_offs * 4)); 6063 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6064 reg_val_offs * 4)); 6065 } 6066 6067 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6068 uint32_t val) 6069 { 6070 uint32_t cmd = 0; 6071 6072 switch (ring->funcs->type) { 6073 case AMDGPU_RING_TYPE_GFX: 6074 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6075 break; 6076 case AMDGPU_RING_TYPE_KIQ: 6077 cmd = (1 << 16); /* no inc addr */ 6078 break; 6079 default: 6080 cmd = WR_CONFIRM; 6081 break; 6082 } 6083 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6084 amdgpu_ring_write(ring, cmd); 6085 amdgpu_ring_write(ring, reg); 6086 amdgpu_ring_write(ring, 0); 6087 amdgpu_ring_write(ring, val); 6088 } 6089 6090 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6091 uint32_t val, uint32_t mask) 6092 { 6093 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6094 } 6095 6096 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6097 uint32_t reg0, uint32_t reg1, 6098 uint32_t ref, uint32_t mask) 6099 { 6100 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6101 6102 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6103 ref, mask, 0x20); 6104 } 6105 6106 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 6107 unsigned vmid) 6108 { 6109 struct amdgpu_device *adev = ring->adev; 6110 uint32_t value = 0; 6111 6112 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6113 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 6114 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6115 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6116 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6117 WREG32_SOC15(GC, 0, regSQ_CMD, value); 6118 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6119 } 6120 6121 static void 6122 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6123 uint32_t me, uint32_t pipe, 6124 enum amdgpu_interrupt_state state) 6125 { 6126 uint32_t cp_int_cntl, cp_int_cntl_reg; 6127 6128 if (!me) { 6129 switch (pipe) { 6130 case 0: 6131 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6132 break; 6133 case 1: 6134 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6135 break; 6136 default: 6137 DRM_DEBUG("invalid pipe %d\n", pipe); 6138 return; 6139 } 6140 } else { 6141 DRM_DEBUG("invalid me %d\n", me); 6142 return; 6143 } 6144 6145 switch (state) { 6146 case AMDGPU_IRQ_STATE_DISABLE: 6147 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6148 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6149 TIME_STAMP_INT_ENABLE, 0); 6150 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6151 GENERIC0_INT_ENABLE, 0); 6152 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6153 break; 6154 case AMDGPU_IRQ_STATE_ENABLE: 6155 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6156 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6157 TIME_STAMP_INT_ENABLE, 1); 6158 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6159 
GENERIC0_INT_ENABLE, 1); 6160 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6161 break; 6162 default: 6163 break; 6164 } 6165 } 6166 6167 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6168 int me, int pipe, 6169 enum amdgpu_interrupt_state state) 6170 { 6171 u32 mec_int_cntl, mec_int_cntl_reg; 6172 6173 /* 6174 * amdgpu controls only the first MEC. That's why this function only 6175 * handles the setting of interrupts for this specific MEC. All other 6176 * pipes' interrupts are set by amdkfd. 6177 */ 6178 6179 if (me == 1) { 6180 switch (pipe) { 6181 case 0: 6182 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6183 break; 6184 case 1: 6185 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6186 break; 6187 case 2: 6188 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6189 break; 6190 case 3: 6191 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6192 break; 6193 default: 6194 DRM_DEBUG("invalid pipe %d\n", pipe); 6195 return; 6196 } 6197 } else { 6198 DRM_DEBUG("invalid me %d\n", me); 6199 return; 6200 } 6201 6202 switch (state) { 6203 case AMDGPU_IRQ_STATE_DISABLE: 6204 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6205 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6206 TIME_STAMP_INT_ENABLE, 0); 6207 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6208 GENERIC0_INT_ENABLE, 0); 6209 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6210 break; 6211 case AMDGPU_IRQ_STATE_ENABLE: 6212 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6213 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6214 TIME_STAMP_INT_ENABLE, 1); 6215 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6216 GENERIC0_INT_ENABLE, 1); 6217 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6218 break; 6219 default: 6220 break; 6221 } 6222 } 6223 6224 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6225 struct amdgpu_irq_src *src, 6226 unsigned type, 6227 enum amdgpu_interrupt_state state) 6228 { 6229 switch (type) { 6230 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6231 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6232 break; 6233 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6234 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6235 break; 6236 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6237 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6238 break; 6239 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6240 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6241 break; 6242 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6243 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6244 break; 6245 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6246 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6247 break; 6248 default: 6249 break; 6250 } 6251 return 0; 6252 } 6253 6254 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6255 struct amdgpu_irq_src *source, 6256 struct amdgpu_iv_entry *entry) 6257 { 6258 int i; 6259 u8 me_id, pipe_id, queue_id; 6260 struct amdgpu_ring *ring; 6261 uint32_t mes_queue_id = entry->src_data[0]; 6262 6263 DRM_DEBUG("IH: CP EOP\n"); 6264 6265 if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { 6266 struct amdgpu_mes_queue *queue; 6267 6268 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; 6269 6270 spin_lock(&adev->mes.queue_id_lock); 6271 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); 6272 if 
(queue) { 6273 DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); 6274 amdgpu_fence_process(queue->ring); 6275 } 6276 spin_unlock(&adev->mes.queue_id_lock); 6277 } else { 6278 me_id = (entry->ring_id & 0x0c) >> 2; 6279 pipe_id = (entry->ring_id & 0x03) >> 0; 6280 queue_id = (entry->ring_id & 0x70) >> 4; 6281 6282 switch (me_id) { 6283 case 0: 6284 if (pipe_id == 0) 6285 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6286 else 6287 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6288 break; 6289 case 1: 6290 case 2: 6291 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6292 ring = &adev->gfx.compute_ring[i]; 6293 /* Per-queue interrupt is supported for MEC starting from VI. 6294 * The interrupt can only be enabled/disabled per pipe instead 6295 * of per queue. 6296 */ 6297 if ((ring->me == me_id) && 6298 (ring->pipe == pipe_id) && 6299 (ring->queue == queue_id)) 6300 amdgpu_fence_process(ring); 6301 } 6302 break; 6303 } 6304 } 6305 6306 return 0; 6307 } 6308 6309 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6310 struct amdgpu_irq_src *source, 6311 unsigned int type, 6312 enum amdgpu_interrupt_state state) 6313 { 6314 u32 cp_int_cntl_reg, cp_int_cntl; 6315 int i, j; 6316 6317 switch (state) { 6318 case AMDGPU_IRQ_STATE_DISABLE: 6319 case AMDGPU_IRQ_STATE_ENABLE: 6320 for (i = 0; i < adev->gfx.me.num_me; i++) { 6321 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6322 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6323 6324 if (cp_int_cntl_reg) { 6325 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6326 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6327 PRIV_REG_INT_ENABLE, 6328 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6329 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6330 } 6331 } 6332 } 6333 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6334 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6335 /* MECs start at 1 */ 6336 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6337 6338 if (cp_int_cntl_reg) { 6339 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6340 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6341 PRIV_REG_INT_ENABLE, 6342 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6343 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6344 } 6345 } 6346 } 6347 break; 6348 default: 6349 break; 6350 } 6351 6352 return 0; 6353 } 6354 6355 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6356 struct amdgpu_irq_src *source, 6357 unsigned type, 6358 enum amdgpu_interrupt_state state) 6359 { 6360 u32 cp_int_cntl_reg, cp_int_cntl; 6361 int i, j; 6362 6363 switch (state) { 6364 case AMDGPU_IRQ_STATE_DISABLE: 6365 case AMDGPU_IRQ_STATE_ENABLE: 6366 for (i = 0; i < adev->gfx.me.num_me; i++) { 6367 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6368 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6369 6370 if (cp_int_cntl_reg) { 6371 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6372 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6373 OPCODE_ERROR_INT_ENABLE, 6374 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6375 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6376 } 6377 } 6378 } 6379 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6380 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6381 /* MECs start at 1 */ 6382 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6383 6384 if (cp_int_cntl_reg) { 6385 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6386 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6387 OPCODE_ERROR_INT_ENABLE, 6388 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6389 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6390 } 6391 } 6392 } 6393 break; 6394 default: 6395 break; 6396 } 6397 return 0; 6398 } 6399 6400 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6401 struct amdgpu_irq_src *source, 6402 unsigned int type, 6403 enum amdgpu_interrupt_state state) 6404 { 6405 u32 cp_int_cntl_reg, cp_int_cntl; 6406 int i, j; 6407 6408 switch (state) { 6409 case AMDGPU_IRQ_STATE_DISABLE: 6410 case AMDGPU_IRQ_STATE_ENABLE: 6411 for (i = 0; i < adev->gfx.me.num_me; i++) { 6412 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6413 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6414 6415 if (cp_int_cntl_reg) { 6416 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6417 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6418 PRIV_INSTR_INT_ENABLE, 6419 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6420 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6421 } 6422 } 6423 } 6424 break; 6425 default: 6426 break; 6427 } 6428 6429 return 0; 6430 } 6431 6432 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6433 struct amdgpu_iv_entry *entry) 6434 { 6435 u8 me_id, pipe_id, queue_id; 6436 struct amdgpu_ring *ring; 6437 int i; 6438 6439 me_id = (entry->ring_id & 0x0c) >> 2; 6440 pipe_id = (entry->ring_id & 0x03) >> 0; 6441 queue_id = (entry->ring_id & 0x70) >> 4; 6442 6443 switch (me_id) { 6444 case 0: 6445 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6446 ring = &adev->gfx.gfx_ring[i]; 6447 if (ring->me == me_id && ring->pipe == pipe_id && 6448 ring->queue == queue_id) 6449 drm_sched_fault(&ring->sched); 6450 } 6451 break; 6452 case 1: 6453 case 2: 6454 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6455 ring = &adev->gfx.compute_ring[i]; 6456 if (ring->me == me_id && ring->pipe == pipe_id && 6457 ring->queue == queue_id) 6458 drm_sched_fault(&ring->sched); 6459 } 6460 break; 6461 default: 6462 BUG(); 6463 break; 6464 } 6465 } 6466 6467 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6468 struct amdgpu_irq_src *source, 6469 struct amdgpu_iv_entry *entry) 6470 { 6471 DRM_ERROR("Illegal register access in command stream\n"); 6472 gfx_v11_0_handle_priv_fault(adev, entry); 6473 return 0; 6474 } 6475 6476 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6477 struct amdgpu_irq_src *source, 6478 struct amdgpu_iv_entry *entry) 6479 { 6480 DRM_ERROR("Illegal opcode in command stream \n"); 6481 gfx_v11_0_handle_priv_fault(adev, entry); 6482 return 0; 6483 } 6484 6485 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6486 struct amdgpu_irq_src *source, 6487 struct amdgpu_iv_entry *entry) 6488 { 6489 DRM_ERROR("Illegal instruction in command stream\n"); 6490 gfx_v11_0_handle_priv_fault(adev, entry); 6491 return 0; 6492 } 6493 6494 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6495 struct amdgpu_irq_src *source, 6496 struct amdgpu_iv_entry *entry) 6497 { 6498 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6499 return 
adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6500 6501 return 0; 6502 } 6503 6504 #if 0 6505 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6506 struct amdgpu_irq_src *src, 6507 unsigned int type, 6508 enum amdgpu_interrupt_state state) 6509 { 6510 uint32_t tmp, target; 6511 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6512 6513 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6514 target += ring->pipe; 6515 6516 switch (type) { 6517 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6518 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6519 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6520 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6521 GENERIC2_INT_ENABLE, 0); 6522 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6523 6524 tmp = RREG32_SOC15_IP(GC, target); 6525 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6526 GENERIC2_INT_ENABLE, 0); 6527 WREG32_SOC15_IP(GC, target, tmp); 6528 } else { 6529 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6530 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6531 GENERIC2_INT_ENABLE, 1); 6532 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6533 6534 tmp = RREG32_SOC15_IP(GC, target); 6535 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6536 GENERIC2_INT_ENABLE, 1); 6537 WREG32_SOC15_IP(GC, target, tmp); 6538 } 6539 break; 6540 default: 6541 BUG(); /* kiq only support GENERIC2_INT now */ 6542 break; 6543 } 6544 return 0; 6545 } 6546 #endif 6547 6548 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6549 { 6550 const unsigned int gcr_cntl = 6551 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6552 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6553 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6554 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6555 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6556 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6557 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6558 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6559 6560 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6561 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6562 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6563 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6564 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6565 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6566 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6567 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6568 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6569 } 6570 6571 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 6572 { 6573 struct amdgpu_device *adev = ring->adev; 6574 int r; 6575 6576 if (amdgpu_sriov_vf(adev)) 6577 return -EINVAL; 6578 6579 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); 6580 if (r) 6581 return r; 6582 6583 r = amdgpu_bo_reserve(ring->mqd_obj, false); 6584 if (unlikely(r != 0)) { 6585 dev_err(adev->dev, "fail to resv mqd_obj\n"); 6586 return r; 6587 } 6588 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 6589 if (!r) { 6590 r = gfx_v11_0_kgq_init_queue(ring, true); 6591 amdgpu_bo_kunmap(ring->mqd_obj); 6592 ring->mqd_ptr = NULL; 6593 } 6594 amdgpu_bo_unreserve(ring->mqd_obj); 6595 if (r) { 6596 dev_err(adev->dev, "fail to unresv mqd_obj\n"); 6597 return r; 6598 } 6599 6600 r = amdgpu_mes_map_legacy_queue(adev, ring); 6601 if (r) { 6602 dev_err(adev->dev, "failed to remap kgq\n"); 6603 return r; 6604 } 6605 6606 return amdgpu_ring_test_ring(ring); 6607 } 6608 6609 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) 6610 { 6611 struct 
amdgpu_device *adev = ring->adev; 6612 int i, r = 0; 6613 6614 if (amdgpu_sriov_vf(adev)) 6615 return -EINVAL; 6616 6617 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6618 mutex_lock(&adev->srbm_mutex); 6619 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6620 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 6621 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 6622 6623 /* make sure dequeue is complete*/ 6624 for (i = 0; i < adev->usec_timeout; i++) { 6625 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 6626 break; 6627 udelay(1); 6628 } 6629 if (i >= adev->usec_timeout) 6630 r = -ETIMEDOUT; 6631 soc21_grbm_select(adev, 0, 0, 0, 0); 6632 mutex_unlock(&adev->srbm_mutex); 6633 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6634 if (r) { 6635 dev_err(adev->dev, "fail to wait on hqd deactivate\n"); 6636 return r; 6637 } 6638 6639 r = amdgpu_bo_reserve(ring->mqd_obj, false); 6640 if (unlikely(r != 0)) { 6641 dev_err(adev->dev, "fail to resv mqd_obj\n"); 6642 return r; 6643 } 6644 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 6645 if (!r) { 6646 r = gfx_v11_0_kcq_init_queue(ring, true); 6647 amdgpu_bo_kunmap(ring->mqd_obj); 6648 ring->mqd_ptr = NULL; 6649 } 6650 amdgpu_bo_unreserve(ring->mqd_obj); 6651 if (r) { 6652 dev_err(adev->dev, "fail to unresv mqd_obj\n"); 6653 return r; 6654 } 6655 r = amdgpu_mes_map_legacy_queue(adev, ring); 6656 if (r) { 6657 dev_err(adev->dev, "failed to remap kcq\n"); 6658 return r; 6659 } 6660 6661 return amdgpu_ring_test_ring(ring); 6662 } 6663 6664 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 6665 { 6666 struct amdgpu_device *adev = ip_block->adev; 6667 uint32_t i, j, k, reg, index = 0; 6668 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 6669 6670 if (!adev->gfx.ip_dump_core) 6671 return; 6672 6673 for (i = 0; i < reg_count; i++) 6674 drm_printf(p, "%-50s \t 0x%08x\n", 6675 gc_reg_list_11_0[i].reg_name, 6676 adev->gfx.ip_dump_core[i]); 6677 6678 /* print compute queue registers for all instances */ 6679 if (!adev->gfx.ip_dump_compute_queues) 6680 return; 6681 6682 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 6683 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 6684 adev->gfx.mec.num_mec, 6685 adev->gfx.mec.num_pipe_per_mec, 6686 adev->gfx.mec.num_queue_per_pipe); 6687 6688 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6689 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6690 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 6691 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 6692 for (reg = 0; reg < reg_count; reg++) { 6693 drm_printf(p, "%-50s \t 0x%08x\n", 6694 gc_cp_reg_list_11[reg].reg_name, 6695 adev->gfx.ip_dump_compute_queues[index + reg]); 6696 } 6697 index += reg_count; 6698 } 6699 } 6700 } 6701 6702 /* print gfx queue registers for all instances */ 6703 if (!adev->gfx.ip_dump_gfx_queues) 6704 return; 6705 6706 index = 0; 6707 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 6708 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 6709 adev->gfx.me.num_me, 6710 adev->gfx.me.num_pipe_per_me, 6711 adev->gfx.me.num_queue_per_pipe); 6712 6713 for (i = 0; i < adev->gfx.me.num_me; i++) { 6714 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6715 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 6716 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 6717 for (reg = 0; reg < reg_count; reg++) { 6718 drm_printf(p, "%-50s \t 0x%08x\n", 6719 gc_gfx_queue_reg_list_11[reg].reg_name, 6720 adev->gfx.ip_dump_gfx_queues[index + reg]); 6721 } 
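				/* advance to the next queue's block in the flat dump buffer */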
6722 index += reg_count; 6723 } 6724 } 6725 } 6726 } 6727 6728 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 6729 { 6730 struct amdgpu_device *adev = ip_block->adev; 6731 uint32_t i, j, k, reg, index = 0; 6732 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 6733 6734 if (!adev->gfx.ip_dump_core) 6735 return; 6736 6737 amdgpu_gfx_off_ctrl(adev, false); 6738 for (i = 0; i < reg_count; i++) 6739 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 6740 amdgpu_gfx_off_ctrl(adev, true); 6741 6742 /* dump compute queue registers for all instances */ 6743 if (!adev->gfx.ip_dump_compute_queues) 6744 return; 6745 6746 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 6747 amdgpu_gfx_off_ctrl(adev, false); 6748 mutex_lock(&adev->srbm_mutex); 6749 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6750 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6751 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 6752 /* ME0 is for GFX so start from 1 for CP */ 6753 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 6754 for (reg = 0; reg < reg_count; reg++) { 6755 adev->gfx.ip_dump_compute_queues[index + reg] = 6756 RREG32(SOC15_REG_ENTRY_OFFSET( 6757 gc_cp_reg_list_11[reg])); 6758 } 6759 index += reg_count; 6760 } 6761 } 6762 } 6763 soc21_grbm_select(adev, 0, 0, 0, 0); 6764 mutex_unlock(&adev->srbm_mutex); 6765 amdgpu_gfx_off_ctrl(adev, true); 6766 6767 /* dump gfx queue registers for all instances */ 6768 if (!adev->gfx.ip_dump_gfx_queues) 6769 return; 6770 6771 index = 0; 6772 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 6773 amdgpu_gfx_off_ctrl(adev, false); 6774 mutex_lock(&adev->srbm_mutex); 6775 for (i = 0; i < adev->gfx.me.num_me; i++) { 6776 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6777 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 6778 soc21_grbm_select(adev, i, j, k, 0); 6779 6780 for (reg = 0; reg < reg_count; reg++) { 6781 adev->gfx.ip_dump_gfx_queues[index + reg] = 6782 RREG32(SOC15_REG_ENTRY_OFFSET( 6783 gc_gfx_queue_reg_list_11[reg])); 6784 } 6785 index += reg_count; 6786 } 6787 } 6788 } 6789 soc21_grbm_select(adev, 0, 0, 0, 0); 6790 mutex_unlock(&adev->srbm_mutex); 6791 amdgpu_gfx_off_ctrl(adev, true); 6792 } 6793 6794 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 6795 { 6796 /* Emit the cleaner shader */ 6797 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 6798 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 6799 } 6800 6801 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 6802 .name = "gfx_v11_0", 6803 .early_init = gfx_v11_0_early_init, 6804 .late_init = gfx_v11_0_late_init, 6805 .sw_init = gfx_v11_0_sw_init, 6806 .sw_fini = gfx_v11_0_sw_fini, 6807 .hw_init = gfx_v11_0_hw_init, 6808 .hw_fini = gfx_v11_0_hw_fini, 6809 .suspend = gfx_v11_0_suspend, 6810 .resume = gfx_v11_0_resume, 6811 .is_idle = gfx_v11_0_is_idle, 6812 .wait_for_idle = gfx_v11_0_wait_for_idle, 6813 .soft_reset = gfx_v11_0_soft_reset, 6814 .check_soft_reset = gfx_v11_0_check_soft_reset, 6815 .post_soft_reset = gfx_v11_0_post_soft_reset, 6816 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 6817 .set_powergating_state = gfx_v11_0_set_powergating_state, 6818 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 6819 .dump_ip_state = gfx_v11_ip_dump, 6820 .print_ip_state = gfx_v11_ip_print, 6821 }; 6822 6823 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 6824 .type = AMDGPU_RING_TYPE_GFX, 6825 .align_mask = 0xff, 6826 .nop = 

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 247 maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
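
/*
 * Illustrative sketch, not part of the driver: the ring funcs above pair
 * .align_mask = 0xff with a NOP packet and a generic pad_ib callback,
 * i.e. submissions are padded out to a 256-dword boundary. The
 * hypothetical helper below shows the padding arithmetic implied by such
 * a mask; it is a worked example only and not the core's actual code.
 */
static inline u32 gfx_v11_0_example_pad_dw(u64 wptr, u32 align_mask)
{
	/* e.g. wptr = 250, mask = 0xff -> 6 filler dwords; aligned wptr -> 0 */
	return (align_mask + 1 - (u32)(wptr & align_mask)) & align_mask;
}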

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in cu_info struct is 4x4 and ioctl interface struct
			 * drm_amdgpu_info_device should keep stable.
			 * So we use last two columns of bitmap to store cu mask for
			 * SEs 4 to 7, the layout of the bitmap is as below:
			 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}
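
/*
 * Illustrative sketch, not part of the driver: gfx_v11_0_get_cu_info()
 * above folds up to 8 SEs into the fixed 4x4 cu_info bitmap, placing
 * SEs 4-7 in the last two columns, and each active WGP contributes two
 * adjacent CU bits. The hypothetical helpers below only restate that
 * index math and the WGP -> CU expansion as worked examples.
 */
static inline void gfx_v11_0_example_cu_bitmap_slot(u32 se, u32 sh,
						    u32 *row, u32 *col)
{
	*row = se % 4;            /* SE0-3 and SE4-7 share rows 0-3 */
	*col = sh + (se / 4) * 2; /* SE4-7 spill into columns 2-3 */
}

static inline u32 gfx_v11_0_example_wgp_to_cu_mask(u32 wgp_active_bitmap)
{
	u32 wgp, cu_mask = 0;

	for (wgp = 0; wgp < 16; wgp++)
		if (wgp_active_bitmap & (1U << wgp))
			cu_mask |= 3U << (2 * wgp); /* each WGP enables two CUs */

	return cu_mask;
}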

const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};
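
/*
 * Illustrative usage note, not part of this file: the exported ip_block
 * descriptor above is what the IP discovery code hands to the device
 * core. A minimal, hedged sketch of such a registration (actual call
 * sites live in amdgpu_discovery.c and may differ in detail):
 *
 *	r = amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
 *	if (r)
 *		return r;
 */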