1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "imu_v11_0.h" 33 #include "soc21.h" 34 #include "nvd.h" 35 36 #include "gc/gc_11_0_0_offset.h" 37 #include "gc/gc_11_0_0_sh_mask.h" 38 #include "smuio/smuio_13_0_6_offset.h" 39 #include "smuio/smuio_13_0_6_sh_mask.h" 40 #include "navi10_enum.h" 41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 42 43 #include "soc15.h" 44 #include "clearstate_gfx11.h" 45 #include "v11_structs.h" 46 #include "gfx_v11_0.h" 47 #include "gfx_v11_0_cleaner_shader.h" 48 #include "gfx_v11_0_3.h" 49 #include "nbio_v4_3.h" 50 #include "mes_v11_0.h" 51 52 #define GFX11_NUM_GFX_RINGS 1 53 #define GFX11_MEC_HPD_SIZE 2048 54 55 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 57 58 #define regCGTT_WD_CLK_CTRL 0x5086 59 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e 61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 62 #define regPC_CONFIG_CNTL_1 0x194d 63 #define regPC_CONFIG_CNTL_1_BASE_IDX 1 64 65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); 66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); 67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); 68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); 69 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); 70 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 72 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 73 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 74 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 76 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 77 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 78 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 80 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 81 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 82 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 84 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 85 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 86 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 87 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 88 
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 89 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 103 104 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 105 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 106 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 107 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 108 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 109 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 110 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 111 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 112 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 113 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 114 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 115 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 116 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 117 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 118 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 119 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 120 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 121 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 122 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 123 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 124 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 125 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 126 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 127 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 135 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 136 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 138 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 139 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 140 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 141 SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 142 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 143 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), 144 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 145 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 146 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 147 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 148 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 149 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 150 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 151 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 152 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 157 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 158 
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 159 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 160 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 161 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), 162 /* cp header registers */ 163 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), 164 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 165 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 166 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 167 /* SE status registers */ 168 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), 169 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), 170 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), 171 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), 172 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), 173 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) 174 }; 175 176 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { 177 /* compute registers */ 178 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), 179 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), 180 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), 181 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), 182 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), 183 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), 184 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), 185 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), 186 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), 187 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), 188 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), 189 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), 190 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), 191 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), 192 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), 193 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), 194 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), 195 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), 196 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), 197 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), 198 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), 199 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), 200 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), 201 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), 202 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), 203 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), 204 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), 205 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), 206 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), 207 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), 208 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), 209 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), 210 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), 211 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), 212 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), 213 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), 214 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), 215 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), 216 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) 217 }; 218 219 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { 220 /* gfx queue registers */ 221 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), 222 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), 223 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), 224 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), 225 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), 226 
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), 227 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), 228 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), 229 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), 230 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), 231 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), 232 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), 233 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), 234 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), 235 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), 236 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), 237 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), 238 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), 239 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), 240 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), 241 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 242 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 243 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 244 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 245 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) 246 }; 247 248 static const struct soc15_reg_golden golden_settings_gc_11_0[] = { 249 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) 250 }; 251 252 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 253 { 254 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), 255 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010), 256 SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), 257 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988), 258 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007), 259 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008), 260 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100), 261 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), 262 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) 263 }; 264 265 #define DEFAULT_SH_MEM_CONFIG \ 266 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 267 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 268 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 269 270 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev); 271 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev); 272 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev); 273 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev); 274 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev); 275 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev); 276 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev); 277 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 278 struct amdgpu_cu_info *cu_info); 279 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); 280 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 281 u32 sh_num, u32 instance, int xcc_id); 282 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); 283 284 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 285 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 286 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 287 uint32_t val); 288 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device 
*adev); 289 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 290 uint16_t pasid, uint32_t flush_type, 291 bool all_hub, uint8_t dst_sel); 292 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 293 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 294 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 295 bool enable); 296 297 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 298 { 299 struct amdgpu_device *adev = kiq_ring->adev; 300 u64 shader_mc_addr; 301 302 /* Cleaner shader MC address */ 303 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 304 305 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 306 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 307 PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ 308 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 309 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 310 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 311 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ 312 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ 313 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 314 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 315 } 316 317 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 318 struct amdgpu_ring *ring) 319 { 320 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 321 uint64_t wptr_addr = ring->wptr_gpu_addr; 322 uint32_t me = 0, eng_sel = 0; 323 324 switch (ring->funcs->type) { 325 case AMDGPU_RING_TYPE_COMPUTE: 326 me = 1; 327 eng_sel = 0; 328 break; 329 case AMDGPU_RING_TYPE_GFX: 330 me = 0; 331 eng_sel = 4; 332 break; 333 case AMDGPU_RING_TYPE_MES: 334 me = 2; 335 eng_sel = 5; 336 break; 337 default: 338 WARN_ON(1); 339 } 340 341 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 342 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 343 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 344 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 345 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 346 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 347 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 348 PACKET3_MAP_QUEUES_ME((me)) | 349 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 350 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 351 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 352 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 353 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 354 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 355 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 356 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 357 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 358 } 359 360 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 361 struct amdgpu_ring *ring, 362 enum amdgpu_unmap_queues_action action, 363 u64 gpu_addr, u64 seq) 364 { 365 struct amdgpu_device *adev = kiq_ring->adev; 366 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 367 368 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 369 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); 370 return; 371 } 372 373 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 374 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 375 PACKET3_UNMAP_QUEUES_ACTION(action) | 376 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 377 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 378 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 379 amdgpu_ring_write(kiq_ring, 380 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 381 382 if (action == PREEMPT_QUEUES_NO_UNMAP) { 383 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 384 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 385 amdgpu_ring_write(kiq_ring, seq); 386 } else { 387 amdgpu_ring_write(kiq_ring, 0); 388 amdgpu_ring_write(kiq_ring, 0); 389 amdgpu_ring_write(kiq_ring, 0); 390 } 391 } 392 393 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 394 struct amdgpu_ring *ring, 395 u64 addr, 396 u64 seq) 397 { 398 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 399 400 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 401 amdgpu_ring_write(kiq_ring, 402 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 403 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 404 PACKET3_QUERY_STATUS_COMMAND(2)); 405 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 406 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 407 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 408 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 409 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 410 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 411 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 412 } 413 414 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 415 uint16_t pasid, uint32_t flush_type, 416 bool all_hub) 417 { 418 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 419 } 420 421 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 422 .kiq_set_resources = gfx11_kiq_set_resources, 423 .kiq_map_queues = gfx11_kiq_map_queues, 424 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 425 .kiq_query_status = gfx11_kiq_query_status, 426 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 427 .set_resources_size = 8, 428 .map_queues_size = 7, 429 .unmap_queues_size = 6, 430 .query_status_size = 7, 431 .invalidate_tlbs_size = 2, 432 }; 433 434 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 435 { 436 adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; 437 } 438 439 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 440 { 441 if (amdgpu_sriov_vf(adev)) 442 return; 443 444 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 445 case IP_VERSION(11, 0, 1): 446 case IP_VERSION(11, 0, 4): 447 soc15_program_register_sequence(adev, 448 golden_settings_gc_11_0_1, 449 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 450 break; 451 default: 452 break; 453 } 454 soc15_program_register_sequence(adev, 455 golden_settings_gc_11_0, 456 (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); 457 458 } 459 460 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 461 bool wc, uint32_t reg, uint32_t val) 462 { 463 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 464 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 465 WRITE_DATA_DST_SEL(0) | (wc ? 
				  WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization up to a count of 0x3ffe; the remainder is
	 * emitted one NOP at a time.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't have indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
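		/* Seed the writeback slot with a sentinel; the WRITE_DATA IB
		 * built below overwrites it with 0xDEADBEEF, which is what
		 * the final check looks for once the fence signals.
		 */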
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 586 cpu_ptr = &adev->wb.wb[index]; 587 588 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 589 if (r) { 590 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 591 goto err1; 592 } 593 } 594 595 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 596 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 597 ib.ptr[2] = lower_32_bits(gpu_addr); 598 ib.ptr[3] = upper_32_bits(gpu_addr); 599 ib.ptr[4] = 0xDEADBEEF; 600 ib.length_dw = 5; 601 602 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 603 if (r) 604 goto err2; 605 606 r = dma_fence_wait_timeout(f, false, timeout); 607 if (r == 0) { 608 r = -ETIMEDOUT; 609 goto err2; 610 } else if (r < 0) { 611 goto err2; 612 } 613 614 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 615 r = 0; 616 else 617 r = -EINVAL; 618 err2: 619 if (!ring->is_mes_queue) 620 amdgpu_ib_free(&ib, NULL); 621 dma_fence_put(f); 622 err1: 623 if (!ring->is_mes_queue) 624 amdgpu_device_wb_free(adev, index); 625 return r; 626 } 627 628 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 629 { 630 amdgpu_ucode_release(&adev->gfx.pfp_fw); 631 amdgpu_ucode_release(&adev->gfx.me_fw); 632 amdgpu_ucode_release(&adev->gfx.rlc_fw); 633 amdgpu_ucode_release(&adev->gfx.mec_fw); 634 635 kfree(adev->gfx.rlc.register_list_format); 636 } 637 638 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 639 { 640 const struct psp_firmware_header_v1_0 *toc_hdr; 641 int err = 0; 642 643 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 644 AMDGPU_UCODE_REQUIRED, 645 "amdgpu/%s_toc.bin", ucode_prefix); 646 if (err) 647 goto out; 648 649 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 650 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 651 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 652 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 653 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 654 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 655 return 0; 656 out: 657 amdgpu_ucode_release(&adev->psp.toc_fw); 658 return err; 659 } 660 661 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 662 { 663 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 664 case IP_VERSION(11, 0, 0): 665 case IP_VERSION(11, 0, 2): 666 case IP_VERSION(11, 0, 3): 667 if ((adev->gfx.me_fw_version >= 1505) && 668 (adev->gfx.pfp_fw_version >= 1600) && 669 (adev->gfx.mec_fw_version >= 512)) { 670 if (amdgpu_sriov_vf(adev)) 671 adev->gfx.cp_gfx_shadow = true; 672 else 673 adev->gfx.cp_gfx_shadow = false; 674 } 675 break; 676 default: 677 adev->gfx.cp_gfx_shadow = false; 678 break; 679 } 680 } 681 682 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 683 { 684 char ucode_prefix[25]; 685 int err; 686 const struct rlc_firmware_header_v2_0 *rlc_hdr; 687 uint16_t version_major; 688 uint16_t version_minor; 689 690 DRM_DEBUG("\n"); 691 692 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 693 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 694 AMDGPU_UCODE_REQUIRED, 695 "amdgpu/%s_pfp.bin", ucode_prefix); 696 if (err) 697 goto out; 698 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 699 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 700 (union amdgpu_firmware_header *) 701 adev->gfx.pfp_fw->data, 2, 0); 702 if (adev->gfx.rs64_enable) { 703 dev_info(adev->dev, "CP RS64 enable\n"); 704 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 705 
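		/* The RS64 PFP image also carries stack (data) sections;
		 * they are registered below as separate ucode entries.
		 */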
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 706 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 707 } else { 708 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 709 } 710 711 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 712 AMDGPU_UCODE_REQUIRED, 713 "amdgpu/%s_me.bin", ucode_prefix); 714 if (err) 715 goto out; 716 if (adev->gfx.rs64_enable) { 717 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 718 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 719 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 720 } else { 721 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 722 } 723 724 if (!amdgpu_sriov_vf(adev)) { 725 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 726 adev->pdev->revision == 0xCE) 727 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 728 AMDGPU_UCODE_REQUIRED, 729 "amdgpu/gc_11_0_0_rlc_1.bin"); 730 else 731 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 732 AMDGPU_UCODE_REQUIRED, 733 "amdgpu/%s_rlc.bin", ucode_prefix); 734 if (err) 735 goto out; 736 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 737 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 738 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 739 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 740 if (err) 741 goto out; 742 } 743 744 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 745 AMDGPU_UCODE_REQUIRED, 746 "amdgpu/%s_mec.bin", ucode_prefix); 747 if (err) 748 goto out; 749 if (adev->gfx.rs64_enable) { 750 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 751 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 752 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 753 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 754 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 755 } else { 756 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 757 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 758 } 759 760 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 761 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 762 763 /* only one MEC for gfx 11.0.0. 
*/ 764 adev->gfx.mec2_fw = NULL; 765 766 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 767 768 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 769 err = adev->gfx.imu.funcs->init_microcode(adev); 770 if (err) 771 DRM_ERROR("Failed to init imu firmware!\n"); 772 return err; 773 } 774 775 out: 776 if (err) { 777 amdgpu_ucode_release(&adev->gfx.pfp_fw); 778 amdgpu_ucode_release(&adev->gfx.me_fw); 779 amdgpu_ucode_release(&adev->gfx.rlc_fw); 780 amdgpu_ucode_release(&adev->gfx.mec_fw); 781 } 782 783 return err; 784 } 785 786 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 787 { 788 u32 count = 0; 789 const struct cs_section_def *sect = NULL; 790 const struct cs_extent_def *ext = NULL; 791 792 /* begin clear state */ 793 count += 2; 794 /* context control state */ 795 count += 3; 796 797 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 798 for (ext = sect->section; ext->extent != NULL; ++ext) { 799 if (sect->id == SECT_CONTEXT) 800 count += 2 + ext->reg_count; 801 else 802 return 0; 803 } 804 } 805 806 /* set PA_SC_TILE_STEERING_OVERRIDE */ 807 count += 3; 808 /* end clear state */ 809 count += 2; 810 /* clear state */ 811 count += 2; 812 813 return count; 814 } 815 816 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 817 volatile u32 *buffer) 818 { 819 u32 count = 0, i; 820 const struct cs_section_def *sect = NULL; 821 const struct cs_extent_def *ext = NULL; 822 int ctx_reg_offset; 823 824 if (adev->gfx.rlc.cs_data == NULL) 825 return; 826 if (buffer == NULL) 827 return; 828 829 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 830 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 831 832 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 833 buffer[count++] = cpu_to_le32(0x80000000); 834 buffer[count++] = cpu_to_le32(0x80000000); 835 836 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 837 for (ext = sect->section; ext->extent != NULL; ++ext) { 838 if (sect->id == SECT_CONTEXT) { 839 buffer[count++] = 840 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 841 buffer[count++] = cpu_to_le32(ext->reg_index - 842 PACKET3_SET_CONTEXT_REG_START); 843 for (i = 0; i < ext->reg_count; i++) 844 buffer[count++] = cpu_to_le32(ext->extent[i]); 845 } else { 846 return; 847 } 848 } 849 } 850 851 ctx_reg_offset = 852 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 853 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 854 buffer[count++] = cpu_to_le32(ctx_reg_offset); 855 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 856 857 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 858 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 859 860 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 861 buffer[count++] = cpu_to_le32(0); 862 } 863 864 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 865 { 866 /* clear state block */ 867 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 868 &adev->gfx.rlc.clear_state_gpu_addr, 869 (void **)&adev->gfx.rlc.cs_ptr); 870 871 /* jump table block */ 872 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 873 &adev->gfx.rlc.cp_table_gpu_addr, 874 (void **)&adev->gfx.rlc.cp_table_ptr); 875 } 876 877 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 878 { 879 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 880 881 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 882 
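	/* Cache the offsets of the registers used by the RLCG indirect
	 * register-access path: scratch registers, GRBM control/index and
	 * the RLC spare interrupt.
	 */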
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave
data */ 991 dst[(*no_fields)++] = 3; 992 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 993 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 994 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 995 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 996 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 997 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 998 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 999 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1000 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1001 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1002 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1003 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1004 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 1005 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1006 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1007 } 1008 1009 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1010 uint32_t wave, uint32_t start, 1011 uint32_t size, uint32_t *dst) 1012 { 1013 WARN_ON(simd != 0); 1014 1015 wave_read_regs( 1016 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1017 dst); 1018 } 1019 1020 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1021 uint32_t wave, uint32_t thread, 1022 uint32_t start, uint32_t size, 1023 uint32_t *dst) 1024 { 1025 wave_read_regs( 1026 adev, wave, thread, 1027 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1028 } 1029 1030 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1031 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1032 { 1033 soc21_grbm_select(adev, me, pipe, q, vm); 1034 } 1035 1036 /* all sizes are in bytes */ 1037 #define MQD_SHADOW_BASE_SIZE 73728 1038 #define MQD_SHADOW_BASE_ALIGNMENT 256 1039 #define MQD_FWWORKAREA_SIZE 484 1040 #define MQD_FWWORKAREA_ALIGNMENT 256 1041 1042 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1043 struct amdgpu_gfx_shadow_info *shadow_info) 1044 { 1045 if (adev->gfx.cp_gfx_shadow) { 1046 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1047 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1048 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1049 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1050 return 0; 1051 } else { 1052 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1053 return -ENOTSUPP; 1054 } 1055 } 1056 1057 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1058 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1059 .select_se_sh = &gfx_v11_0_select_se_sh, 1060 .read_wave_data = &gfx_v11_0_read_wave_data, 1061 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1062 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1063 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1064 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1065 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1066 }; 1067 1068 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1069 { 1070 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1071 case IP_VERSION(11, 0, 0): 1072 case IP_VERSION(11, 0, 2): 1073 adev->gfx.config.max_hw_contexts = 8; 1074 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1075 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1076 
adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1077 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1078 break; 1079 case IP_VERSION(11, 0, 3): 1080 adev->gfx.ras = &gfx_v11_0_3_ras; 1081 adev->gfx.config.max_hw_contexts = 8; 1082 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1083 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1084 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1085 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1086 break; 1087 case IP_VERSION(11, 0, 1): 1088 case IP_VERSION(11, 0, 4): 1089 case IP_VERSION(11, 5, 0): 1090 case IP_VERSION(11, 5, 1): 1091 case IP_VERSION(11, 5, 2): 1092 case IP_VERSION(11, 5, 3): 1093 adev->gfx.config.max_hw_contexts = 8; 1094 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1095 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1096 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1097 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1098 break; 1099 default: 1100 BUG(); 1101 break; 1102 } 1103 1104 return 0; 1105 } 1106 1107 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1108 int me, int pipe, int queue) 1109 { 1110 struct amdgpu_ring *ring; 1111 unsigned int irq_type; 1112 unsigned int hw_prio; 1113 1114 ring = &adev->gfx.gfx_ring[ring_id]; 1115 1116 ring->me = me; 1117 ring->pipe = pipe; 1118 ring->queue = queue; 1119 1120 ring->ring_obj = NULL; 1121 ring->use_doorbell = true; 1122 1123 if (!ring_id) 1124 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1125 else 1126 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1127 ring->vm_hub = AMDGPU_GFXHUB(0); 1128 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1129 1130 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1131 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1132 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1133 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1134 hw_prio, NULL); 1135 } 1136 1137 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1138 int mec, int pipe, int queue) 1139 { 1140 int r; 1141 unsigned irq_type; 1142 struct amdgpu_ring *ring; 1143 unsigned int hw_prio; 1144 1145 ring = &adev->gfx.compute_ring[ring_id]; 1146 1147 /* mec0 is me1 */ 1148 ring->me = mec + 1; 1149 ring->pipe = pipe; 1150 ring->queue = queue; 1151 1152 ring->ring_obj = NULL; 1153 ring->use_doorbell = true; 1154 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1155 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1156 + (ring_id * GFX11_MEC_HPD_SIZE); 1157 ring->vm_hub = AMDGPU_GFXHUB(0); 1158 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1159 1160 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1161 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1162 + ring->pipe; 1163 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1164 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1165 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1166 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1167 hw_prio, NULL); 1168 if (r) 1169 return r; 1170 1171 return 0; 1172 } 1173 1174 static struct { 1175 SOC21_FIRMWARE_ID id; 1176 unsigned int offset; 1177 unsigned int size; 1178 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1179 1180 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1181 { 1182 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1183 1184 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1185 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1186 rlc_autoload_info[ucode->id].id = ucode->id; 1187 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1188 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1189 1190 ucode++; 1191 } 1192 } 1193 1194 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1195 { 1196 uint32_t total_size = 0; 1197 SOC21_FIRMWARE_ID id; 1198 1199 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1200 1201 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1202 total_size += rlc_autoload_info[id].size; 1203 1204 /* In case the offset in rlc toc ucode is aligned */ 1205 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1206 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1207 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1208 1209 return total_size; 1210 } 1211 1212 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1213 { 1214 int r; 1215 uint32_t total_size; 1216 1217 total_size = gfx_v11_0_calc_toc_total_size(adev); 1218 1219 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1220 AMDGPU_GEM_DOMAIN_VRAM | 1221 AMDGPU_GEM_DOMAIN_GTT, 1222 &adev->gfx.rlc.rlc_autoload_bo, 1223 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1224 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1225 1226 if (r) { 1227 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1228 return r; 1229 } 1230 1231 return 0; 1232 } 1233 1234 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1235 SOC21_FIRMWARE_ID id, 1236 const void *fw_data, 1237 uint32_t fw_size, 1238 uint32_t *fw_autoload_mask) 1239 { 1240 uint32_t toc_offset; 1241 uint32_t toc_fw_size; 1242 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1243 1244 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1245 return; 1246 1247 toc_offset = rlc_autoload_info[id].offset; 1248 toc_fw_size = rlc_autoload_info[id].size; 1249 1250 if (fw_size == 0) 1251 fw_size = toc_fw_size; 1252 1253 if (fw_size > toc_fw_size) 1254 fw_size = toc_fw_size; 1255 1256 memcpy(ptr + toc_offset, fw_data, fw_size); 1257 1258 if (fw_size < toc_fw_size) 1259 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1260 1261 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1262 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1263 } 1264 1265 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1266 uint32_t *fw_autoload_mask) 1267 { 1268 void *data; 1269 uint32_t size; 1270 uint64_t *toc_ptr; 1271 1272 *(uint64_t *)fw_autoload_mask |= 0x1; 1273 1274 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1275 1276 data = adev->psp.toc.start_addr; 1277 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1278 1279 toc_ptr = (uint64_t *)data + size / 8 - 1; 1280 *toc_ptr = 
*(uint64_t *)fw_autoload_mask; 1281 1282 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1283 data, size, fw_autoload_mask); 1284 } 1285 1286 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1287 uint32_t *fw_autoload_mask) 1288 { 1289 const __le32 *fw_data; 1290 uint32_t fw_size; 1291 const struct gfx_firmware_header_v1_0 *cp_hdr; 1292 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1293 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1294 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1295 uint16_t version_major, version_minor; 1296 1297 if (adev->gfx.rs64_enable) { 1298 /* pfp ucode */ 1299 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1300 adev->gfx.pfp_fw->data; 1301 /* instruction */ 1302 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1303 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1304 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1305 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1306 fw_data, fw_size, fw_autoload_mask); 1307 /* data */ 1308 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1309 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1310 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1311 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1312 fw_data, fw_size, fw_autoload_mask); 1313 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1314 fw_data, fw_size, fw_autoload_mask); 1315 /* me ucode */ 1316 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1317 adev->gfx.me_fw->data; 1318 /* instruction */ 1319 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1320 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1321 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1322 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1323 fw_data, fw_size, fw_autoload_mask); 1324 /* data */ 1325 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1326 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1327 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1328 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1329 fw_data, fw_size, fw_autoload_mask); 1330 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1331 fw_data, fw_size, fw_autoload_mask); 1332 /* mec ucode */ 1333 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1334 adev->gfx.mec_fw->data; 1335 /* instruction */ 1336 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1337 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1338 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1339 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1340 fw_data, fw_size, fw_autoload_mask); 1341 /* data */ 1342 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1343 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1344 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1345 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1346 fw_data, fw_size, fw_autoload_mask); 1347 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1348 fw_data, fw_size, fw_autoload_mask); 1349 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1350 fw_data, fw_size, fw_autoload_mask); 1351 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1352 fw_data, fw_size, fw_autoload_mask); 1353 } else { 1354 /* pfp ucode */ 1355 cp_hdr = (const struct 
gfx_firmware_header_v1_0 *) 1356 adev->gfx.pfp_fw->data; 1357 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1358 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1359 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1360 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1361 fw_data, fw_size, fw_autoload_mask); 1362 1363 /* me ucode */ 1364 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1365 adev->gfx.me_fw->data; 1366 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1367 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1368 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1369 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1370 fw_data, fw_size, fw_autoload_mask); 1371 1372 /* mec ucode */ 1373 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1374 adev->gfx.mec_fw->data; 1375 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1376 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1377 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1378 cp_hdr->jt_size * 4; 1379 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1380 fw_data, fw_size, fw_autoload_mask); 1381 } 1382 1383 /* rlc ucode */ 1384 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1385 adev->gfx.rlc_fw->data; 1386 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1387 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1388 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1389 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1390 fw_data, fw_size, fw_autoload_mask); 1391 1392 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1393 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1394 if (version_major == 2) { 1395 if (version_minor >= 2) { 1396 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1397 1398 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1399 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1400 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1401 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1402 fw_data, fw_size, fw_autoload_mask); 1403 1404 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1405 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1406 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1407 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1408 fw_data, fw_size, fw_autoload_mask); 1409 } 1410 } 1411 } 1412 1413 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1414 uint32_t *fw_autoload_mask) 1415 { 1416 const __le32 *fw_data; 1417 uint32_t fw_size; 1418 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1419 1420 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1421 adev->sdma.instance[0].fw->data; 1422 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1423 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1424 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1425 1426 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1427 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1428 1429 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1430 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1431 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1432 1433 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1434 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, 
fw_autoload_mask); 1435 } 1436 1437 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1438 uint32_t *fw_autoload_mask) 1439 { 1440 const __le32 *fw_data; 1441 unsigned fw_size; 1442 const struct mes_firmware_header_v1_0 *mes_hdr; 1443 int pipe, ucode_id, data_id; 1444 1445 for (pipe = 0; pipe < 2; pipe++) { 1446 if (pipe==0) { 1447 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1448 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1449 } else { 1450 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1451 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1452 } 1453 1454 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1455 adev->mes.fw[pipe]->data; 1456 1457 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1458 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1459 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1460 1461 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1462 ucode_id, fw_data, fw_size, fw_autoload_mask); 1463 1464 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1465 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1466 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1467 1468 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1469 data_id, fw_data, fw_size, fw_autoload_mask); 1470 } 1471 } 1472 1473 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1474 { 1475 uint32_t rlc_g_offset, rlc_g_size; 1476 uint64_t gpu_addr; 1477 uint32_t autoload_fw_id[2]; 1478 1479 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1480 1481 /* RLC autoload sequence 2: copy ucode */ 1482 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1483 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1484 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1485 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1486 1487 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1488 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1489 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1490 1491 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1492 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1493 1494 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1495 1496 /* RLC autoload sequence 3: load IMU fw */ 1497 if (adev->gfx.imu.funcs->load_microcode) 1498 adev->gfx.imu.funcs->load_microcode(adev); 1499 /* RLC autoload sequence 4 init IMU fw */ 1500 if (adev->gfx.imu.funcs->setup_imu) 1501 adev->gfx.imu.funcs->setup_imu(adev); 1502 if (adev->gfx.imu.funcs->start_imu) 1503 adev->gfx.imu.funcs->start_imu(adev); 1504 1505 /* RLC autoload sequence 5 disable gpa mode */ 1506 gfx_v11_0_disable_gpa_mode(adev); 1507 1508 return 0; 1509 } 1510 1511 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1512 { 1513 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1514 uint32_t *ptr; 1515 uint32_t inst; 1516 1517 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1518 if (!ptr) { 1519 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1520 adev->gfx.ip_dump_core = NULL; 1521 } else { 1522 adev->gfx.ip_dump_core = ptr; 1523 } 1524 1525 /* Allocate memory for compute queue registers for all the instances */ 1526 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1527 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1528 adev->gfx.mec.num_queue_per_pipe; 1529 1530 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), 
GFP_KERNEL); 1531 if (!ptr) { 1532 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1533 adev->gfx.ip_dump_compute_queues = NULL; 1534 } else { 1535 adev->gfx.ip_dump_compute_queues = ptr; 1536 } 1537 1538 /* Allocate memory for gfx queue registers for all the instances */ 1539 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1540 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1541 adev->gfx.me.num_queue_per_pipe; 1542 1543 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1544 if (!ptr) { 1545 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1546 adev->gfx.ip_dump_gfx_queues = NULL; 1547 } else { 1548 adev->gfx.ip_dump_gfx_queues = ptr; 1549 } 1550 } 1551 1552 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1553 { 1554 int i, j, k, r, ring_id = 0; 1555 int xcc_id = 0; 1556 struct amdgpu_device *adev = ip_block->adev; 1557 1558 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1559 1560 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1561 case IP_VERSION(11, 0, 0): 1562 case IP_VERSION(11, 0, 2): 1563 case IP_VERSION(11, 0, 3): 1564 adev->gfx.me.num_me = 1; 1565 adev->gfx.me.num_pipe_per_me = 1; 1566 adev->gfx.me.num_queue_per_pipe = 1; 1567 adev->gfx.mec.num_mec = 2; 1568 adev->gfx.mec.num_pipe_per_mec = 4; 1569 adev->gfx.mec.num_queue_per_pipe = 4; 1570 break; 1571 case IP_VERSION(11, 0, 1): 1572 case IP_VERSION(11, 0, 4): 1573 case IP_VERSION(11, 5, 0): 1574 case IP_VERSION(11, 5, 1): 1575 case IP_VERSION(11, 5, 2): 1576 case IP_VERSION(11, 5, 3): 1577 adev->gfx.me.num_me = 1; 1578 adev->gfx.me.num_pipe_per_me = 1; 1579 adev->gfx.me.num_queue_per_pipe = 1; 1580 adev->gfx.mec.num_mec = 1; 1581 adev->gfx.mec.num_pipe_per_mec = 4; 1582 adev->gfx.mec.num_queue_per_pipe = 4; 1583 break; 1584 default: 1585 adev->gfx.me.num_me = 1; 1586 adev->gfx.me.num_pipe_per_me = 1; 1587 adev->gfx.me.num_queue_per_pipe = 1; 1588 adev->gfx.mec.num_mec = 1; 1589 adev->gfx.mec.num_pipe_per_mec = 4; 1590 adev->gfx.mec.num_queue_per_pipe = 8; 1591 break; 1592 } 1593 1594 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1595 case IP_VERSION(11, 0, 0): 1596 case IP_VERSION(11, 0, 2): 1597 case IP_VERSION(11, 0, 3): 1598 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1599 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1600 if (adev->gfx.me_fw_version >= 2280 && 1601 adev->gfx.pfp_fw_version >= 2370 && 1602 adev->gfx.mec_fw_version >= 2450 && 1603 adev->mes.fw_version[0] >= 99) { 1604 adev->gfx.enable_cleaner_shader = true; 1605 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1606 if (r) { 1607 adev->gfx.enable_cleaner_shader = false; 1608 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1609 } 1610 } 1611 break; 1612 default: 1613 adev->gfx.enable_cleaner_shader = false; 1614 break; 1615 } 1616 1617 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1618 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1619 amdgpu_sriov_is_pp_one_vf(adev)) 1620 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1621 1622 /* EOP Event */ 1623 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1624 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1625 &adev->gfx.eop_irq); 1626 if (r) 1627 return r; 1628 1629 /* Bad opcode Event */ 1630 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1631 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1632 &adev->gfx.bad_op_irq); 1633 if (r) 1634 return r; 1635 1636 /* Privileged reg */ 1637 r = 
amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1638 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1639 &adev->gfx.priv_reg_irq); 1640 if (r) 1641 return r; 1642 1643 /* Privileged inst */ 1644 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1645 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1646 &adev->gfx.priv_inst_irq); 1647 if (r) 1648 return r; 1649 1650 /* FED error */ 1651 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1652 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1653 &adev->gfx.rlc_gc_fed_irq); 1654 if (r) 1655 return r; 1656 1657 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1658 1659 gfx_v11_0_me_init(adev); 1660 1661 r = gfx_v11_0_rlc_init(adev); 1662 if (r) { 1663 DRM_ERROR("Failed to init rlc BOs!\n"); 1664 return r; 1665 } 1666 1667 r = gfx_v11_0_mec_init(adev); 1668 if (r) { 1669 DRM_ERROR("Failed to init MEC BOs!\n"); 1670 return r; 1671 } 1672 1673 /* set up the gfx ring */ 1674 for (i = 0; i < adev->gfx.me.num_me; i++) { 1675 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 1676 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1677 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1678 continue; 1679 1680 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1681 i, k, j); 1682 if (r) 1683 return r; 1684 ring_id++; 1685 } 1686 } 1687 } 1688 1689 ring_id = 0; 1690 /* set up the compute queues - allocate horizontally across pipes */ 1691 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1692 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1693 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1694 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1695 k, j)) 1696 continue; 1697 1698 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1699 i, k, j); 1700 if (r) 1701 return r; 1702 1703 ring_id++; 1704 } 1705 } 1706 } 1707 1708 adev->gfx.gfx_supported_reset = 1709 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1710 adev->gfx.compute_supported_reset = 1711 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1712 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1713 case IP_VERSION(11, 0, 0): 1714 case IP_VERSION(11, 0, 2): 1715 case IP_VERSION(11, 0, 3): 1716 if ((adev->gfx.me_fw_version >= 2280) && 1717 (adev->gfx.mec_fw_version >= 2410)) { 1718 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1719 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1720 } 1721 break; 1722 default: 1723 break; 1724 } 1725 1726 if (!adev->enable_mes_kiq) { 1727 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1728 if (r) { 1729 DRM_ERROR("Failed to init KIQ BOs!\n"); 1730 return r; 1731 } 1732 1733 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1734 if (r) 1735 return r; 1736 } 1737 1738 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1739 if (r) 1740 return r; 1741 1742 /* allocate visible FB for rlc auto-loading fw */ 1743 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1744 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1745 if (r) 1746 return r; 1747 } 1748 1749 r = gfx_v11_0_gpu_early_init(adev); 1750 if (r) 1751 return r; 1752 1753 if (amdgpu_gfx_ras_sw_init(adev)) { 1754 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1755 return -EINVAL; 1756 } 1757 1758 gfx_v11_0_alloc_ip_dump(adev); 1759 1760 r = amdgpu_gfx_sysfs_init(adev); 1761 if (r) 1762 return r; 1763 1764 return 0; 1765 } 1766 1767 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1768 { 1769 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1770 &adev->gfx.pfp.pfp_fw_gpu_addr, 1771 (void 
**)&adev->gfx.pfp.pfp_fw_ptr); 1772 1773 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1774 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1775 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1776 } 1777 1778 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1779 { 1780 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1781 &adev->gfx.me.me_fw_gpu_addr, 1782 (void **)&adev->gfx.me.me_fw_ptr); 1783 1784 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1785 &adev->gfx.me.me_fw_data_gpu_addr, 1786 (void **)&adev->gfx.me.me_fw_data_ptr); 1787 } 1788 1789 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1790 { 1791 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1792 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1793 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1794 } 1795 1796 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1797 { 1798 int i; 1799 struct amdgpu_device *adev = ip_block->adev; 1800 1801 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1802 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1803 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1804 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1805 1806 amdgpu_gfx_mqd_sw_fini(adev, 0); 1807 1808 if (!adev->enable_mes_kiq) { 1809 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1810 amdgpu_gfx_kiq_fini(adev, 0); 1811 } 1812 1813 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1814 1815 gfx_v11_0_pfp_fini(adev); 1816 gfx_v11_0_me_fini(adev); 1817 gfx_v11_0_rlc_fini(adev); 1818 gfx_v11_0_mec_fini(adev); 1819 1820 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1821 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1822 1823 gfx_v11_0_free_microcode(adev); 1824 1825 amdgpu_gfx_sysfs_fini(adev); 1826 1827 kfree(adev->gfx.ip_dump_core); 1828 kfree(adev->gfx.ip_dump_compute_queues); 1829 kfree(adev->gfx.ip_dump_gfx_queues); 1830 1831 return 0; 1832 } 1833 1834 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1835 u32 sh_num, u32 instance, int xcc_id) 1836 { 1837 u32 data; 1838 1839 if (instance == 0xffffffff) 1840 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1841 INSTANCE_BROADCAST_WRITES, 1); 1842 else 1843 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1844 instance); 1845 1846 if (se_num == 0xffffffff) 1847 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1848 1); 1849 else 1850 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1851 1852 if (sh_num == 0xffffffff) 1853 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1854 1); 1855 else 1856 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1857 1858 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1859 } 1860 1861 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1862 { 1863 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1864 1865 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1866 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1867 CC_GC_SA_UNIT_DISABLE, 1868 SA_DISABLE); 1869 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1870 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1871 GC_USER_SA_UNIT_DISABLE, 1872 SA_DISABLE); 1873 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1874 adev->gfx.config.max_shader_engines); 1875 1876 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1877 } 1878 1879 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1880 { 1881 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 
1882 u32 rb_mask; 1883 1884 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1885 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1886 CC_RB_BACKEND_DISABLE, 1887 BACKEND_DISABLE); 1888 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1889 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1890 GC_USER_RB_BACKEND_DISABLE, 1891 BACKEND_DISABLE); 1892 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1893 adev->gfx.config.max_shader_engines); 1894 1895 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1896 } 1897 1898 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1899 { 1900 u32 rb_bitmap_per_sa; 1901 u32 rb_bitmap_width_per_sa; 1902 u32 max_sa; 1903 u32 active_sa_bitmap; 1904 u32 global_active_rb_bitmap; 1905 u32 active_rb_bitmap = 0; 1906 u32 i; 1907 1908 /* query sa bitmap from SA_UNIT_DISABLE registers */ 1909 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 1910 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 1911 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 1912 1913 /* generate active rb bitmap according to active sa bitmap */ 1914 max_sa = adev->gfx.config.max_shader_engines * 1915 adev->gfx.config.max_sh_per_se; 1916 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1917 adev->gfx.config.max_sh_per_se; 1918 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 1919 1920 for (i = 0; i < max_sa; i++) { 1921 if (active_sa_bitmap & (1 << i)) 1922 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 1923 } 1924 1925 active_rb_bitmap &= global_active_rb_bitmap; 1926 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 1927 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 1928 } 1929 1930 #define DEFAULT_SH_MEM_BASES (0x6000) 1931 #define LDS_APP_BASE 0x1 1932 #define SCRATCH_APP_BASE 0x2 1933 1934 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 1935 { 1936 int i; 1937 uint32_t sh_mem_bases; 1938 uint32_t data; 1939 1940 /* 1941 * Configure apertures: 1942 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1943 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 1944 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1945 */ 1946 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 1947 SCRATCH_APP_BASE; 1948 1949 mutex_lock(&adev->srbm_mutex); 1950 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1951 soc21_grbm_select(adev, 0, 0, 0, i); 1952 /* CP and shaders */ 1953 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1954 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 1955 1956 /* Enable trap for each kfd vmid. */ 1957 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 1958 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 1959 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 1960 } 1961 soc21_grbm_select(adev, 0, 0, 0, 0); 1962 mutex_unlock(&adev->srbm_mutex); 1963 1964 /* 1965 * Initialize all compute VMIDs to have no GDS, GWS, or OA 1966 * access. These should be enabled by FW for target VMIDs. 
1967 */ 1968 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1969 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 1970 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 1971 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 1972 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 1973 } 1974 } 1975 1976 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 1977 { 1978 int vmid; 1979 1980 /* 1981 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 1982 * access. Compute VMIDs should be enabled by FW for target VMIDs, 1983 * the driver can enable them for graphics. VMID0 should maintain 1984 * access so that HWS firmware can save/restore entries. 1985 */ 1986 for (vmid = 1; vmid < 16; vmid++) { 1987 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 1988 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 1989 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 1990 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 1991 } 1992 } 1993 1994 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 1995 { 1996 /* TODO: harvest feature to be added later. */ 1997 } 1998 1999 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2000 { 2001 /* TCCs are global (not instanced). */ 2002 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2003 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2004 2005 adev->gfx.config.tcc_disabled_mask = 2006 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2007 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2008 } 2009 2010 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2011 { 2012 u32 tmp; 2013 int i; 2014 2015 if (!amdgpu_sriov_vf(adev)) 2016 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2017 2018 gfx_v11_0_setup_rb(adev); 2019 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2020 gfx_v11_0_get_tcc_info(adev); 2021 adev->gfx.config.pa_sc_tile_steering_override = 0; 2022 2023 /* Set whether texture coordinate truncation is conformant. */ 2024 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2025 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2026 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2027 2028 /* XXX SH_MEM regs */ 2029 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2030 mutex_lock(&adev->srbm_mutex); 2031 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2032 soc21_grbm_select(adev, 0, 0, 0, i); 2033 /* CP and shaders */ 2034 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2035 if (i != 0) { 2036 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2037 (adev->gmc.private_aperture_start >> 48)); 2038 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2039 (adev->gmc.shared_aperture_start >> 48)); 2040 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2041 } 2042 } 2043 soc21_grbm_select(adev, 0, 0, 0, 0); 2044 2045 mutex_unlock(&adev->srbm_mutex); 2046 2047 gfx_v11_0_init_compute_vmid(adev); 2048 gfx_v11_0_init_gds_vmid(adev); 2049 } 2050 2051 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2052 int me, int pipe) 2053 { 2054 if (me != 0) 2055 return 0; 2056 2057 switch (pipe) { 2058 case 0: 2059 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2060 case 1: 2061 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2062 default: 2063 return 0; 2064 } 2065 } 2066 2067 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2068 int me, int pipe) 2069 { 2070 /* 2071 * amdgpu controls only the first MEC. 
That's why this function only 2072 * handles the setting of interrupts for this specific MEC. All other 2073 * pipes' interrupts are set by amdkfd. 2074 */ 2075 if (me != 1) 2076 return 0; 2077 2078 switch (pipe) { 2079 case 0: 2080 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2081 case 1: 2082 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2083 case 2: 2084 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2085 case 3: 2086 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2087 default: 2088 return 0; 2089 } 2090 } 2091 2092 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2093 bool enable) 2094 { 2095 u32 tmp, cp_int_cntl_reg; 2096 int i, j; 2097 2098 if (amdgpu_sriov_vf(adev)) 2099 return; 2100 2101 for (i = 0; i < adev->gfx.me.num_me; i++) { 2102 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2103 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2104 2105 if (cp_int_cntl_reg) { 2106 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2107 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2108 enable ? 1 : 0); 2109 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2110 enable ? 1 : 0); 2111 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2112 enable ? 1 : 0); 2113 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2114 enable ? 1 : 0); 2115 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2116 } 2117 } 2118 } 2119 } 2120 2121 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2122 { 2123 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2124 2125 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2126 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2127 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2128 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2129 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2130 2131 return 0; 2132 } 2133 2134 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2135 { 2136 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2137 2138 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2139 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2140 } 2141 2142 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2143 { 2144 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2145 udelay(50); 2146 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2147 udelay(50); 2148 } 2149 2150 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2151 bool enable) 2152 { 2153 uint32_t rlc_pg_cntl; 2154 2155 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2156 2157 if (!enable) { 2158 /* RLC_PG_CNTL[23] = 0 (default) 2159 * RLC will wait for handshake acks with SMU 2160 * GFXOFF will be enabled 2161 * RLC_PG_CNTL[23] = 1 2162 * RLC will not issue any message to SMU 2163 * hence no handshake between SMU & RLC 2164 * GFXOFF will be disabled 2165 */ 2166 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2167 } else 2168 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2169 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2170 } 2171 2172 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2173 { 2174 /* TODO: enable rlc & smu handshake until smu 2175 * and gfxoff feature works as expected */ 2176 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2177 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2178 2179 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2180 udelay(50); 2181 } 2182 2183 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device 
*adev) 2184 { 2185 uint32_t tmp; 2186 2187 /* enable Save Restore Machine */ 2188 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2189 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2190 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2191 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2192 } 2193 2194 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2195 { 2196 const struct rlc_firmware_header_v2_0 *hdr; 2197 const __le32 *fw_data; 2198 unsigned i, fw_size; 2199 2200 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2201 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2202 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2203 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2204 2205 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2206 RLCG_UCODE_LOADING_START_ADDRESS); 2207 2208 for (i = 0; i < fw_size; i++) 2209 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2210 le32_to_cpup(fw_data++)); 2211 2212 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2213 } 2214 2215 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2216 { 2217 const struct rlc_firmware_header_v2_2 *hdr; 2218 const __le32 *fw_data; 2219 unsigned i, fw_size; 2220 u32 tmp; 2221 2222 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2223 2224 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2225 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2226 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2227 2228 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2229 2230 for (i = 0; i < fw_size; i++) { 2231 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2232 msleep(1); 2233 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2234 le32_to_cpup(fw_data++)); 2235 } 2236 2237 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2238 2239 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2240 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2241 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2242 2243 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2244 for (i = 0; i < fw_size; i++) { 2245 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2246 msleep(1); 2247 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2248 le32_to_cpup(fw_data++)); 2249 } 2250 2251 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2252 2253 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2254 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2255 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2256 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2257 } 2258 2259 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2260 { 2261 const struct rlc_firmware_header_v2_3 *hdr; 2262 const __le32 *fw_data; 2263 unsigned i, fw_size; 2264 u32 tmp; 2265 2266 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2267 2268 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2269 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2270 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2271 2272 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2273 2274 for (i = 0; i < fw_size; i++) { 2275 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2276 msleep(1); 2277 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2278 le32_to_cpup(fw_data++)); 2279 } 2280 2281 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2282 2283 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2284 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2285 WREG32_SOC15(GC, 0, 
regRLC_GPM_THREAD_ENABLE, tmp); 2286 2287 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2288 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2289 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2290 2291 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2292 2293 for (i = 0; i < fw_size; i++) { 2294 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2295 msleep(1); 2296 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2297 le32_to_cpup(fw_data++)); 2298 } 2299 2300 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2301 2302 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2303 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2304 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2305 } 2306 2307 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2308 { 2309 const struct rlc_firmware_header_v2_0 *hdr; 2310 uint16_t version_major; 2311 uint16_t version_minor; 2312 2313 if (!adev->gfx.rlc_fw) 2314 return -EINVAL; 2315 2316 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2317 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2318 2319 version_major = le16_to_cpu(hdr->header.header_version_major); 2320 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2321 2322 if (version_major == 2) { 2323 gfx_v11_0_load_rlcg_microcode(adev); 2324 if (amdgpu_dpm == 1) { 2325 if (version_minor >= 2) 2326 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2327 if (version_minor == 3) 2328 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2329 } 2330 2331 return 0; 2332 } 2333 2334 return -EINVAL; 2335 } 2336 2337 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2338 { 2339 int r; 2340 2341 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2342 gfx_v11_0_init_csb(adev); 2343 2344 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2345 gfx_v11_0_rlc_enable_srm(adev); 2346 } else { 2347 if (amdgpu_sriov_vf(adev)) { 2348 gfx_v11_0_init_csb(adev); 2349 return 0; 2350 } 2351 2352 adev->gfx.rlc.funcs->stop(adev); 2353 2354 /* disable CG */ 2355 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2356 2357 /* disable PG */ 2358 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2359 2360 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2361 /* legacy rlc firmware loading */ 2362 r = gfx_v11_0_rlc_load_microcode(adev); 2363 if (r) 2364 return r; 2365 } 2366 2367 gfx_v11_0_init_csb(adev); 2368 2369 adev->gfx.rlc.funcs->start(adev); 2370 } 2371 return 0; 2372 } 2373 2374 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2375 { 2376 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2377 uint32_t tmp; 2378 int i; 2379 2380 /* Trigger an invalidation of the L1 instruction caches */ 2381 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2382 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2383 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2384 2385 /* Wait for invalidation complete */ 2386 for (i = 0; i < usec_timeout; i++) { 2387 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2388 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2389 INVALIDATE_CACHE_COMPLETE)) 2390 break; 2391 udelay(1); 2392 } 2393 2394 if (i >= usec_timeout) { 2395 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2396 return -EINVAL; 2397 } 2398 2399 if (amdgpu_emu_mode == 1) 2400 adev->hdp.funcs->flush_hdp(adev, NULL); 2401 2402 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2403 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2404 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2405 tmp = 
REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/* Program me ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000; /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/* Program pfp ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000; /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);

	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	/* Program mec1 ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int
gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2507 { 2508 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2509 uint32_t tmp; 2510 unsigned i, pipe_id; 2511 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2512 2513 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2514 adev->gfx.pfp_fw->data; 2515 2516 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2517 lower_32_bits(addr)); 2518 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2519 upper_32_bits(addr)); 2520 2521 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2522 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2523 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2524 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2525 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2526 2527 /* 2528 * Programming any of the CP_PFP_IC_BASE registers 2529 * forces invalidation of the ME L1 I$. Wait for the 2530 * invalidation complete 2531 */ 2532 for (i = 0; i < usec_timeout; i++) { 2533 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2534 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2535 INVALIDATE_CACHE_COMPLETE)) 2536 break; 2537 udelay(1); 2538 } 2539 2540 if (i >= usec_timeout) { 2541 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2542 return -EINVAL; 2543 } 2544 2545 /* Prime the L1 instruction caches */ 2546 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2547 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2548 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2549 /* Waiting for cache primed*/ 2550 for (i = 0; i < usec_timeout; i++) { 2551 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2552 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2553 ICACHE_PRIMED)) 2554 break; 2555 udelay(1); 2556 } 2557 2558 if (i >= usec_timeout) { 2559 dev_err(adev->dev, "failed to prime instruction cache\n"); 2560 return -EINVAL; 2561 } 2562 2563 mutex_lock(&adev->srbm_mutex); 2564 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2565 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2566 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2567 (pfp_hdr->ucode_start_addr_hi << 30) | 2568 (pfp_hdr->ucode_start_addr_lo >> 2)); 2569 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2570 pfp_hdr->ucode_start_addr_hi >> 2); 2571 2572 /* 2573 * Program CP_ME_CNTL to reset given PIPE to take 2574 * effect of CP_PFP_PRGRM_CNTR_START. 2575 */ 2576 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2577 if (pipe_id == 0) 2578 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2579 PFP_PIPE0_RESET, 1); 2580 else 2581 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2582 PFP_PIPE1_RESET, 1); 2583 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2584 2585 /* Clear pfp pipe0 reset bit. 
*/ 2586 if (pipe_id == 0) 2587 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2588 PFP_PIPE0_RESET, 0); 2589 else 2590 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2591 PFP_PIPE1_RESET, 0); 2592 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2593 2594 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2595 lower_32_bits(addr2)); 2596 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2597 upper_32_bits(addr2)); 2598 } 2599 soc21_grbm_select(adev, 0, 0, 0, 0); 2600 mutex_unlock(&adev->srbm_mutex); 2601 2602 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2603 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2604 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2605 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2606 2607 /* Invalidate the data caches */ 2608 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2609 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2610 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2611 2612 for (i = 0; i < usec_timeout; i++) { 2613 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2614 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2615 INVALIDATE_DCACHE_COMPLETE)) 2616 break; 2617 udelay(1); 2618 } 2619 2620 if (i >= usec_timeout) { 2621 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2622 return -EINVAL; 2623 } 2624 2625 return 0; 2626 } 2627 2628 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2629 { 2630 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2631 uint32_t tmp; 2632 unsigned i, pipe_id; 2633 const struct gfx_firmware_header_v2_0 *me_hdr; 2634 2635 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2636 adev->gfx.me_fw->data; 2637 2638 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2639 lower_32_bits(addr)); 2640 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2641 upper_32_bits(addr)); 2642 2643 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2644 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2645 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2646 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2647 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2648 2649 /* 2650 * Programming any of the CP_ME_IC_BASE registers 2651 * forces invalidation of the ME L1 I$. 
Wait for the 2652 * invalidation complete 2653 */ 2654 for (i = 0; i < usec_timeout; i++) { 2655 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2656 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2657 INVALIDATE_CACHE_COMPLETE)) 2658 break; 2659 udelay(1); 2660 } 2661 2662 if (i >= usec_timeout) { 2663 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2664 return -EINVAL; 2665 } 2666 2667 /* Prime the instruction caches */ 2668 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2669 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2670 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2671 2672 /* Waiting for instruction cache primed*/ 2673 for (i = 0; i < usec_timeout; i++) { 2674 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2675 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2676 ICACHE_PRIMED)) 2677 break; 2678 udelay(1); 2679 } 2680 2681 if (i >= usec_timeout) { 2682 dev_err(adev->dev, "failed to prime instruction cache\n"); 2683 return -EINVAL; 2684 } 2685 2686 mutex_lock(&adev->srbm_mutex); 2687 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2688 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2689 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2690 (me_hdr->ucode_start_addr_hi << 30) | 2691 (me_hdr->ucode_start_addr_lo >> 2) ); 2692 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2693 me_hdr->ucode_start_addr_hi>>2); 2694 2695 /* 2696 * Program CP_ME_CNTL to reset given PIPE to take 2697 * effect of CP_PFP_PRGRM_CNTR_START. 2698 */ 2699 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2700 if (pipe_id == 0) 2701 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2702 ME_PIPE0_RESET, 1); 2703 else 2704 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2705 ME_PIPE1_RESET, 1); 2706 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2707 2708 /* Clear pfp pipe0 reset bit. 
*/ 2709 if (pipe_id == 0) 2710 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2711 ME_PIPE0_RESET, 0); 2712 else 2713 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2714 ME_PIPE1_RESET, 0); 2715 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2716 2717 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2718 lower_32_bits(addr2)); 2719 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2720 upper_32_bits(addr2)); 2721 } 2722 soc21_grbm_select(adev, 0, 0, 0, 0); 2723 mutex_unlock(&adev->srbm_mutex); 2724 2725 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2726 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2727 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2728 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2729 2730 /* Invalidate the data caches */ 2731 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2732 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2733 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2734 2735 for (i = 0; i < usec_timeout; i++) { 2736 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2737 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2738 INVALIDATE_DCACHE_COMPLETE)) 2739 break; 2740 udelay(1); 2741 } 2742 2743 if (i >= usec_timeout) { 2744 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2745 return -EINVAL; 2746 } 2747 2748 return 0; 2749 } 2750 2751 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2752 { 2753 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2754 uint32_t tmp; 2755 unsigned i; 2756 const struct gfx_firmware_header_v2_0 *mec_hdr; 2757 2758 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2759 adev->gfx.mec_fw->data; 2760 2761 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2762 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2763 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2764 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2765 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2766 2767 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2768 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2769 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2770 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2771 2772 mutex_lock(&adev->srbm_mutex); 2773 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2774 soc21_grbm_select(adev, 1, i, 0, 0); 2775 2776 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2777 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2778 upper_32_bits(addr2)); 2779 2780 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2781 mec_hdr->ucode_start_addr_lo >> 2 | 2782 mec_hdr->ucode_start_addr_hi << 30); 2783 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2784 mec_hdr->ucode_start_addr_hi >> 2); 2785 2786 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2787 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2788 upper_32_bits(addr)); 2789 } 2790 mutex_unlock(&adev->srbm_mutex); 2791 soc21_grbm_select(adev, 0, 0, 0, 0); 2792 2793 /* Trigger an invalidation of the L1 instruction caches */ 2794 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2795 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2796 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2797 2798 /* Wait for invalidation complete */ 2799 for (i = 0; i < usec_timeout; i++) { 2800 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2801 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2802 INVALIDATE_DCACHE_COMPLETE)) 2803 break; 2804 udelay(1); 2805 } 2806 2807 if (i >= 
usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	return 0;
}

static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v2_0 *pfp_hdr;
	const struct gfx_firmware_header_v2_0 *me_hdr;
	const struct gfx_firmware_header_v2_0 *mec_hdr;
	uint32_t pipe_id, tmp;

	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.mec_fw->data;
	me_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.me_fw->data;
	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.pfp_fw->data;

	/* config pfp program start addr */
	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			(pfp_hdr->ucode_start_addr_hi << 30) |
			(pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			pfp_hdr->ucode_start_addr_hi >> 2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset pfp pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* clear pfp pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* config me program start addr */
	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset me pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* clear me pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

	/* config mec program start addr */
	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			mec_hdr->ucode_start_addr_lo >> 2 |
			mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			mec_hdr->ucode_start_addr_hi >> 2);
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* reset mec pipe */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);

	/* clear mec pipe reset */
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
}
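
/*
 * Backdoor autoload hand-off: poll until CP_STAT reads idle and the RLC
 * reports BOOTLOAD_COMPLETE (the bootload status register sits at a
 * different offset on GC 11.0.1/11.0.4 and GC 11.5.x), then point the
 * ME/PFP/MEC instruction and data caches at the images staged in the
 * autoload buffer.
 */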
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
{
	uint32_t cp_status;
	uint32_t bootload_status;
	int i, r;
	uint64_t addr, addr2;

	for (i = 0; i < adev->usec_timeout; i++) {
		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);

		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 1) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 4) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
			bootload_status = RREG32_SOC15(GC, 0,
					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
		else
			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);

		if ((cp_status == 0) &&
		    (REG_GET_FIELD(bootload_status,
			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
			break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
		return -ETIMEDOUT;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.rs64_enable) {
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
			if (r)
				return r;
		} else {
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
			r = gfx_v11_0_config_me_cache(adev, addr);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
			r = gfx_v11_0_config_pfp_cache(adev, addr);
			if (r)
				return r;
			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
			r = gfx_v11_0_config_mec_cache(adev, addr);
			if (r)
2993 return r; 2994 } 2995 } 2996 2997 return 0; 2998 } 2999 3000 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3001 { 3002 int i; 3003 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3004 3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3007 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3008 3009 for (i = 0; i < adev->usec_timeout; i++) { 3010 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3011 break; 3012 udelay(1); 3013 } 3014 3015 if (i >= adev->usec_timeout) 3016 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3017 3018 return 0; 3019 } 3020 3021 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3022 { 3023 int r; 3024 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3025 const __le32 *fw_data; 3026 unsigned i, fw_size; 3027 3028 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3029 adev->gfx.pfp_fw->data; 3030 3031 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3032 3033 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3034 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3035 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3036 3037 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3038 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3039 &adev->gfx.pfp.pfp_fw_obj, 3040 &adev->gfx.pfp.pfp_fw_gpu_addr, 3041 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3042 if (r) { 3043 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3044 gfx_v11_0_pfp_fini(adev); 3045 return r; 3046 } 3047 3048 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3049 3050 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3051 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3052 3053 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3054 3055 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3056 3057 for (i = 0; i < pfp_hdr->jt_size; i++) 3058 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3059 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3060 3061 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3062 3063 return 0; 3064 } 3065 3066 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3067 { 3068 int r; 3069 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3070 const __le32 *fw_ucode, *fw_data; 3071 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3072 uint32_t tmp; 3073 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3074 3075 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3076 adev->gfx.pfp_fw->data; 3077 3078 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3079 3080 /* instruction */ 3081 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3082 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3083 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3084 /* data */ 3085 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3086 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3087 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3088 3089 /* 64kb align */ 3090 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3091 64 * 1024, 3092 AMDGPU_GEM_DOMAIN_VRAM | 3093 AMDGPU_GEM_DOMAIN_GTT, 3094 &adev->gfx.pfp.pfp_fw_obj, 3095 &adev->gfx.pfp.pfp_fw_gpu_addr, 3096 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3097 if (r) { 3098 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3099 gfx_v11_0_pfp_fini(adev); 3100 return r; 3101 } 3102 3103 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3104 64 * 1024, 3105 AMDGPU_GEM_DOMAIN_VRAM | 3106 AMDGPU_GEM_DOMAIN_GTT, 3107 
&adev->gfx.pfp.pfp_fw_data_obj, 3108 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3109 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3110 if (r) { 3111 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3112 gfx_v11_0_pfp_fini(adev); 3113 return r; 3114 } 3115 3116 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3117 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3118 3119 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3120 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3121 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3122 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3123 3124 if (amdgpu_emu_mode == 1) 3125 adev->hdp.funcs->flush_hdp(adev, NULL); 3126 3127 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3128 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3129 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3130 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3131 3132 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3133 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3134 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3135 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3136 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3137 3138 /* 3139 * Programming any of the CP_PFP_IC_BASE registers 3140 * forces invalidation of the ME L1 I$. Wait for the 3141 * invalidation complete 3142 */ 3143 for (i = 0; i < usec_timeout; i++) { 3144 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3145 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3146 INVALIDATE_CACHE_COMPLETE)) 3147 break; 3148 udelay(1); 3149 } 3150 3151 if (i >= usec_timeout) { 3152 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3153 return -EINVAL; 3154 } 3155 3156 /* Prime the L1 instruction caches */ 3157 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3158 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3159 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3160 /* Waiting for cache primed*/ 3161 for (i = 0; i < usec_timeout; i++) { 3162 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3163 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3164 ICACHE_PRIMED)) 3165 break; 3166 udelay(1); 3167 } 3168 3169 if (i >= usec_timeout) { 3170 dev_err(adev->dev, "failed to prime instruction cache\n"); 3171 return -EINVAL; 3172 } 3173 3174 mutex_lock(&adev->srbm_mutex); 3175 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3176 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3177 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3178 (pfp_hdr->ucode_start_addr_hi << 30) | 3179 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3180 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3181 pfp_hdr->ucode_start_addr_hi>>2); 3182 3183 /* 3184 * Program CP_ME_CNTL to reset given PIPE to take 3185 * effect of CP_PFP_PRGRM_CNTR_START. 3186 */ 3187 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3188 if (pipe_id == 0) 3189 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3190 PFP_PIPE0_RESET, 1); 3191 else 3192 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3193 PFP_PIPE1_RESET, 1); 3194 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3195 3196 /* Clear pfp pipe0 reset bit. 
*/ 3197 if (pipe_id == 0) 3198 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3199 PFP_PIPE0_RESET, 0); 3200 else 3201 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3202 PFP_PIPE1_RESET, 0); 3203 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3204 3205 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3206 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3207 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3208 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3209 } 3210 soc21_grbm_select(adev, 0, 0, 0, 0); 3211 mutex_unlock(&adev->srbm_mutex); 3212 3213 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3214 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3215 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3216 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3217 3218 /* Invalidate the data caches */ 3219 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3220 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3221 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3222 3223 for (i = 0; i < usec_timeout; i++) { 3224 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3225 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3226 INVALIDATE_DCACHE_COMPLETE)) 3227 break; 3228 udelay(1); 3229 } 3230 3231 if (i >= usec_timeout) { 3232 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3233 return -EINVAL; 3234 } 3235 3236 return 0; 3237 } 3238 3239 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3240 { 3241 int r; 3242 const struct gfx_firmware_header_v1_0 *me_hdr; 3243 const __le32 *fw_data; 3244 unsigned i, fw_size; 3245 3246 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3247 adev->gfx.me_fw->data; 3248 3249 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3250 3251 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3252 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3253 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3254 3255 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3256 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3257 &adev->gfx.me.me_fw_obj, 3258 &adev->gfx.me.me_fw_gpu_addr, 3259 (void **)&adev->gfx.me.me_fw_ptr); 3260 if (r) { 3261 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3262 gfx_v11_0_me_fini(adev); 3263 return r; 3264 } 3265 3266 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3267 3268 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3269 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3270 3271 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3272 3273 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3274 3275 for (i = 0; i < me_hdr->jt_size; i++) 3276 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3277 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3278 3279 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3280 3281 return 0; 3282 } 3283 3284 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3285 { 3286 int r; 3287 const struct gfx_firmware_header_v2_0 *me_hdr; 3288 const __le32 *fw_ucode, *fw_data; 3289 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3290 uint32_t tmp; 3291 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3292 3293 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3294 adev->gfx.me_fw->data; 3295 3296 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3297 3298 /* instruction */ 3299 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3300 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3301 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3302 /* data */ 3303 
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64kb align */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		adev->hdp.funcs->flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset given PIPE to take
		 * effect of CP_ME_PRGRM_CNTR_START.
3405 */ 3406 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3407 if (pipe_id == 0) 3408 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3409 ME_PIPE0_RESET, 1); 3410 else 3411 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3412 ME_PIPE1_RESET, 1); 3413 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3414 3415 /* Clear pfp pipe0 reset bit. */ 3416 if (pipe_id == 0) 3417 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3418 ME_PIPE0_RESET, 0); 3419 else 3420 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3421 ME_PIPE1_RESET, 0); 3422 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3423 3424 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3425 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3426 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3427 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3428 } 3429 soc21_grbm_select(adev, 0, 0, 0, 0); 3430 mutex_unlock(&adev->srbm_mutex); 3431 3432 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3433 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3434 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3435 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3436 3437 /* Invalidate the data caches */ 3438 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3439 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3440 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3441 3442 for (i = 0; i < usec_timeout; i++) { 3443 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3444 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3445 INVALIDATE_DCACHE_COMPLETE)) 3446 break; 3447 udelay(1); 3448 } 3449 3450 if (i >= usec_timeout) { 3451 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3452 return -EINVAL; 3453 } 3454 3455 return 0; 3456 } 3457 3458 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3459 { 3460 int r; 3461 3462 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3463 return -EINVAL; 3464 3465 gfx_v11_0_cp_gfx_enable(adev, false); 3466 3467 if (adev->gfx.rs64_enable) 3468 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3469 else 3470 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3471 if (r) { 3472 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3473 return r; 3474 } 3475 3476 if (adev->gfx.rs64_enable) 3477 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3478 else 3479 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3480 if (r) { 3481 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3482 return r; 3483 } 3484 3485 return 0; 3486 } 3487 3488 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3489 { 3490 struct amdgpu_ring *ring; 3491 const struct cs_section_def *sect = NULL; 3492 const struct cs_extent_def *ext = NULL; 3493 int r, i; 3494 int ctx_reg_offset; 3495 3496 /* init the CP */ 3497 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3498 adev->gfx.config.max_hw_contexts - 1); 3499 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3500 3501 if (!amdgpu_async_gfx_ring) 3502 gfx_v11_0_cp_gfx_enable(adev, true); 3503 3504 ring = &adev->gfx.gfx_ring[0]; 3505 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3506 if (r) { 3507 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3508 return r; 3509 } 3510 3511 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3512 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3513 3514 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3515 amdgpu_ring_write(ring, 0x80000000); 3516 amdgpu_ring_write(ring, 0x80000000); 3517 3518 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3519 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3520 if (sect->id == SECT_CONTEXT) { 3521 amdgpu_ring_write(ring, 3522 PACKET3(PACKET3_SET_CONTEXT_REG, 3523 ext->reg_count)); 3524 amdgpu_ring_write(ring, ext->reg_index - 3525 PACKET3_SET_CONTEXT_REG_START); 3526 for (i = 0; i < ext->reg_count; i++) 3527 amdgpu_ring_write(ring, ext->extent[i]); 3528 } 3529 } 3530 } 3531 3532 ctx_reg_offset = 3533 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3534 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3535 amdgpu_ring_write(ring, ctx_reg_offset); 3536 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3537 3538 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3539 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3540 3541 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3542 amdgpu_ring_write(ring, 0); 3543 3544 amdgpu_ring_commit(ring); 3545 3546 /* submit cs packet to copy state 0 to next available state */ 3547 if (adev->gfx.num_gfx_rings > 1) { 3548 /* maximum supported gfx ring is 2 */ 3549 ring = &adev->gfx.gfx_ring[1]; 3550 r = amdgpu_ring_alloc(ring, 2); 3551 if (r) { 3552 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3553 return r; 3554 } 3555 3556 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3557 amdgpu_ring_write(ring, 0); 3558 3559 amdgpu_ring_commit(ring); 3560 } 3561 return 0; 3562 } 3563 3564 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3565 CP_PIPE_ID pipe) 3566 { 3567 u32 tmp; 3568 3569 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3570 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3571 3572 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3573 } 3574 3575 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3576 struct amdgpu_ring *ring) 3577 { 3578 u32 tmp; 3579 3580 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3581 if (ring->use_doorbell) { 3582 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3583 DOORBELL_OFFSET, ring->doorbell_index); 3584 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3585 DOORBELL_EN, 1); 3586 } else { 3587 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3588 DOORBELL_EN, 0); 3589 } 3590 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3591 3592 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3593 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3594 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3595 3596 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3597 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3598 } 3599 3600 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3601 { 3602 struct amdgpu_ring *ring; 3603 u32 tmp; 3604 u32 rb_bufsz; 3605 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3606 3607 /* Set the write pointer delay */ 3608 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3609 3610 /* set the RB to use vmid 0 */ 3611 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3612 3613 /* Init gfx ring 0 for pipe 0 */ 3614 mutex_lock(&adev->srbm_mutex); 3615 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3616 3617 /* Set ring buffer size */ 3618 ring = &adev->gfx.gfx_ring[0]; 3619 rb_bufsz = order_base_2(ring->ring_size / 8); 3620 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3621 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3622 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3623 3624 /* Initialize the ring buffer's write pointers */ 3625 ring->wptr = 0; 3626 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3627 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3628 3629 /* set the wb address whether it's enabled or not */ 3630 rptr_addr = ring->rptr_gpu_addr; 3631 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3632 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3633 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3634 3635 wptr_gpu_addr = ring->wptr_gpu_addr; 3636 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3637 lower_32_bits(wptr_gpu_addr)); 3638 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3639 upper_32_bits(wptr_gpu_addr)); 3640 3641 mdelay(1); 3642 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3643 3644 rb_addr = ring->gpu_addr >> 8; 3645 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3646 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3647 3648 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3649 3650 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3651 mutex_unlock(&adev->srbm_mutex); 3652 3653 /* Init gfx ring 1 for pipe 1 */ 3654 if (adev->gfx.num_gfx_rings > 1) { 3655 mutex_lock(&adev->srbm_mutex); 3656 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3657 /* maximum supported gfx ring is 2 */ 3658 ring = &adev->gfx.gfx_ring[1]; 3659 rb_bufsz = order_base_2(ring->ring_size / 8); 3660 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3661 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3662 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3663 /* Initialize the ring buffer's write pointers */ 3664 ring->wptr = 0; 3665 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3666 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3667 /* Set the wb address whether it's enabled or not */ 3668 rptr_addr = ring->rptr_gpu_addr; 3669 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3670 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3671 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3672 wptr_gpu_addr = ring->wptr_gpu_addr; 3673 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3674 lower_32_bits(wptr_gpu_addr)); 3675 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3676 upper_32_bits(wptr_gpu_addr)); 3677 3678 mdelay(1); 3679 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3680 3681 rb_addr = ring->gpu_addr >> 8; 3682 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3683 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3684 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3685 3686 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3687 mutex_unlock(&adev->srbm_mutex); 3688 } 3689 /* Switch to pipe 0 */ 3690 mutex_lock(&adev->srbm_mutex); 3691 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3692 mutex_unlock(&adev->srbm_mutex); 3693 3694 /* start the ring */ 3695 gfx_v11_0_cp_gfx_start(adev); 3696 3697 return 0; 3698 } 3699 3700 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3701 { 3702 u32 data; 3703 3704 if (adev->gfx.rs64_enable) { 3705 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3706 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3707 enable ? 0 : 1); 3708 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3709 enable ? 0 : 1); 3710 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3711 enable ? 0 : 1); 3712 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3713 enable ? 0 : 1); 3714 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3715 enable ? 0 : 1); 3716 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3717 enable ? 
1 : 0); 3718 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3719 enable ? 1 : 0); 3720 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3721 enable ? 1 : 0); 3722 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3723 enable ? 1 : 0); 3724 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3725 enable ? 0 : 1); 3726 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3727 } else { 3728 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3729 3730 if (enable) { 3731 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3732 if (!adev->enable_mes_kiq) 3733 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3734 MEC_ME2_HALT, 0); 3735 } else { 3736 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3737 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3738 } 3739 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3740 } 3741 3742 udelay(50); 3743 } 3744 3745 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3746 { 3747 const struct gfx_firmware_header_v1_0 *mec_hdr; 3748 const __le32 *fw_data; 3749 unsigned i, fw_size; 3750 u32 *fw = NULL; 3751 int r; 3752 3753 if (!adev->gfx.mec_fw) 3754 return -EINVAL; 3755 3756 gfx_v11_0_cp_compute_enable(adev, false); 3757 3758 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3759 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3760 3761 fw_data = (const __le32 *) 3762 (adev->gfx.mec_fw->data + 3763 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3764 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3765 3766 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3767 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3768 &adev->gfx.mec.mec_fw_obj, 3769 &adev->gfx.mec.mec_fw_gpu_addr, 3770 (void **)&fw); 3771 if (r) { 3772 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3773 gfx_v11_0_mec_fini(adev); 3774 return r; 3775 } 3776 3777 memcpy(fw, fw_data, fw_size); 3778 3779 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3780 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3781 3782 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3783 3784 /* MEC1 */ 3785 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3786 3787 for (i = 0; i < mec_hdr->jt_size; i++) 3788 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3789 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3790 3791 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3792 3793 return 0; 3794 } 3795 3796 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3797 { 3798 const struct gfx_firmware_header_v2_0 *mec_hdr; 3799 const __le32 *fw_ucode, *fw_data; 3800 u32 tmp, fw_ucode_size, fw_data_size; 3801 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3802 u32 *fw_ucode_ptr, *fw_data_ptr; 3803 int r; 3804 3805 if (!adev->gfx.mec_fw) 3806 return -EINVAL; 3807 3808 gfx_v11_0_cp_compute_enable(adev, false); 3809 3810 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3811 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3812 3813 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3814 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3815 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3816 3817 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3818 le32_to_cpu(mec_hdr->data_offset_bytes)); 3819 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3820 3821 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3822 64 * 1024, 3823 AMDGPU_GEM_DOMAIN_VRAM | 3824 AMDGPU_GEM_DOMAIN_GTT, 3825 &adev->gfx.mec.mec_fw_obj, 3826 
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Trigger an invalidation of the MEC data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate 
instruction cache\n"); 3922 return -EINVAL; 3923 } 3924 3925 return 0; 3926 } 3927 3928 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 3929 { 3930 uint32_t tmp; 3931 struct amdgpu_device *adev = ring->adev; 3932 3933 /* tell RLC which is KIQ queue */ 3934 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 3935 tmp &= 0xffffff00; 3936 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 3937 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 3938 } 3939 3940 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 3941 { 3942 /* set graphics engine doorbell range */ 3943 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 3944 (adev->doorbell_index.gfx_ring0 * 2) << 2); 3945 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3946 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 3947 3948 /* set compute engine doorbell range */ 3949 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 3950 (adev->doorbell_index.kiq * 2) << 2); 3951 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 3952 (adev->doorbell_index.userqueue_end * 2) << 2); 3953 } 3954 3955 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 3956 struct v11_gfx_mqd *mqd, 3957 struct amdgpu_mqd_prop *prop) 3958 { 3959 bool priority = 0; 3960 u32 tmp; 3961 3962 /* set up default queue priority level 3963 * 0x0 = low priority, 0x1 = high priority 3964 */ 3965 if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) 3966 priority = 1; 3967 3968 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); 3969 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 3970 mqd->cp_gfx_hqd_queue_priority = tmp; 3971 } 3972 3973 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 3974 struct amdgpu_mqd_prop *prop) 3975 { 3976 struct v11_gfx_mqd *mqd = m; 3977 uint64_t hqd_gpu_addr, wb_gpu_addr; 3978 uint32_t tmp; 3979 uint32_t rb_bufsz; 3980 3981 /* set up gfx hqd wptr */ 3982 mqd->cp_gfx_hqd_wptr = 0; 3983 mqd->cp_gfx_hqd_wptr_hi = 0; 3984 3985 /* set the pointer to the MQD */ 3986 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 3987 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 3988 3989 /* set up mqd control */ 3990 tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); 3991 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 3992 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 3993 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 3994 mqd->cp_gfx_mqd_control = tmp; 3995 3996 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 3997 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); 3998 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 3999 mqd->cp_gfx_hqd_vmid = 0; 4000 4001 /* set up gfx queue priority */ 4002 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4003 4004 /* set up time quantum */ 4005 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); 4006 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4007 mqd->cp_gfx_hqd_quantum = tmp; 4008 4009 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 4010 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4011 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4012 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4013 4014 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4015 wb_gpu_addr = prop->rptr_gpu_addr; 4016 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4017 mqd->cp_gfx_hqd_rptr_addr_hi = 4018 upper_32_bits(wb_gpu_addr) & 0xffff; 4019 4020 /* set up rb_wptr_poll addr */ 4021 wb_gpu_addr = prop->wptr_gpu_addr; 4022 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4023 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4024 4025 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4026 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4027 tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); 4028 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4029 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4030 #ifdef __BIG_ENDIAN 4031 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4032 #endif 4033 mqd->cp_gfx_hqd_cntl = tmp; 4034 4035 /* set up cp_doorbell_control */ 4036 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 4037 if (prop->use_doorbell) { 4038 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4039 DOORBELL_OFFSET, prop->doorbell_index); 4040 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4041 DOORBELL_EN, 1); 4042 } else 4043 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4044 DOORBELL_EN, 0); 4045 mqd->cp_rb_doorbell_control = tmp; 4046 4047 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4048 mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); 4049 4050 /* active the queue */ 4051 mqd->cp_gfx_hqd_active = 1; 4052 4053 return 0; 4054 } 4055 4056 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4057 { 4058 struct amdgpu_device *adev = ring->adev; 4059 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4060 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4061 4062 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4063 memset((void *)mqd, 0, sizeof(*mqd)); 4064 mutex_lock(&adev->srbm_mutex); 4065 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4066 amdgpu_ring_init_mqd(ring); 4067 soc21_grbm_select(adev, 0, 0, 0, 0); 4068 mutex_unlock(&adev->srbm_mutex); 4069 if (adev->gfx.me.mqd_backup[mqd_idx]) 4070 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4071 } else { 4072 /* restore mqd with the backup copy */ 4073 if (adev->gfx.me.mqd_backup[mqd_idx]) 4074 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4075 /* reset the ring */ 4076 ring->wptr = 0; 4077 *ring->wptr_cpu_addr = 0; 4078 amdgpu_ring_clear_ring(ring); 4079 } 4080 4081 return 0; 4082 } 4083 4084 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4085 { 4086 int r, i; 4087 struct amdgpu_ring *ring; 4088 4089 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4090 ring = &adev->gfx.gfx_ring[i]; 4091 4092 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4093 if (unlikely(r != 0)) 4094 return r; 4095 4096 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4097 if (!r) { 4098 r = gfx_v11_0_kgq_init_queue(ring, false); 4099 amdgpu_bo_kunmap(ring->mqd_obj); 4100 ring->mqd_ptr = NULL; 4101 } 4102 amdgpu_bo_unreserve(ring->mqd_obj); 4103 if (r) 4104 return r; 4105 } 4106 4107 r = amdgpu_gfx_enable_kgq(adev, 0); 4108 if (r) 4109 return r; 4110 4111 return gfx_v11_0_cp_gfx_start(adev); 4112 } 4113 4114 static int gfx_v11_0_compute_mqd_init(struct 
amdgpu_device *adev, void *m, 4115 struct amdgpu_mqd_prop *prop) 4116 { 4117 struct v11_compute_mqd *mqd = m; 4118 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4119 uint32_t tmp; 4120 4121 mqd->header = 0xC0310800; 4122 mqd->compute_pipelinestat_enable = 0x00000001; 4123 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4124 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4125 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4126 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4127 mqd->compute_misc_reserved = 0x00000007; 4128 4129 eop_base_addr = prop->eop_gpu_addr >> 8; 4130 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4131 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4132 4133 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4134 tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); 4135 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4136 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4137 4138 mqd->cp_hqd_eop_control = tmp; 4139 4140 /* enable doorbell? */ 4141 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 4142 4143 if (prop->use_doorbell) { 4144 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4145 DOORBELL_OFFSET, prop->doorbell_index); 4146 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4147 DOORBELL_EN, 1); 4148 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4149 DOORBELL_SOURCE, 0); 4150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4151 DOORBELL_HIT, 0); 4152 } else { 4153 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4154 DOORBELL_EN, 0); 4155 } 4156 4157 mqd->cp_hqd_pq_doorbell_control = tmp; 4158 4159 /* disable the queue if it's active */ 4160 mqd->cp_hqd_dequeue_request = 0; 4161 mqd->cp_hqd_pq_rptr = 0; 4162 mqd->cp_hqd_pq_wptr_lo = 0; 4163 mqd->cp_hqd_pq_wptr_hi = 0; 4164 4165 /* set the pointer to the MQD */ 4166 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4167 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4168 4169 /* set MQD vmid to 0 */ 4170 tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); 4171 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4172 mqd->cp_mqd_control = tmp; 4173 4174 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4175 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4176 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4177 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4178 4179 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4180 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); 4181 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4182 (order_base_2(prop->queue_size / 4) - 1)); 4183 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4184 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4185 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4186 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4187 prop->allow_tunneling); 4188 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4189 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4190 mqd->cp_hqd_pq_control = tmp; 4191 4192 /* set the wb address whether it's enabled or not */ 4193 wb_gpu_addr = prop->rptr_gpu_addr; 4194 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4195 mqd->cp_hqd_pq_rptr_report_addr_hi = 4196 upper_32_bits(wb_gpu_addr) & 0xffff; 4197 4198 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4199 wb_gpu_addr = prop->wptr_gpu_addr; 4200 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4201 mqd->cp_hqd_pq_wptr_poll_addr_hi = 
upper_32_bits(wb_gpu_addr) & 0xffff; 4202 4203 tmp = 0; 4204 /* enable the doorbell if requested */ 4205 if (prop->use_doorbell) { 4206 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); 4207 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4208 DOORBELL_OFFSET, prop->doorbell_index); 4209 4210 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4211 DOORBELL_EN, 1); 4212 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4213 DOORBELL_SOURCE, 0); 4214 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4215 DOORBELL_HIT, 0); 4216 } 4217 4218 mqd->cp_hqd_pq_doorbell_control = tmp; 4219 4220 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4221 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); 4222 4223 /* set the vmid for the queue */ 4224 mqd->cp_hqd_vmid = 0; 4225 4226 tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); 4227 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4228 mqd->cp_hqd_persistent_state = tmp; 4229 4230 /* set MIN_IB_AVAIL_SIZE */ 4231 tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); 4232 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4233 mqd->cp_hqd_ib_control = tmp; 4234 4235 /* set static priority for a compute queue/ring */ 4236 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4237 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4238 4239 mqd->cp_hqd_active = prop->hqd_active; 4240 4241 return 0; 4242 } 4243 4244 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4245 { 4246 struct amdgpu_device *adev = ring->adev; 4247 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4248 int j; 4249 4250 /* inactivate the queue */ 4251 if (amdgpu_sriov_vf(adev)) 4252 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4253 4254 /* disable wptr polling */ 4255 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4256 4257 /* write the EOP addr */ 4258 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4259 mqd->cp_hqd_eop_base_addr_lo); 4260 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4261 mqd->cp_hqd_eop_base_addr_hi); 4262 4263 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4264 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4265 mqd->cp_hqd_eop_control); 4266 4267 /* enable doorbell? 
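	 * The control value written below comes from the MQD, which already
	 * records whether this queue uses a doorbell.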
*/ 4268 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4269 mqd->cp_hqd_pq_doorbell_control); 4270 4271 /* disable the queue if it's active */ 4272 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4273 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4274 for (j = 0; j < adev->usec_timeout; j++) { 4275 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4276 break; 4277 udelay(1); 4278 } 4279 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4280 mqd->cp_hqd_dequeue_request); 4281 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4282 mqd->cp_hqd_pq_rptr); 4283 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4284 mqd->cp_hqd_pq_wptr_lo); 4285 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4286 mqd->cp_hqd_pq_wptr_hi); 4287 } 4288 4289 /* set the pointer to the MQD */ 4290 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4291 mqd->cp_mqd_base_addr_lo); 4292 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4293 mqd->cp_mqd_base_addr_hi); 4294 4295 /* set MQD vmid to 0 */ 4296 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4297 mqd->cp_mqd_control); 4298 4299 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4300 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4301 mqd->cp_hqd_pq_base_lo); 4302 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4303 mqd->cp_hqd_pq_base_hi); 4304 4305 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4306 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4307 mqd->cp_hqd_pq_control); 4308 4309 /* set the wb address whether it's enabled or not */ 4310 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4311 mqd->cp_hqd_pq_rptr_report_addr_lo); 4312 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4313 mqd->cp_hqd_pq_rptr_report_addr_hi); 4314 4315 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4316 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4317 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4318 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4319 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4320 4321 /* enable the doorbell if requested */ 4322 if (ring->use_doorbell) { 4323 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4324 (adev->doorbell_index.kiq * 2) << 2); 4325 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4326 (adev->doorbell_index.userqueue_end * 2) << 2); 4327 } 4328 4329 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4330 mqd->cp_hqd_pq_doorbell_control); 4331 4332 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4333 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4334 mqd->cp_hqd_pq_wptr_lo); 4335 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4336 mqd->cp_hqd_pq_wptr_hi); 4337 4338 /* set the vmid for the queue */ 4339 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4340 4341 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4342 mqd->cp_hqd_persistent_state); 4343 4344 /* activate the queue */ 4345 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4346 mqd->cp_hqd_active); 4347 4348 if (ring->use_doorbell) 4349 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4350 4351 return 0; 4352 } 4353 4354 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4355 { 4356 struct amdgpu_device *adev = ring->adev; 4357 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4358 4359 gfx_v11_0_kiq_setting(ring); 4360 4361 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4362 /* reset MQD to a clean status */ 4363 if (adev->gfx.kiq[0].mqd_backup) 4364 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4365 4366 /* reset ring buffer */ 4367 ring->wptr = 0; 4368 amdgpu_ring_clear_ring(ring); 4369 4370 mutex_lock(&adev->srbm_mutex); 4371 
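		/* re-program the KIQ HQD registers from the restored MQD while
		 * this queue is selected via SRBM
		 */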
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4372 gfx_v11_0_kiq_init_register(ring); 4373 soc21_grbm_select(adev, 0, 0, 0, 0); 4374 mutex_unlock(&adev->srbm_mutex); 4375 } else { 4376 memset((void *)mqd, 0, sizeof(*mqd)); 4377 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4378 amdgpu_ring_clear_ring(ring); 4379 mutex_lock(&adev->srbm_mutex); 4380 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4381 amdgpu_ring_init_mqd(ring); 4382 gfx_v11_0_kiq_init_register(ring); 4383 soc21_grbm_select(adev, 0, 0, 0, 0); 4384 mutex_unlock(&adev->srbm_mutex); 4385 4386 if (adev->gfx.kiq[0].mqd_backup) 4387 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4388 } 4389 4390 return 0; 4391 } 4392 4393 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4394 { 4395 struct amdgpu_device *adev = ring->adev; 4396 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4397 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4398 4399 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4400 memset((void *)mqd, 0, sizeof(*mqd)); 4401 mutex_lock(&adev->srbm_mutex); 4402 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4403 amdgpu_ring_init_mqd(ring); 4404 soc21_grbm_select(adev, 0, 0, 0, 0); 4405 mutex_unlock(&adev->srbm_mutex); 4406 4407 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4408 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4409 } else { 4410 /* restore MQD to a clean status */ 4411 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4412 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4413 /* reset ring buffer */ 4414 ring->wptr = 0; 4415 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4416 amdgpu_ring_clear_ring(ring); 4417 } 4418 4419 return 0; 4420 } 4421 4422 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4423 { 4424 struct amdgpu_ring *ring; 4425 int r; 4426 4427 ring = &adev->gfx.kiq[0].ring; 4428 4429 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4430 if (unlikely(r != 0)) 4431 return r; 4432 4433 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4434 if (unlikely(r != 0)) { 4435 amdgpu_bo_unreserve(ring->mqd_obj); 4436 return r; 4437 } 4438 4439 gfx_v11_0_kiq_init_queue(ring); 4440 amdgpu_bo_kunmap(ring->mqd_obj); 4441 ring->mqd_ptr = NULL; 4442 amdgpu_bo_unreserve(ring->mqd_obj); 4443 ring->sched.ready = true; 4444 return 0; 4445 } 4446 4447 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4448 { 4449 struct amdgpu_ring *ring = NULL; 4450 int r = 0, i; 4451 4452 if (!amdgpu_async_gfx_ring) 4453 gfx_v11_0_cp_compute_enable(adev, true); 4454 4455 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4456 ring = &adev->gfx.compute_ring[i]; 4457 4458 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4459 if (unlikely(r != 0)) 4460 goto done; 4461 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 4462 if (!r) { 4463 r = gfx_v11_0_kcq_init_queue(ring, false); 4464 amdgpu_bo_kunmap(ring->mqd_obj); 4465 ring->mqd_ptr = NULL; 4466 } 4467 amdgpu_bo_unreserve(ring->mqd_obj); 4468 if (r) 4469 goto done; 4470 } 4471 4472 r = amdgpu_gfx_enable_kcq(adev, 0); 4473 done: 4474 return r; 4475 } 4476 4477 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4478 { 4479 int r, i; 4480 struct amdgpu_ring *ring; 4481 4482 if (!(adev->flags & AMD_IS_APU)) 4483 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4484 4485 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4486 /* legacy firmware loading */ 4487 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4488 if (r) 4489 
return r; 4490 4491 if (adev->gfx.rs64_enable) 4492 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4493 else 4494 r = gfx_v11_0_cp_compute_load_microcode(adev); 4495 if (r) 4496 return r; 4497 } 4498 4499 gfx_v11_0_cp_set_doorbell_range(adev); 4500 4501 if (amdgpu_async_gfx_ring) { 4502 gfx_v11_0_cp_compute_enable(adev, true); 4503 gfx_v11_0_cp_gfx_enable(adev, true); 4504 } 4505 4506 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4507 r = amdgpu_mes_kiq_hw_init(adev); 4508 else 4509 r = gfx_v11_0_kiq_resume(adev); 4510 if (r) 4511 return r; 4512 4513 r = gfx_v11_0_kcq_resume(adev); 4514 if (r) 4515 return r; 4516 4517 if (!amdgpu_async_gfx_ring) { 4518 r = gfx_v11_0_cp_gfx_resume(adev); 4519 if (r) 4520 return r; 4521 } else { 4522 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4523 if (r) 4524 return r; 4525 } 4526 4527 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4528 ring = &adev->gfx.gfx_ring[i]; 4529 r = amdgpu_ring_test_helper(ring); 4530 if (r) 4531 return r; 4532 } 4533 4534 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4535 ring = &adev->gfx.compute_ring[i]; 4536 r = amdgpu_ring_test_helper(ring); 4537 if (r) 4538 return r; 4539 } 4540 4541 return 0; 4542 } 4543 4544 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4545 { 4546 gfx_v11_0_cp_gfx_enable(adev, enable); 4547 gfx_v11_0_cp_compute_enable(adev, enable); 4548 } 4549 4550 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4551 { 4552 int r; 4553 bool value; 4554 4555 r = adev->gfxhub.funcs->gart_enable(adev); 4556 if (r) 4557 return r; 4558 4559 adev->hdp.funcs->flush_hdp(adev, NULL); 4560 4561 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4562 false : true; 4563 4564 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4565 /* TODO investigate why this and the hdp flush above is needed, 4566 * are we missing a flush somewhere else? 
*/ 4567 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4568 4569 return 0; 4570 } 4571 4572 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4573 { 4574 u32 tmp; 4575 4576 /* select RS64 */ 4577 if (adev->gfx.rs64_enable) { 4578 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4579 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4580 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4581 4582 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4583 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4584 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4585 } 4586 4587 if (amdgpu_emu_mode == 1) 4588 msleep(100); 4589 } 4590 4591 static int get_gb_addr_config(struct amdgpu_device * adev) 4592 { 4593 u32 gb_addr_config; 4594 4595 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4596 if (gb_addr_config == 0) 4597 return -EINVAL; 4598 4599 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4600 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4601 4602 adev->gfx.config.gb_addr_config = gb_addr_config; 4603 4604 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4605 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4606 GB_ADDR_CONFIG, NUM_PIPES); 4607 4608 adev->gfx.config.max_tile_pipes = 4609 adev->gfx.config.gb_addr_config_fields.num_pipes; 4610 4611 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4612 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4613 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4614 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4615 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4616 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4617 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4618 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4619 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4620 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4621 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4622 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4623 4624 return 0; 4625 } 4626 4627 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4628 { 4629 uint32_t data; 4630 4631 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4632 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4633 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4634 4635 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4636 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4637 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4638 } 4639 4640 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4641 { 4642 int r; 4643 struct amdgpu_device *adev = ip_block->adev; 4644 4645 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4646 adev->gfx.cleaner_shader_ptr); 4647 4648 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4649 if (adev->gfx.imu.funcs) { 4650 /* RLC autoload sequence 1: Program rlc ram */ 4651 if (adev->gfx.imu.funcs->program_rlc_ram) 4652 adev->gfx.imu.funcs->program_rlc_ram(adev); 4653 /* rlc autoload firmware */ 4654 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4655 if (r) 4656 return r; 4657 } 4658 } else { 4659 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4660 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4661 if (adev->gfx.imu.funcs->load_microcode) 4662 adev->gfx.imu.funcs->load_microcode(adev); 4663 if (adev->gfx.imu.funcs->setup_imu) 4664 adev->gfx.imu.funcs->setup_imu(adev); 4665 if (adev->gfx.imu.funcs->start_imu) 4666 adev->gfx.imu.funcs->start_imu(adev); 4667 } 4668 4669 /* disable gpa mode in backdoor loading */ 4670 gfx_v11_0_disable_gpa_mode(adev); 4671 } 4672 } 4673 4674 
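	/* For PSP front-door and RLC backdoor autoload, firmware loads the GC
	 * microcode, so wait for the RLC autoload handshake to finish before
	 * programming the rest of GC.
	 */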
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4675 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4676 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4677 if (r) { 4678 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 4679 return r; 4680 } 4681 } 4682 4683 adev->gfx.is_poweron = true; 4684 4685 if(get_gb_addr_config(adev)) 4686 DRM_WARN("Invalid gb_addr_config !\n"); 4687 4688 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4689 adev->gfx.rs64_enable) 4690 gfx_v11_0_config_gfx_rs64(adev); 4691 4692 r = gfx_v11_0_gfxhub_enable(adev); 4693 if (r) 4694 return r; 4695 4696 if (!amdgpu_emu_mode) 4697 gfx_v11_0_init_golden_registers(adev); 4698 4699 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4700 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4701 /** 4702 * For gfx 11, rlc firmware loading relies on smu firmware is 4703 * loaded firstly, so in direct type, it has to load smc ucode 4704 * here before rlc. 4705 */ 4706 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4707 if (r) 4708 return r; 4709 } 4710 4711 gfx_v11_0_constants_init(adev); 4712 4713 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4714 gfx_v11_0_select_cp_fw_arch(adev); 4715 4716 if (adev->nbio.funcs->gc_doorbell_init) 4717 adev->nbio.funcs->gc_doorbell_init(adev); 4718 4719 r = gfx_v11_0_rlc_resume(adev); 4720 if (r) 4721 return r; 4722 4723 /* 4724 * init golden registers and rlc resume may override some registers, 4725 * reconfig them here 4726 */ 4727 gfx_v11_0_tcp_harvest(adev); 4728 4729 r = gfx_v11_0_cp_resume(adev); 4730 if (r) 4731 return r; 4732 4733 /* get IMU version from HW if it's not set */ 4734 if (!adev->gfx.imu_fw_version) 4735 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4736 4737 return r; 4738 } 4739 4740 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4741 { 4742 struct amdgpu_device *adev = ip_block->adev; 4743 4744 cancel_delayed_work_sync(&adev->gfx.idle_work); 4745 4746 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4747 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4748 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4749 4750 if (!adev->no_hw_access) { 4751 if (amdgpu_async_gfx_ring) { 4752 if (amdgpu_gfx_disable_kgq(adev, 0)) 4753 DRM_ERROR("KGQ disable failed\n"); 4754 } 4755 4756 if (amdgpu_gfx_disable_kcq(adev, 0)) 4757 DRM_ERROR("KCQ disable failed\n"); 4758 4759 amdgpu_mes_kiq_hw_fini(adev); 4760 } 4761 4762 if (amdgpu_sriov_vf(adev)) 4763 /* Remove the steps disabling CPG and clearing KIQ position, 4764 * so that CP could perform IDLE-SAVE during switch. Those 4765 * steps are necessary to avoid a DMAR error in gfx9 but it is 4766 * not reproduced on gfx11. 
4767 */ 4768 return 0; 4769 4770 gfx_v11_0_cp_enable(adev, false); 4771 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4772 4773 adev->gfxhub.funcs->gart_disable(adev); 4774 4775 adev->gfx.is_poweron = false; 4776 4777 return 0; 4778 } 4779 4780 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4781 { 4782 return gfx_v11_0_hw_fini(ip_block); 4783 } 4784 4785 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4786 { 4787 return gfx_v11_0_hw_init(ip_block); 4788 } 4789 4790 static bool gfx_v11_0_is_idle(void *handle) 4791 { 4792 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4793 4794 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4795 GRBM_STATUS, GUI_ACTIVE)) 4796 return false; 4797 else 4798 return true; 4799 } 4800 4801 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4802 { 4803 unsigned i; 4804 u32 tmp; 4805 struct amdgpu_device *adev = ip_block->adev; 4806 4807 for (i = 0; i < adev->usec_timeout; i++) { 4808 /* read MC_STATUS */ 4809 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4810 GRBM_STATUS__GUI_ACTIVE_MASK; 4811 4812 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4813 return 0; 4814 udelay(1); 4815 } 4816 return -ETIMEDOUT; 4817 } 4818 4819 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4820 bool req) 4821 { 4822 u32 i, tmp, val; 4823 4824 for (i = 0; i < adev->usec_timeout; i++) { 4825 /* Request with MeId=2, PipeId=0 */ 4826 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4827 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4828 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4829 4830 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4831 if (req) { 4832 if (val == tmp) 4833 break; 4834 } else { 4835 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4836 REQUEST, 1); 4837 4838 /* unlocked or locked by firmware */ 4839 if (val != tmp) 4840 break; 4841 } 4842 udelay(1); 4843 } 4844 4845 if (i >= adev->usec_timeout) 4846 return -EINVAL; 4847 4848 return 0; 4849 } 4850 4851 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4852 { 4853 u32 grbm_soft_reset = 0; 4854 u32 tmp; 4855 int r, i, j, k; 4856 struct amdgpu_device *adev = ip_block->adev; 4857 4858 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4859 4860 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4861 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4862 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4863 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4864 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4865 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4866 4867 mutex_lock(&adev->srbm_mutex); 4868 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4869 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4870 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4871 soc21_grbm_select(adev, i, k, j, 0); 4872 4873 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4874 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4875 } 4876 } 4877 } 4878 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4879 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4880 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4881 soc21_grbm_select(adev, i, k, j, 0); 4882 4883 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 4884 } 4885 } 4886 } 4887 soc21_grbm_select(adev, 0, 0, 0, 0); 4888 mutex_unlock(&adev->srbm_mutex); 4889 4890 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 4891 mutex_lock(&adev->gfx.reset_sem_mutex); 4892 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 4893 if (r) { 4894 mutex_unlock(&adev->gfx.reset_sem_mutex); 4895 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 4896 return r; 4897 } 4898 4899 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 4900 4901 // Read CP_VMID_RESET register three times. 4902 // to get sufficient time for GFX_HQD_ACTIVE reach 0 4903 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4904 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4905 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 4906 4907 /* release the gfx mutex */ 4908 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 4909 mutex_unlock(&adev->gfx.reset_sem_mutex); 4910 if (r) { 4911 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 4912 return r; 4913 } 4914 4915 for (i = 0; i < adev->usec_timeout; i++) { 4916 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 4917 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 4918 break; 4919 udelay(1); 4920 } 4921 if (i >= adev->usec_timeout) { 4922 printk("Failed to wait all pipes clean\n"); 4923 return -EINVAL; 4924 } 4925 4926 /********** trigger soft reset ***********/ 4927 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4928 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4929 SOFT_RESET_CP, 1); 4930 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4931 SOFT_RESET_GFX, 1); 4932 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4933 SOFT_RESET_CPF, 1); 4934 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4935 SOFT_RESET_CPC, 1); 4936 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4937 SOFT_RESET_CPG, 1); 4938 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4939 /********** exit soft reset ***********/ 4940 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 4941 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4942 SOFT_RESET_CP, 0); 4943 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4944 SOFT_RESET_GFX, 0); 4945 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4946 SOFT_RESET_CPF, 0); 4947 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4948 SOFT_RESET_CPC, 0); 4949 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4950 SOFT_RESET_CPG, 0); 4951 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 4952 4953 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 4954 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 4955 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 4956 4957 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 4958 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 4959 4960 for (i = 0; i < adev->usec_timeout; i++) { 4961 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 4962 break; 4963 udelay(1); 4964 } 4965 if (i >= adev->usec_timeout) { 4966 printk("Failed to wait CP_VMID_RESET to 0\n"); 4967 return -EINVAL; 4968 } 4969 4970 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4971 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 4972 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 4973 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 4974 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 4975 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4976 4977 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4978 4979 return gfx_v11_0_cp_resume(adev); 4980 } 4981 4982 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 4983 { 4984 int i, r; 4985 struct amdgpu_device *adev = ip_block->adev; 4986 
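	/* run a quick IB test on every gfx and compute ring; any failure
	 * means a soft reset is needed
	 */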
struct amdgpu_ring *ring; 4987 long tmo = msecs_to_jiffies(1000); 4988 4989 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4990 ring = &adev->gfx.gfx_ring[i]; 4991 r = amdgpu_ring_test_ib(ring, tmo); 4992 if (r) 4993 return true; 4994 } 4995 4996 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4997 ring = &adev->gfx.compute_ring[i]; 4998 r = amdgpu_ring_test_ib(ring, tmo); 4999 if (r) 5000 return true; 5001 } 5002 5003 return false; 5004 } 5005 5006 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5007 { 5008 struct amdgpu_device *adev = ip_block->adev; 5009 /** 5010 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5011 */ 5012 return amdgpu_mes_resume(adev); 5013 } 5014 5015 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5016 { 5017 uint64_t clock; 5018 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5019 5020 if (amdgpu_sriov_vf(adev)) { 5021 amdgpu_gfx_off_ctrl(adev, false); 5022 mutex_lock(&adev->gfx.gpu_clock_mutex); 5023 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5024 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5025 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5026 if (clock_counter_hi_pre != clock_counter_hi_after) 5027 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5028 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5029 amdgpu_gfx_off_ctrl(adev, true); 5030 } else { 5031 preempt_disable(); 5032 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5033 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5034 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5035 if (clock_counter_hi_pre != clock_counter_hi_after) 5036 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5037 preempt_enable(); 5038 } 5039 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5040 5041 return clock; 5042 } 5043 5044 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5045 uint32_t vmid, 5046 uint32_t gds_base, uint32_t gds_size, 5047 uint32_t gws_base, uint32_t gws_size, 5048 uint32_t oa_base, uint32_t oa_size) 5049 { 5050 struct amdgpu_device *adev = ring->adev; 5051 5052 /* GDS Base */ 5053 gfx_v11_0_write_data_to_reg(ring, 0, false, 5054 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5055 gds_base); 5056 5057 /* GDS Size */ 5058 gfx_v11_0_write_data_to_reg(ring, 0, false, 5059 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5060 gds_size); 5061 5062 /* GWS */ 5063 gfx_v11_0_write_data_to_reg(ring, 0, false, 5064 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5065 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5066 5067 /* OA */ 5068 gfx_v11_0_write_data_to_reg(ring, 0, false, 5069 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5070 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5071 } 5072 5073 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5074 { 5075 struct amdgpu_device *adev = ip_block->adev; 5076 5077 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5078 5079 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5080 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5081 AMDGPU_MAX_COMPUTE_RINGS); 5082 5083 gfx_v11_0_set_kiq_pm4_funcs(adev); 5084 gfx_v11_0_set_ring_funcs(adev); 5085 gfx_v11_0_set_irq_funcs(adev); 5086 gfx_v11_0_set_gds_init(adev); 5087 gfx_v11_0_set_rlc_funcs(adev); 5088 
gfx_v11_0_set_mqd_funcs(adev); 5089 gfx_v11_0_set_imu_funcs(adev); 5090 5091 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5092 5093 return gfx_v11_0_init_microcode(adev); 5094 } 5095 5096 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5097 { 5098 struct amdgpu_device *adev = ip_block->adev; 5099 int r; 5100 5101 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5102 if (r) 5103 return r; 5104 5105 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5106 if (r) 5107 return r; 5108 5109 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5110 if (r) 5111 return r; 5112 return 0; 5113 } 5114 5115 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5116 { 5117 uint32_t rlc_cntl; 5118 5119 /* if RLC is not enabled, do nothing */ 5120 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5121 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5122 } 5123 5124 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5125 { 5126 uint32_t data; 5127 unsigned i; 5128 5129 data = RLC_SAFE_MODE__CMD_MASK; 5130 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5131 5132 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5133 5134 /* wait for RLC_SAFE_MODE */ 5135 for (i = 0; i < adev->usec_timeout; i++) { 5136 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5137 RLC_SAFE_MODE, CMD)) 5138 break; 5139 udelay(1); 5140 } 5141 } 5142 5143 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5144 { 5145 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5146 } 5147 5148 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5149 bool enable) 5150 { 5151 uint32_t def, data; 5152 5153 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5154 return; 5155 5156 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5157 5158 if (enable) 5159 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5160 else 5161 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5162 5163 if (def != data) 5164 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5165 } 5166 5167 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5168 bool enable) 5169 { 5170 uint32_t def, data; 5171 5172 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5173 return; 5174 5175 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5176 5177 if (enable) 5178 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5179 else 5180 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5181 5182 if (def != data) 5183 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5184 } 5185 5186 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5187 bool enable) 5188 { 5189 uint32_t def, data; 5190 5191 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5192 return; 5193 5194 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5195 5196 if (enable) 5197 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5198 else 5199 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5200 5201 if (def != data) 5202 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5203 } 5204 5205 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5206 bool enable) 5207 { 5208 uint32_t data, def; 5209 5210 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5211 return; 5212 5213 /* It is disabled by HW by default */ 5214 if (enable) { 5215 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5216 /* 1 - 
RLC_CGTT_MGCG_OVERRIDE */ 5217 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5218 5219 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5220 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5221 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5222 5223 if (def != data) 5224 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5225 } 5226 } else { 5227 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5228 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5229 5230 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5231 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5232 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5233 5234 if (def != data) 5235 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5236 } 5237 } 5238 } 5239 5240 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5241 bool enable) 5242 { 5243 uint32_t def, data; 5244 5245 if (!(adev->cg_flags & 5246 (AMD_CG_SUPPORT_GFX_CGCG | 5247 AMD_CG_SUPPORT_GFX_CGLS | 5248 AMD_CG_SUPPORT_GFX_3D_CGCG | 5249 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5250 return; 5251 5252 if (enable) { 5253 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5254 5255 /* unset CGCG override */ 5256 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5257 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5258 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5259 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5260 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5261 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5262 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5263 5264 /* update CGCG override bits */ 5265 if (def != data) 5266 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5267 5268 /* enable cgcg FSM(0x0000363F) */ 5269 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5270 5271 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5272 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5273 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5274 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5275 } 5276 5277 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5278 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5279 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5280 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5281 } 5282 5283 if (def != data) 5284 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5285 5286 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5287 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5288 5289 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5290 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5291 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5292 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5293 } 5294 5295 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5296 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5297 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5298 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5299 } 5300 5301 if (def != data) 5302 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5303 5304 /* set IDLE_POLL_COUNT(0x00900100) */ 5305 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5306 5307 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5308 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5309 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5310 5311 if (def != data) 5312 
WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5313 5314 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5315 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5316 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5317 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5318 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5319 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5320 5321 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5322 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5323 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5324 5325 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5326 if (adev->sdma.num_instances > 1) { 5327 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5328 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5329 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5330 } 5331 } else { 5332 /* Program RLC_CGCG_CGLS_CTRL */ 5333 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5334 5335 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5336 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5337 5338 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5339 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5340 5341 if (def != data) 5342 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5343 5344 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5345 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5346 5347 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5348 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5349 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5350 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5351 5352 if (def != data) 5353 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5354 5355 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5356 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5357 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5358 5359 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5360 if (adev->sdma.num_instances > 1) { 5361 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5362 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5363 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5364 } 5365 } 5366 } 5367 5368 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5369 bool enable) 5370 { 5371 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5372 5373 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5374 5375 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5376 5377 gfx_v11_0_update_repeater_fgcg(adev, enable); 5378 5379 gfx_v11_0_update_sram_fgcg(adev, enable); 5380 5381 gfx_v11_0_update_perf_clk(adev, enable); 5382 5383 if (adev->cg_flags & 5384 (AMD_CG_SUPPORT_GFX_MGCG | 5385 AMD_CG_SUPPORT_GFX_CGLS | 5386 AMD_CG_SUPPORT_GFX_CGCG | 5387 AMD_CG_SUPPORT_GFX_3D_CGCG | 5388 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5389 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5390 5391 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5392 5393 return 0; 5394 } 5395 5396 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5397 { 5398 u32 reg, pre_data, data; 5399 5400 amdgpu_gfx_off_ctrl(adev, false); 5401 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5402 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5403 pre_data = RREG32_NO_KIQ(reg); 5404 else 5405 pre_data = RREG32(reg); 5406 5407 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5408 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) 
<< RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5409 5410 if (pre_data != data) { 5411 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5412 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5413 } else 5414 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5415 } 5416 amdgpu_gfx_off_ctrl(adev, true); 5417 5418 if (ring 5419 && amdgpu_sriov_is_pp_one_vf(adev) 5420 && (pre_data != data) 5421 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5422 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5423 amdgpu_ring_emit_wreg(ring, reg, data); 5424 } 5425 } 5426 5427 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5428 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5429 .set_safe_mode = gfx_v11_0_set_safe_mode, 5430 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5431 .init = gfx_v11_0_rlc_init, 5432 .get_csb_size = gfx_v11_0_get_csb_size, 5433 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5434 .resume = gfx_v11_0_rlc_resume, 5435 .stop = gfx_v11_0_rlc_stop, 5436 .reset = gfx_v11_0_rlc_reset, 5437 .start = gfx_v11_0_rlc_start, 5438 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5439 }; 5440 5441 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5442 { 5443 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5444 5445 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5446 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5447 else 5448 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5449 5450 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5451 5452 // Program RLC_PG_DELAY3 for CGPG hysteresis 5453 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5454 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5455 case IP_VERSION(11, 0, 1): 5456 case IP_VERSION(11, 0, 4): 5457 case IP_VERSION(11, 5, 0): 5458 case IP_VERSION(11, 5, 1): 5459 case IP_VERSION(11, 5, 2): 5460 case IP_VERSION(11, 5, 3): 5461 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5462 break; 5463 default: 5464 break; 5465 } 5466 } 5467 } 5468 5469 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5470 { 5471 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5472 5473 gfx_v11_cntl_power_gating(adev, enable); 5474 5475 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5476 } 5477 5478 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5479 enum amd_powergating_state state) 5480 { 5481 struct amdgpu_device *adev = ip_block->adev; 5482 bool enable = (state == AMD_PG_STATE_GATE); 5483 5484 if (amdgpu_sriov_vf(adev)) 5485 return 0; 5486 5487 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5488 case IP_VERSION(11, 0, 0): 5489 case IP_VERSION(11, 0, 2): 5490 case IP_VERSION(11, 0, 3): 5491 amdgpu_gfx_off_ctrl(adev, enable); 5492 break; 5493 case IP_VERSION(11, 0, 1): 5494 case IP_VERSION(11, 0, 4): 5495 case IP_VERSION(11, 5, 0): 5496 case IP_VERSION(11, 5, 1): 5497 case IP_VERSION(11, 5, 2): 5498 case IP_VERSION(11, 5, 3): 5499 if (!enable) 5500 amdgpu_gfx_off_ctrl(adev, false); 5501 5502 gfx_v11_cntl_pg(adev, enable); 5503 5504 if (enable) 5505 amdgpu_gfx_off_ctrl(adev, true); 5506 5507 break; 5508 default: 5509 break; 5510 } 5511 5512 return 0; 5513 } 5514 5515 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5516 enum amd_clockgating_state state) 5517 { 5518 struct amdgpu_device *adev = ip_block->adev; 5519 5520 if (amdgpu_sriov_vf(adev)) 5521 return 0; 5522 5523 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5524 case IP_VERSION(11, 0, 0): 5525 case IP_VERSION(11, 0, 1): 5526 case IP_VERSION(11, 0, 2): 
5527 case IP_VERSION(11, 0, 3): 5528 case IP_VERSION(11, 0, 4): 5529 case IP_VERSION(11, 5, 0): 5530 case IP_VERSION(11, 5, 1): 5531 case IP_VERSION(11, 5, 2): 5532 case IP_VERSION(11, 5, 3): 5533 gfx_v11_0_update_gfx_clock_gating(adev, 5534 state == AMD_CG_STATE_GATE); 5535 break; 5536 default: 5537 break; 5538 } 5539 5540 return 0; 5541 } 5542 5543 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) 5544 { 5545 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5546 int data; 5547 5548 /* AMD_CG_SUPPORT_GFX_MGCG */ 5549 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5550 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5551 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5552 5553 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5554 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5555 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5556 5557 /* AMD_CG_SUPPORT_GFX_FGCG */ 5558 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5559 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5560 5561 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5562 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5563 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5564 5565 /* AMD_CG_SUPPORT_GFX_CGCG */ 5566 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5567 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5568 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5569 5570 /* AMD_CG_SUPPORT_GFX_CGLS */ 5571 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5572 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5573 5574 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5575 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5576 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5577 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5578 5579 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5580 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5581 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5582 } 5583 5584 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5585 { 5586 /* gfx11 is 32bit rptr*/ 5587 return *(uint32_t *)ring->rptr_cpu_addr; 5588 } 5589 5590 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5591 { 5592 struct amdgpu_device *adev = ring->adev; 5593 u64 wptr; 5594 5595 /* XXX check if swapping is necessary on BE */ 5596 if (ring->use_doorbell) { 5597 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5598 } else { 5599 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5600 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5601 } 5602 5603 return wptr; 5604 } 5605 5606 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5607 { 5608 struct amdgpu_device *adev = ring->adev; 5609 5610 if (ring->use_doorbell) { 5611 /* XXX check if swapping is necessary on BE */ 5612 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5613 ring->wptr); 5614 WDOORBELL64(ring->doorbell_index, ring->wptr); 5615 } else { 5616 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5617 lower_32_bits(ring->wptr)); 5618 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5619 upper_32_bits(ring->wptr)); 5620 } 5621 } 5622 5623 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5624 { 5625 /* gfx11 hardware is 32bit rptr */ 5626 return *(uint32_t *)ring->rptr_cpu_addr; 5627 } 5628 5629 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5630 { 5631 u64 wptr; 5632 5633 /* XXX check if swapping is necessary on BE */ 5634 if (ring->use_doorbell) 5635 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5636 else 5637 BUG(); 5638 return wptr; 5639 } 5640 5641 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5642 
{ 5643 struct amdgpu_device *adev = ring->adev; 5644 5645 /* XXX check if swapping is necessary on BE */ 5646 if (ring->use_doorbell) { 5647 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5648 ring->wptr); 5649 WDOORBELL64(ring->doorbell_index, ring->wptr); 5650 } else { 5651 BUG(); /* only DOORBELL method supported on gfx11 now */ 5652 } 5653 } 5654 5655 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5656 { 5657 struct amdgpu_device *adev = ring->adev; 5658 u32 ref_and_mask, reg_mem_engine; 5659 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5660 5661 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5662 switch (ring->me) { 5663 case 1: 5664 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5665 break; 5666 case 2: 5667 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5668 break; 5669 default: 5670 return; 5671 } 5672 reg_mem_engine = 0; 5673 } else { 5674 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5675 reg_mem_engine = 1; /* pfp */ 5676 } 5677 5678 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5679 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5680 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5681 ref_and_mask, ref_and_mask, 0x20); 5682 } 5683 5684 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5685 struct amdgpu_job *job, 5686 struct amdgpu_ib *ib, 5687 uint32_t flags) 5688 { 5689 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5690 u32 header, control = 0; 5691 5692 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5693 5694 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5695 5696 control |= ib->length_dw | (vmid << 24); 5697 5698 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5699 control |= INDIRECT_BUFFER_PRE_ENB(1); 5700 5701 if (flags & AMDGPU_IB_PREEMPTED) 5702 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5703 5704 if (vmid) 5705 gfx_v11_0_ring_emit_de_meta(ring, 5706 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5707 } 5708 5709 if (ring->is_mes_queue) 5710 /* inherit vmid from mqd */ 5711 control |= 0x400000; 5712 5713 amdgpu_ring_write(ring, header); 5714 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5715 amdgpu_ring_write(ring, 5716 #ifdef __BIG_ENDIAN 5717 (2 << 0) | 5718 #endif 5719 lower_32_bits(ib->gpu_addr)); 5720 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5721 amdgpu_ring_write(ring, control); 5722 } 5723 5724 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5725 struct amdgpu_job *job, 5726 struct amdgpu_ib *ib, 5727 uint32_t flags) 5728 { 5729 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5730 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5731 5732 if (ring->is_mes_queue) 5733 /* inherit vmid from mqd */ 5734 control |= 0x40000000; 5735 5736 /* Currently, there is a high possibility to get wave ID mismatch 5737 * between ME and GDS, leading to a hw deadlock, because ME generates 5738 * different wave IDs than the GDS expects. This situation happens 5739 * randomly when at least 5 compute pipes use GDS ordered append. 5740 * The wave IDs generated by ME are also wrong after suspend/resume. 5741 * Those are probably bugs somewhere else in the kernel driver. 5742 * 5743 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5744 * GDS to 0 for this ring (me/pipe). 
5745 */ 5746 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5747 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5748 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5749 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5750 } 5751 5752 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5753 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5754 amdgpu_ring_write(ring, 5755 #ifdef __BIG_ENDIAN 5756 (2 << 0) | 5757 #endif 5758 lower_32_bits(ib->gpu_addr)); 5759 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5760 amdgpu_ring_write(ring, control); 5761 } 5762 5763 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5764 u64 seq, unsigned flags) 5765 { 5766 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5767 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5768 5769 /* RELEASE_MEM - flush caches, send int */ 5770 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5771 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5772 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5773 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5774 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5775 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5776 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5777 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5778 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5779 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 5780 5781 /* 5782 * the address should be Qword aligned if 64bit write, Dword 5783 * aligned if only send 32bit data low (discard data high) 5784 */ 5785 if (write64bit) 5786 BUG_ON(addr & 0x7); 5787 else 5788 BUG_ON(addr & 0x3); 5789 amdgpu_ring_write(ring, lower_32_bits(addr)); 5790 amdgpu_ring_write(ring, upper_32_bits(addr)); 5791 amdgpu_ring_write(ring, lower_32_bits(seq)); 5792 amdgpu_ring_write(ring, upper_32_bits(seq)); 5793 amdgpu_ring_write(ring, ring->is_mes_queue ? 5794 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0); 5795 } 5796 5797 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5798 { 5799 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5800 uint32_t seq = ring->fence_drv.sync_seq; 5801 uint64_t addr = ring->fence_drv.gpu_addr; 5802 5803 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5804 upper_32_bits(addr), seq, 0xffffffff, 4); 5805 } 5806 5807 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5808 uint16_t pasid, uint32_t flush_type, 5809 bool all_hub, uint8_t dst_sel) 5810 { 5811 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5812 amdgpu_ring_write(ring, 5813 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5814 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5815 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5816 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5817 } 5818 5819 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5820 unsigned vmid, uint64_t pd_addr) 5821 { 5822 if (ring->is_mes_queue) 5823 gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); 5824 else 5825 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5826 5827 /* compute doesn't have PFP */ 5828 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5829 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5830 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5831 amdgpu_ring_write(ring, 0x0); 5832 } 5833 5834 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5835 * changed in any way. 
         */
        ring->set_q_mode_offs = 0;
        ring->set_q_mode_ptr = NULL;
}

static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                          u64 seq, unsigned int flags)
{
        struct amdgpu_device *adev = ring->adev;

        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}

static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
                                         uint32_t flags)
{
        uint32_t dw2 = 0;

        dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
                                                   uint64_t addr)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        /* discard following DWs if *cond_exec_gpu_addr==0 */
        amdgpu_ring_write(ring, 0);
        ret = ring->wptr & ring->buf_mask;
        /* patch dummy value later */
        amdgpu_ring_write(ring, 0);

        return ret;
}

static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
                                           u64 shadow_va, u64 csa_va,
                                           u64 gds_va, bool init_shadow,
                                           int vmid)
{
        struct amdgpu_device *adev = ring->adev;
        unsigned int offs, end;

        if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
                return;

        /*
         * The logic here isn't easy to understand because we need to keep
         * state across multiple executions of the function as well as
         * between the CPU and GPU. The general idea is that the newly
         * written GPU command has a condition on the previous one and is
         * only executed if really necessary.
         */

        /*
         * The dw in the NOP controls whether the next SET_Q_MODE packet
         * should be executed or not. Reserve 64 bits just to be on the
         * safe side.
         */
        amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
        offs = ring->wptr & ring->buf_mask;

        /*
         * We start with skipping the prefix SET_Q_MODE and always executing
         * the postfix SET_Q_MODE packet. This is changed below with a
         * WRITE_DATA command when the postfix packet executes.
         */
        amdgpu_ring_write(ring, shadow_va ?
1 : 0); 5937 amdgpu_ring_write(ring, 0); 5938 5939 if (ring->set_q_mode_offs) { 5940 uint64_t addr; 5941 5942 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5943 addr += ring->set_q_mode_offs << 2; 5944 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 5945 } 5946 5947 /* 5948 * When the postfix SET_Q_MODE packet executes we need to make sure that the 5949 * next prefix SET_Q_MODE packet executes as well. 5950 */ 5951 if (!shadow_va) { 5952 uint64_t addr; 5953 5954 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 5955 addr += offs << 2; 5956 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5957 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5958 amdgpu_ring_write(ring, lower_32_bits(addr)); 5959 amdgpu_ring_write(ring, upper_32_bits(addr)); 5960 amdgpu_ring_write(ring, 0x1); 5961 } 5962 5963 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 5964 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 5965 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 5966 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 5967 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 5968 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 5969 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 5970 amdgpu_ring_write(ring, shadow_va ? 5971 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 5972 amdgpu_ring_write(ring, init_shadow ? 5973 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 5974 5975 if (ring->set_q_mode_offs) 5976 amdgpu_ring_patch_cond_exec(ring, end); 5977 5978 if (shadow_va) { 5979 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 5980 5981 /* 5982 * If the tokens match try to skip the last postfix SET_Q_MODE 5983 * packet to avoid saving/restoring the state all the time. 5984 */ 5985 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 5986 *ring->set_q_mode_ptr = 0; 5987 5988 ring->set_q_mode_token = token; 5989 } else { 5990 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 5991 } 5992 5993 ring->set_q_mode_offs = offs; 5994 } 5995 5996 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 5997 { 5998 int i, r = 0; 5999 struct amdgpu_device *adev = ring->adev; 6000 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6001 struct amdgpu_ring *kiq_ring = &kiq->ring; 6002 unsigned long flags; 6003 6004 if (adev->enable_mes) 6005 return -EINVAL; 6006 6007 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6008 return -EINVAL; 6009 6010 spin_lock_irqsave(&kiq->ring_lock, flags); 6011 6012 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6013 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6014 return -ENOMEM; 6015 } 6016 6017 /* assert preemption condition */ 6018 amdgpu_ring_set_preempt_cond_exec(ring, false); 6019 6020 /* assert IB preemption, emit the trailing fence */ 6021 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6022 ring->trail_fence_gpu_addr, 6023 ++ring->trail_seq); 6024 amdgpu_ring_commit(kiq_ring); 6025 6026 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6027 6028 /* poll the trailing fence */ 6029 for (i = 0; i < adev->usec_timeout; i++) { 6030 if (ring->trail_seq == 6031 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6032 break; 6033 udelay(1); 6034 } 6035 6036 if (i >= adev->usec_timeout) { 6037 r = -EINVAL; 6038 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6039 } 6040 6041 /* deassert preemption condition */ 6042 amdgpu_ring_set_preempt_cond_exec(ring, true); 6043 return r; 6044 } 6045 6046 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6047 { 6048 struct 
amdgpu_device *adev = ring->adev; 6049 struct v10_de_ib_state de_payload = {0}; 6050 uint64_t offset, gds_addr, de_payload_gpu_addr; 6051 void *de_payload_cpu_addr; 6052 int cnt; 6053 6054 if (ring->is_mes_queue) { 6055 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6056 gfx[0].gfx_meta_data) + 6057 offsetof(struct v10_gfx_meta_data, de_payload); 6058 de_payload_gpu_addr = 6059 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6060 de_payload_cpu_addr = 6061 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); 6062 6063 offset = offsetof(struct amdgpu_mes_ctx_meta_data, 6064 gfx[0].gds_backup) + 6065 offsetof(struct v10_gfx_meta_data, de_payload); 6066 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); 6067 } else { 6068 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6069 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6070 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6071 6072 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6073 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6074 PAGE_SIZE); 6075 } 6076 6077 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6078 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6079 6080 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6081 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6082 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6083 WRITE_DATA_DST_SEL(8) | 6084 WR_CONFIRM) | 6085 WRITE_DATA_CACHE_POLICY(0)); 6086 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6087 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6088 6089 if (resume) 6090 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6091 sizeof(de_payload) >> 2); 6092 else 6093 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6094 sizeof(de_payload) >> 2); 6095 } 6096 6097 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6098 bool secure) 6099 { 6100 uint32_t v = secure ? FRAME_TMZ : 0; 6101 6102 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6103 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 6104 } 6105 6106 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6107 uint32_t reg_val_offs) 6108 { 6109 struct amdgpu_device *adev = ring->adev; 6110 6111 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6112 amdgpu_ring_write(ring, 0 | /* src: register*/ 6113 (5 << 8) | /* dst: memory */ 6114 (1 << 20)); /* write confirm */ 6115 amdgpu_ring_write(ring, reg); 6116 amdgpu_ring_write(ring, 0); 6117 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6118 reg_val_offs * 4)); 6119 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6120 reg_val_offs * 4)); 6121 } 6122 6123 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6124 uint32_t val) 6125 { 6126 uint32_t cmd = 0; 6127 6128 switch (ring->funcs->type) { 6129 case AMDGPU_RING_TYPE_GFX: 6130 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6131 break; 6132 case AMDGPU_RING_TYPE_KIQ: 6133 cmd = (1 << 16); /* no inc addr */ 6134 break; 6135 default: 6136 cmd = WR_CONFIRM; 6137 break; 6138 } 6139 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6140 amdgpu_ring_write(ring, cmd); 6141 amdgpu_ring_write(ring, reg); 6142 amdgpu_ring_write(ring, 0); 6143 amdgpu_ring_write(ring, val); 6144 } 6145 6146 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6147 uint32_t val, uint32_t mask) 6148 { 6149 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6150 } 6151 6152 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6153 uint32_t reg0, uint32_t reg1, 6154 uint32_t ref, uint32_t mask) 6155 { 6156 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6157 6158 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6159 ref, mask, 0x20); 6160 } 6161 6162 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 6163 unsigned vmid) 6164 { 6165 struct amdgpu_device *adev = ring->adev; 6166 uint32_t value = 0; 6167 6168 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6169 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 6170 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6171 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6172 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6173 WREG32_SOC15(GC, 0, regSQ_CMD, value); 6174 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6175 } 6176 6177 static void 6178 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6179 uint32_t me, uint32_t pipe, 6180 enum amdgpu_interrupt_state state) 6181 { 6182 uint32_t cp_int_cntl, cp_int_cntl_reg; 6183 6184 if (!me) { 6185 switch (pipe) { 6186 case 0: 6187 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6188 break; 6189 case 1: 6190 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6191 break; 6192 default: 6193 DRM_DEBUG("invalid pipe %d\n", pipe); 6194 return; 6195 } 6196 } else { 6197 DRM_DEBUG("invalid me %d\n", me); 6198 return; 6199 } 6200 6201 switch (state) { 6202 case AMDGPU_IRQ_STATE_DISABLE: 6203 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6204 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6205 TIME_STAMP_INT_ENABLE, 0); 6206 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6207 GENERIC0_INT_ENABLE, 0); 6208 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6209 break; 6210 case AMDGPU_IRQ_STATE_ENABLE: 6211 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6212 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6213 TIME_STAMP_INT_ENABLE, 1); 6214 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6215 
GENERIC0_INT_ENABLE, 1); 6216 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6217 break; 6218 default: 6219 break; 6220 } 6221 } 6222 6223 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6224 int me, int pipe, 6225 enum amdgpu_interrupt_state state) 6226 { 6227 u32 mec_int_cntl, mec_int_cntl_reg; 6228 6229 /* 6230 * amdgpu controls only the first MEC. That's why this function only 6231 * handles the setting of interrupts for this specific MEC. All other 6232 * pipes' interrupts are set by amdkfd. 6233 */ 6234 6235 if (me == 1) { 6236 switch (pipe) { 6237 case 0: 6238 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6239 break; 6240 case 1: 6241 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6242 break; 6243 case 2: 6244 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6245 break; 6246 case 3: 6247 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6248 break; 6249 default: 6250 DRM_DEBUG("invalid pipe %d\n", pipe); 6251 return; 6252 } 6253 } else { 6254 DRM_DEBUG("invalid me %d\n", me); 6255 return; 6256 } 6257 6258 switch (state) { 6259 case AMDGPU_IRQ_STATE_DISABLE: 6260 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6261 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6262 TIME_STAMP_INT_ENABLE, 0); 6263 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6264 GENERIC0_INT_ENABLE, 0); 6265 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6266 break; 6267 case AMDGPU_IRQ_STATE_ENABLE: 6268 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6269 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6270 TIME_STAMP_INT_ENABLE, 1); 6271 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6272 GENERIC0_INT_ENABLE, 1); 6273 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6274 break; 6275 default: 6276 break; 6277 } 6278 } 6279 6280 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6281 struct amdgpu_irq_src *src, 6282 unsigned type, 6283 enum amdgpu_interrupt_state state) 6284 { 6285 switch (type) { 6286 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6287 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6288 break; 6289 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6290 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6291 break; 6292 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6293 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6294 break; 6295 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6296 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6297 break; 6298 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6299 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6300 break; 6301 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6302 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6303 break; 6304 default: 6305 break; 6306 } 6307 return 0; 6308 } 6309 6310 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6311 struct amdgpu_irq_src *source, 6312 struct amdgpu_iv_entry *entry) 6313 { 6314 int i; 6315 u8 me_id, pipe_id, queue_id; 6316 struct amdgpu_ring *ring; 6317 uint32_t mes_queue_id = entry->src_data[0]; 6318 6319 DRM_DEBUG("IH: CP EOP\n"); 6320 6321 if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) { 6322 struct amdgpu_mes_queue *queue; 6323 6324 mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK; 6325 6326 spin_lock(&adev->mes.queue_id_lock); 6327 queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id); 6328 if 
(queue) { 6329 DRM_DEBUG("process mes queue id = %d\n", mes_queue_id); 6330 amdgpu_fence_process(queue->ring); 6331 } 6332 spin_unlock(&adev->mes.queue_id_lock); 6333 } else { 6334 me_id = (entry->ring_id & 0x0c) >> 2; 6335 pipe_id = (entry->ring_id & 0x03) >> 0; 6336 queue_id = (entry->ring_id & 0x70) >> 4; 6337 6338 switch (me_id) { 6339 case 0: 6340 if (pipe_id == 0) 6341 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6342 else 6343 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6344 break; 6345 case 1: 6346 case 2: 6347 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6348 ring = &adev->gfx.compute_ring[i]; 6349 /* Per-queue interrupt is supported for MEC starting from VI. 6350 * The interrupt can only be enabled/disabled per pipe instead 6351 * of per queue. 6352 */ 6353 if ((ring->me == me_id) && 6354 (ring->pipe == pipe_id) && 6355 (ring->queue == queue_id)) 6356 amdgpu_fence_process(ring); 6357 } 6358 break; 6359 } 6360 } 6361 6362 return 0; 6363 } 6364 6365 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6366 struct amdgpu_irq_src *source, 6367 unsigned int type, 6368 enum amdgpu_interrupt_state state) 6369 { 6370 u32 cp_int_cntl_reg, cp_int_cntl; 6371 int i, j; 6372 6373 switch (state) { 6374 case AMDGPU_IRQ_STATE_DISABLE: 6375 case AMDGPU_IRQ_STATE_ENABLE: 6376 for (i = 0; i < adev->gfx.me.num_me; i++) { 6377 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6378 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6379 6380 if (cp_int_cntl_reg) { 6381 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6382 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6383 PRIV_REG_INT_ENABLE, 6384 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6385 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6386 } 6387 } 6388 } 6389 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6390 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6391 /* MECs start at 1 */ 6392 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6393 6394 if (cp_int_cntl_reg) { 6395 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6396 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6397 PRIV_REG_INT_ENABLE, 6398 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6399 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6400 } 6401 } 6402 } 6403 break; 6404 default: 6405 break; 6406 } 6407 6408 return 0; 6409 } 6410 6411 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6412 struct amdgpu_irq_src *source, 6413 unsigned type, 6414 enum amdgpu_interrupt_state state) 6415 { 6416 u32 cp_int_cntl_reg, cp_int_cntl; 6417 int i, j; 6418 6419 switch (state) { 6420 case AMDGPU_IRQ_STATE_DISABLE: 6421 case AMDGPU_IRQ_STATE_ENABLE: 6422 for (i = 0; i < adev->gfx.me.num_me; i++) { 6423 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6424 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6425 6426 if (cp_int_cntl_reg) { 6427 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6428 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6429 OPCODE_ERROR_INT_ENABLE, 6430 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6431 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6432 } 6433 } 6434 } 6435 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6436 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6437 /* MECs start at 1 */ 6438 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6439 6440 if (cp_int_cntl_reg) { 6441 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6442 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6443 OPCODE_ERROR_INT_ENABLE, 6444 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6445 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6446 } 6447 } 6448 } 6449 break; 6450 default: 6451 break; 6452 } 6453 return 0; 6454 } 6455 6456 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6457 struct amdgpu_irq_src *source, 6458 unsigned int type, 6459 enum amdgpu_interrupt_state state) 6460 { 6461 u32 cp_int_cntl_reg, cp_int_cntl; 6462 int i, j; 6463 6464 switch (state) { 6465 case AMDGPU_IRQ_STATE_DISABLE: 6466 case AMDGPU_IRQ_STATE_ENABLE: 6467 for (i = 0; i < adev->gfx.me.num_me; i++) { 6468 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6469 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6470 6471 if (cp_int_cntl_reg) { 6472 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6473 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6474 PRIV_INSTR_INT_ENABLE, 6475 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6476 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6477 } 6478 } 6479 } 6480 break; 6481 default: 6482 break; 6483 } 6484 6485 return 0; 6486 } 6487 6488 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6489 struct amdgpu_iv_entry *entry) 6490 { 6491 u8 me_id, pipe_id, queue_id; 6492 struct amdgpu_ring *ring; 6493 int i; 6494 6495 me_id = (entry->ring_id & 0x0c) >> 2; 6496 pipe_id = (entry->ring_id & 0x03) >> 0; 6497 queue_id = (entry->ring_id & 0x70) >> 4; 6498 6499 switch (me_id) { 6500 case 0: 6501 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6502 ring = &adev->gfx.gfx_ring[i]; 6503 if (ring->me == me_id && ring->pipe == pipe_id && 6504 ring->queue == queue_id) 6505 drm_sched_fault(&ring->sched); 6506 } 6507 break; 6508 case 1: 6509 case 2: 6510 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6511 ring = &adev->gfx.compute_ring[i]; 6512 if (ring->me == me_id && ring->pipe == pipe_id && 6513 ring->queue == queue_id) 6514 drm_sched_fault(&ring->sched); 6515 } 6516 break; 6517 default: 6518 BUG(); 6519 break; 6520 } 6521 } 6522 6523 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6524 struct amdgpu_irq_src *source, 6525 struct amdgpu_iv_entry *entry) 6526 { 6527 DRM_ERROR("Illegal register access in command stream\n"); 6528 gfx_v11_0_handle_priv_fault(adev, entry); 6529 return 0; 6530 } 6531 6532 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6533 struct amdgpu_irq_src *source, 6534 struct amdgpu_iv_entry *entry) 6535 { 6536 DRM_ERROR("Illegal opcode in command stream \n"); 6537 gfx_v11_0_handle_priv_fault(adev, entry); 6538 return 0; 6539 } 6540 6541 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6542 struct amdgpu_irq_src *source, 6543 struct amdgpu_iv_entry *entry) 6544 { 6545 DRM_ERROR("Illegal instruction in command stream\n"); 6546 gfx_v11_0_handle_priv_fault(adev, entry); 6547 return 0; 6548 } 6549 6550 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6551 struct amdgpu_irq_src *source, 6552 struct amdgpu_iv_entry *entry) 6553 { 6554 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6555 return 
adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6556 6557 return 0; 6558 } 6559 6560 #if 0 6561 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6562 struct amdgpu_irq_src *src, 6563 unsigned int type, 6564 enum amdgpu_interrupt_state state) 6565 { 6566 uint32_t tmp, target; 6567 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6568 6569 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6570 target += ring->pipe; 6571 6572 switch (type) { 6573 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6574 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6575 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6576 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6577 GENERIC2_INT_ENABLE, 0); 6578 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6579 6580 tmp = RREG32_SOC15_IP(GC, target); 6581 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6582 GENERIC2_INT_ENABLE, 0); 6583 WREG32_SOC15_IP(GC, target, tmp); 6584 } else { 6585 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6586 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6587 GENERIC2_INT_ENABLE, 1); 6588 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6589 6590 tmp = RREG32_SOC15_IP(GC, target); 6591 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6592 GENERIC2_INT_ENABLE, 1); 6593 WREG32_SOC15_IP(GC, target, tmp); 6594 } 6595 break; 6596 default: 6597 BUG(); /* kiq only support GENERIC2_INT now */ 6598 break; 6599 } 6600 return 0; 6601 } 6602 #endif 6603 6604 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6605 { 6606 const unsigned int gcr_cntl = 6607 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6608 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6609 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6610 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6611 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6612 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6613 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6614 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6615 6616 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6617 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6618 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6619 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6620 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6621 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6622 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6623 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6624 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6625 } 6626 6627 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 6628 { 6629 struct amdgpu_device *adev = ring->adev; 6630 int r; 6631 6632 if (amdgpu_sriov_vf(adev)) 6633 return -EINVAL; 6634 6635 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); 6636 if (r) 6637 return r; 6638 6639 r = amdgpu_bo_reserve(ring->mqd_obj, false); 6640 if (unlikely(r != 0)) { 6641 dev_err(adev->dev, "fail to resv mqd_obj\n"); 6642 return r; 6643 } 6644 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 6645 if (!r) { 6646 r = gfx_v11_0_kgq_init_queue(ring, true); 6647 amdgpu_bo_kunmap(ring->mqd_obj); 6648 ring->mqd_ptr = NULL; 6649 } 6650 amdgpu_bo_unreserve(ring->mqd_obj); 6651 if (r) { 6652 dev_err(adev->dev, "fail to unresv mqd_obj\n"); 6653 return r; 6654 } 6655 6656 r = amdgpu_mes_map_legacy_queue(adev, ring); 6657 if (r) { 6658 dev_err(adev->dev, "failed to remap kgq\n"); 6659 return r; 6660 } 6661 6662 return amdgpu_ring_test_ring(ring); 6663 } 6664 6665 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) 6666 { 6667 struct 
amdgpu_device *adev = ring->adev; 6668 int r = 0; 6669 6670 if (amdgpu_sriov_vf(adev)) 6671 return -EINVAL; 6672 6673 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); 6674 if (r) { 6675 dev_err(adev->dev, "reset via MMIO failed %d\n", r); 6676 return r; 6677 } 6678 6679 r = amdgpu_bo_reserve(ring->mqd_obj, false); 6680 if (unlikely(r != 0)) { 6681 dev_err(adev->dev, "fail to resv mqd_obj\n"); 6682 return r; 6683 } 6684 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 6685 if (!r) { 6686 r = gfx_v11_0_kcq_init_queue(ring, true); 6687 amdgpu_bo_kunmap(ring->mqd_obj); 6688 ring->mqd_ptr = NULL; 6689 } 6690 amdgpu_bo_unreserve(ring->mqd_obj); 6691 if (r) { 6692 dev_err(adev->dev, "fail to unresv mqd_obj\n"); 6693 return r; 6694 } 6695 r = amdgpu_mes_map_legacy_queue(adev, ring); 6696 if (r) { 6697 dev_err(adev->dev, "failed to remap kcq\n"); 6698 return r; 6699 } 6700 6701 return amdgpu_ring_test_ring(ring); 6702 } 6703 6704 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 6705 { 6706 struct amdgpu_device *adev = ip_block->adev; 6707 uint32_t i, j, k, reg, index = 0; 6708 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 6709 6710 if (!adev->gfx.ip_dump_core) 6711 return; 6712 6713 for (i = 0; i < reg_count; i++) 6714 drm_printf(p, "%-50s \t 0x%08x\n", 6715 gc_reg_list_11_0[i].reg_name, 6716 adev->gfx.ip_dump_core[i]); 6717 6718 /* print compute queue registers for all instances */ 6719 if (!adev->gfx.ip_dump_compute_queues) 6720 return; 6721 6722 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 6723 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 6724 adev->gfx.mec.num_mec, 6725 adev->gfx.mec.num_pipe_per_mec, 6726 adev->gfx.mec.num_queue_per_pipe); 6727 6728 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6729 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6730 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 6731 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 6732 for (reg = 0; reg < reg_count; reg++) { 6733 drm_printf(p, "%-50s \t 0x%08x\n", 6734 gc_cp_reg_list_11[reg].reg_name, 6735 adev->gfx.ip_dump_compute_queues[index + reg]); 6736 } 6737 index += reg_count; 6738 } 6739 } 6740 } 6741 6742 /* print gfx queue registers for all instances */ 6743 if (!adev->gfx.ip_dump_gfx_queues) 6744 return; 6745 6746 index = 0; 6747 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 6748 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 6749 adev->gfx.me.num_me, 6750 adev->gfx.me.num_pipe_per_me, 6751 adev->gfx.me.num_queue_per_pipe); 6752 6753 for (i = 0; i < adev->gfx.me.num_me; i++) { 6754 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6755 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 6756 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 6757 for (reg = 0; reg < reg_count; reg++) { 6758 drm_printf(p, "%-50s \t 0x%08x\n", 6759 gc_gfx_queue_reg_list_11[reg].reg_name, 6760 adev->gfx.ip_dump_gfx_queues[index + reg]); 6761 } 6762 index += reg_count; 6763 } 6764 } 6765 } 6766 } 6767 6768 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 6769 { 6770 struct amdgpu_device *adev = ip_block->adev; 6771 uint32_t i, j, k, reg, index = 0; 6772 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 6773 6774 if (!adev->gfx.ip_dump_core) 6775 return; 6776 6777 amdgpu_gfx_off_ctrl(adev, false); 6778 for (i = 0; i < reg_count; i++) 6779 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 6780 amdgpu_gfx_off_ctrl(adev, true); 6781 6782 /* dump compute queue 
registers for all instances */ 6783 if (!adev->gfx.ip_dump_compute_queues) 6784 return; 6785 6786 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 6787 amdgpu_gfx_off_ctrl(adev, false); 6788 mutex_lock(&adev->srbm_mutex); 6789 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6790 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6791 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 6792 /* ME0 is for GFX so start from 1 for CP */ 6793 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 6794 for (reg = 0; reg < reg_count; reg++) { 6795 adev->gfx.ip_dump_compute_queues[index + reg] = 6796 RREG32(SOC15_REG_ENTRY_OFFSET( 6797 gc_cp_reg_list_11[reg])); 6798 } 6799 index += reg_count; 6800 } 6801 } 6802 } 6803 soc21_grbm_select(adev, 0, 0, 0, 0); 6804 mutex_unlock(&adev->srbm_mutex); 6805 amdgpu_gfx_off_ctrl(adev, true); 6806 6807 /* dump gfx queue registers for all instances */ 6808 if (!adev->gfx.ip_dump_gfx_queues) 6809 return; 6810 6811 index = 0; 6812 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 6813 amdgpu_gfx_off_ctrl(adev, false); 6814 mutex_lock(&adev->srbm_mutex); 6815 for (i = 0; i < adev->gfx.me.num_me; i++) { 6816 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6817 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 6818 soc21_grbm_select(adev, i, j, k, 0); 6819 6820 for (reg = 0; reg < reg_count; reg++) { 6821 adev->gfx.ip_dump_gfx_queues[index + reg] = 6822 RREG32(SOC15_REG_ENTRY_OFFSET( 6823 gc_gfx_queue_reg_list_11[reg])); 6824 } 6825 index += reg_count; 6826 } 6827 } 6828 } 6829 soc21_grbm_select(adev, 0, 0, 0, 0); 6830 mutex_unlock(&adev->srbm_mutex); 6831 amdgpu_gfx_off_ctrl(adev, true); 6832 } 6833 6834 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 6835 { 6836 /* Emit the cleaner shader */ 6837 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 6838 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 6839 } 6840 6841 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 6842 { 6843 amdgpu_gfx_profile_ring_begin_use(ring); 6844 6845 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 6846 } 6847 6848 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 6849 { 6850 amdgpu_gfx_profile_ring_end_use(ring); 6851 6852 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 6853 } 6854 6855 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 6856 .name = "gfx_v11_0", 6857 .early_init = gfx_v11_0_early_init, 6858 .late_init = gfx_v11_0_late_init, 6859 .sw_init = gfx_v11_0_sw_init, 6860 .sw_fini = gfx_v11_0_sw_fini, 6861 .hw_init = gfx_v11_0_hw_init, 6862 .hw_fini = gfx_v11_0_hw_fini, 6863 .suspend = gfx_v11_0_suspend, 6864 .resume = gfx_v11_0_resume, 6865 .is_idle = gfx_v11_0_is_idle, 6866 .wait_for_idle = gfx_v11_0_wait_for_idle, 6867 .soft_reset = gfx_v11_0_soft_reset, 6868 .check_soft_reset = gfx_v11_0_check_soft_reset, 6869 .post_soft_reset = gfx_v11_0_post_soft_reset, 6870 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 6871 .set_powergating_state = gfx_v11_0_set_powergating_state, 6872 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 6873 .dump_ip_state = gfx_v11_ip_dump, 6874 .print_ip_state = gfx_v11_ip_print, 6875 }; 6876 6877 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 6878 .type = AMDGPU_RING_TYPE_GFX, 6879 .align_mask = 0xff, 6880 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6881 .support_64bit_ptrs = true, 6882 .secure_submission_supported = true, 6883 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 6884 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 6885 
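        /* wptr is published via the doorbell when enabled, MMIO otherwise */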
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 247 maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}
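
/*
 * Illustrative sketch: the interrupt sources wired up above are armed later
 * through the generic IRQ helpers, roughly as below; the exact call sites
 * live in this file's late_init/hw_fini paths.
 *
 *	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 *	if (r)
 *		return r;
 *	...
 *	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 */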

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							   u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in cu_info struct is 4x4 and ioctl interface struct
			 * drm_amdgpu_info_device should keep stable.
			 * So we use last two columns of bitmap to store cu mask for
			 * SEs 4 to 7, the layout of the bitmap is as below:
			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};
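
/*
 * Illustrative sketch: gfx_v11_0_ip_block is not referenced again in this
 * file; during device init the IP-discovery code selects it based on the
 * detected GC version and registers it roughly like:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
 *
 * (see amdgpu_discovery.c)
 */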