1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "imu_v11_0.h" 33 #include "soc21.h" 34 #include "nvd.h" 35 36 #include "gc/gc_11_0_0_offset.h" 37 #include "gc/gc_11_0_0_sh_mask.h" 38 #include "smuio/smuio_13_0_6_offset.h" 39 #include "smuio/smuio_13_0_6_sh_mask.h" 40 #include "navi10_enum.h" 41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 42 43 #include "soc15.h" 44 #include "clearstate_gfx11.h" 45 #include "v11_structs.h" 46 #include "gfx_v11_0.h" 47 #include "gfx_v11_0_cleaner_shader.h" 48 #include "gfx_v11_0_3.h" 49 #include "nbio_v4_3.h" 50 #include "mes_v11_0.h" 51 #include "mes_userqueue.h" 52 #include "amdgpu_userq_fence.h" 53 54 #define GFX11_NUM_GFX_RINGS 1 55 #define GFX11_MEC_HPD_SIZE 2048 56 57 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 58 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 59 60 #define regCGTT_WD_CLK_CTRL 0x5086 61 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e 63 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 64 #define regPC_CONFIG_CNTL_1 0x194d 65 #define regPC_CONFIG_CNTL_1_BASE_IDX 1 66 67 #define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 68 #define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 69 #define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 70 #define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 71 #define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 72 #define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 73 #define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 74 75 #define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 76 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 77 #define regCP_MQD_CONTROL_DEFAULT 0x00000100 78 #define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 79 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 80 #define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 81 #define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 82 #define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 83 84 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); 85 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); 86 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); 87 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); 88 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); 89 
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 103 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 105 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 107 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 108 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 109 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 110 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 111 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 113 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 115 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 117 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 119 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 120 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 121 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 122 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 123 124 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 125 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 126 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 127 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 135 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 136 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 138 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 139 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 140 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 141 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 142 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 143 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 144 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 145 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 146 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 147 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 148 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 149 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 157 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 158 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 159 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 160 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 161 
SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 162 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 163 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), 164 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 165 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 166 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 167 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 168 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 169 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 170 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 171 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 172 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 173 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 174 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 175 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 176 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 177 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 178 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 179 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 180 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 181 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), 182 /* cp header registers */ 183 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 184 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 185 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 186 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 187 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 188 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 189 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 190 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 191 /* SE status registers */ 192 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), 193 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), 194 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), 195 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), 196 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), 197 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) 198 }; 199 200 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { 201 /* compute registers */ 202 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), 203 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), 204 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), 205 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), 206 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), 207 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), 208 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), 209 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), 210 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), 211 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), 212 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), 213 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), 214 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), 215 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), 216 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), 217 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), 218 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), 219 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), 220 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), 221 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), 222 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), 223 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), 224 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), 225 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), 226 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), 227 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), 228 SOC15_REG_ENTRY_STR(GC, 
0, regCP_HQD_CNTL_STACK_OFFSET), 229 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), 230 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), 231 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), 232 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), 233 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), 234 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), 235 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), 236 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), 237 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), 238 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), 239 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), 240 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), 241 /* cp header registers */ 242 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 243 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 244 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 245 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 246 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 247 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 248 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 249 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 250 }; 251 252 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { 253 /* gfx queue registers */ 254 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), 255 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), 256 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), 257 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), 258 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), 259 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), 260 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), 261 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), 262 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), 263 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), 264 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), 265 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), 266 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), 267 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), 268 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), 269 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), 270 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), 271 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), 272 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), 273 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), 274 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 275 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 276 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 277 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 278 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 279 /* cp header registers */ 280 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 281 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 282 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 283 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 284 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 285 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 286 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 287 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 288 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 289 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 290 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 291 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 292 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 293 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 294 SOC15_REG_ENTRY_STR(GC, 0, 
regCP_ME_HEADER_DUMP), 295 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 296 }; 297 298 static const struct soc15_reg_golden golden_settings_gc_11_0[] = { 299 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) 300 }; 301 302 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 303 { 304 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), 305 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010), 306 SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), 307 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988), 308 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007), 309 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008), 310 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100), 311 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), 312 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) 313 }; 314 315 #define DEFAULT_SH_MEM_CONFIG \ 316 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 317 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 318 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 319 320 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev); 321 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev); 322 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev); 323 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev); 324 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev); 325 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev); 326 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev); 327 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 328 struct amdgpu_cu_info *cu_info); 329 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); 330 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 331 u32 sh_num, u32 instance, int xcc_id); 332 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); 333 334 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 335 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 336 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 337 uint32_t val); 338 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 339 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 340 uint16_t pasid, uint32_t flush_type, 341 bool all_hub, uint8_t dst_sel); 342 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 343 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 344 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 345 bool enable); 346 347 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 348 { 349 struct amdgpu_device *adev = kiq_ring->adev; 350 u64 shader_mc_addr; 351 352 /* Cleaner shader MC address */ 353 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 354 355 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 356 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 357 PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ 358 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 359 amdgpu_ring_write(kiq_ring, 
lower_32_bits(queue_mask)); /* queue mask lo */ 360 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 361 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ 362 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ 363 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 364 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 365 } 366 367 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 368 struct amdgpu_ring *ring) 369 { 370 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 371 uint64_t wptr_addr = ring->wptr_gpu_addr; 372 uint32_t me = 0, eng_sel = 0; 373 374 switch (ring->funcs->type) { 375 case AMDGPU_RING_TYPE_COMPUTE: 376 me = 1; 377 eng_sel = 0; 378 break; 379 case AMDGPU_RING_TYPE_GFX: 380 me = 0; 381 eng_sel = 4; 382 break; 383 case AMDGPU_RING_TYPE_MES: 384 me = 2; 385 eng_sel = 5; 386 break; 387 default: 388 WARN_ON(1); 389 } 390 391 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 392 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 393 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 394 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 395 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 396 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 397 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 398 PACKET3_MAP_QUEUES_ME((me)) | 399 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 400 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 401 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 402 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 403 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 404 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 405 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 406 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 407 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 408 } 409 410 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 411 struct amdgpu_ring *ring, 412 enum amdgpu_unmap_queues_action action, 413 u64 gpu_addr, u64 seq) 414 { 415 struct amdgpu_device *adev = kiq_ring->adev; 416 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 417 418 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 419 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); 420 return; 421 } 422 423 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 424 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 425 PACKET3_UNMAP_QUEUES_ACTION(action) | 426 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 427 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 428 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 429 amdgpu_ring_write(kiq_ring, 430 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 431 432 if (action == PREEMPT_QUEUES_NO_UNMAP) { 433 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 434 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 435 amdgpu_ring_write(kiq_ring, seq); 436 } else { 437 amdgpu_ring_write(kiq_ring, 0); 438 amdgpu_ring_write(kiq_ring, 0); 439 amdgpu_ring_write(kiq_ring, 0); 440 } 441 } 442 443 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 444 struct amdgpu_ring *ring, 445 u64 addr, 446 u64 seq) 447 { 448 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 449 450 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 451 amdgpu_ring_write(kiq_ring, 452 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 453 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 454 PACKET3_QUERY_STATUS_COMMAND(2)); 455 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 456 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 457 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 458 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 459 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 460 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 461 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 462 } 463 464 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 465 uint16_t pasid, uint32_t flush_type, 466 bool all_hub) 467 { 468 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 469 } 470 471 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 472 .kiq_set_resources = gfx11_kiq_set_resources, 473 .kiq_map_queues = gfx11_kiq_map_queues, 474 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 475 .kiq_query_status = gfx11_kiq_query_status, 476 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 477 .set_resources_size = 8, 478 .map_queues_size = 7, 479 .unmap_queues_size = 6, 480 .query_status_size = 7, 481 .invalidate_tlbs_size = 2, 482 }; 483 484 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 485 { 486 adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; 487 } 488 489 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 490 { 491 if (amdgpu_sriov_vf(adev)) 492 return; 493 494 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 495 case IP_VERSION(11, 0, 1): 496 case IP_VERSION(11, 0, 4): 497 soc15_program_register_sequence(adev, 498 golden_settings_gc_11_0_1, 499 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 500 break; 501 default: 502 break; 503 } 504 soc15_program_register_sequence(adev, 505 golden_settings_gc_11_0, 506 (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); 507 508 } 509 510 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 511 bool wc, uint32_t reg, uint32_t val) 512 { 513 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 514 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 515 WRITE_DATA_DST_SEL(0) | (wc ? 
WR_CONFIRM : 0)); 516 amdgpu_ring_write(ring, reg); 517 amdgpu_ring_write(ring, 0); 518 amdgpu_ring_write(ring, val); 519 } 520 521 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 522 int mem_space, int opt, uint32_t addr0, 523 uint32_t addr1, uint32_t ref, uint32_t mask, 524 uint32_t inv) 525 { 526 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 527 amdgpu_ring_write(ring, 528 /* memory (1) or register (0) */ 529 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 530 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 531 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 532 WAIT_REG_MEM_ENGINE(eng_sel))); 533 534 if (mem_space) 535 BUG_ON(addr0 & 0x3); /* Dword align */ 536 amdgpu_ring_write(ring, addr0); 537 amdgpu_ring_write(ring, addr1); 538 amdgpu_ring_write(ring, ref); 539 amdgpu_ring_write(ring, mask); 540 amdgpu_ring_write(ring, inv); /* poll interval */ 541 } 542 543 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 544 { 545 /* Header itself is a NOP packet */ 546 if (num_nop == 1) { 547 amdgpu_ring_write(ring, ring->funcs->nop); 548 return; 549 } 550 551 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 552 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 553 554 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 555 amdgpu_ring_insert_nop(ring, num_nop - 1); 556 } 557 558 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) 559 { 560 struct amdgpu_device *adev = ring->adev; 561 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 562 uint32_t tmp = 0; 563 unsigned i; 564 int r; 565 566 WREG32(scratch, 0xCAFEDEAD); 567 r = amdgpu_ring_alloc(ring, 5); 568 if (r) { 569 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 570 ring->idx, r); 571 return r; 572 } 573 574 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 575 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); 576 } else { 577 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 578 amdgpu_ring_write(ring, scratch - 579 PACKET3_SET_UCONFIG_REG_START); 580 amdgpu_ring_write(ring, 0xDEADBEEF); 581 } 582 amdgpu_ring_commit(ring); 583 584 for (i = 0; i < adev->usec_timeout; i++) { 585 tmp = RREG32(scratch); 586 if (tmp == 0xDEADBEEF) 587 break; 588 if (amdgpu_emu_mode == 1) 589 msleep(1); 590 else 591 udelay(1); 592 } 593 594 if (i >= adev->usec_timeout) 595 r = -ETIMEDOUT; 596 return r; 597 } 598 599 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 600 { 601 struct amdgpu_device *adev = ring->adev; 602 struct amdgpu_ib ib; 603 struct dma_fence *f = NULL; 604 unsigned index; 605 uint64_t gpu_addr; 606 volatile uint32_t *cpu_ptr; 607 long r; 608 609 /* MES KIQ fw hasn't indirect buffer support for now */ 610 if (adev->enable_mes_kiq && 611 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 612 return 0; 613 614 memset(&ib, 0, sizeof(ib)); 615 616 r = amdgpu_device_wb_get(adev, &index); 617 if (r) 618 return r; 619 620 gpu_addr = adev->wb.gpu_addr + (index * 4); 621 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 622 cpu_ptr = &adev->wb.wb[index]; 623 624 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 625 if (r) { 626 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 627 goto err1; 628 } 629 630 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 631 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 632 ib.ptr[2] = lower_32_bits(gpu_addr); 633 ib.ptr[3] = upper_32_bits(gpu_addr); 634 ib.ptr[4] = 0xDEADBEEF; 635 ib.length_dw = 5; 636 637 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, 
&f); 638 if (r) 639 goto err2; 640 641 r = dma_fence_wait_timeout(f, false, timeout); 642 if (r == 0) { 643 r = -ETIMEDOUT; 644 goto err2; 645 } else if (r < 0) { 646 goto err2; 647 } 648 649 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 650 r = 0; 651 else 652 r = -EINVAL; 653 err2: 654 amdgpu_ib_free(&ib, NULL); 655 dma_fence_put(f); 656 err1: 657 amdgpu_device_wb_free(adev, index); 658 return r; 659 } 660 661 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 662 { 663 amdgpu_ucode_release(&adev->gfx.pfp_fw); 664 amdgpu_ucode_release(&adev->gfx.me_fw); 665 amdgpu_ucode_release(&adev->gfx.rlc_fw); 666 amdgpu_ucode_release(&adev->gfx.mec_fw); 667 668 kfree(adev->gfx.rlc.register_list_format); 669 } 670 671 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 672 { 673 const struct psp_firmware_header_v1_0 *toc_hdr; 674 int err = 0; 675 676 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 677 AMDGPU_UCODE_REQUIRED, 678 "amdgpu/%s_toc.bin", ucode_prefix); 679 if (err) 680 goto out; 681 682 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 683 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 684 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 685 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 686 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 687 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 688 return 0; 689 out: 690 amdgpu_ucode_release(&adev->psp.toc_fw); 691 return err; 692 } 693 694 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 695 { 696 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 697 case IP_VERSION(11, 0, 0): 698 case IP_VERSION(11, 0, 2): 699 case IP_VERSION(11, 0, 3): 700 if ((adev->gfx.me_fw_version >= 1505) && 701 (adev->gfx.pfp_fw_version >= 1600) && 702 (adev->gfx.mec_fw_version >= 512)) { 703 if (amdgpu_sriov_vf(adev)) 704 adev->gfx.cp_gfx_shadow = true; 705 else 706 adev->gfx.cp_gfx_shadow = false; 707 } 708 break; 709 default: 710 adev->gfx.cp_gfx_shadow = false; 711 break; 712 } 713 } 714 715 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 716 { 717 char ucode_prefix[25]; 718 int err; 719 const struct rlc_firmware_header_v2_0 *rlc_hdr; 720 uint16_t version_major; 721 uint16_t version_minor; 722 723 DRM_DEBUG("\n"); 724 725 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 726 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 727 AMDGPU_UCODE_REQUIRED, 728 "amdgpu/%s_pfp.bin", ucode_prefix); 729 if (err) 730 goto out; 731 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 732 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 733 (union amdgpu_firmware_header *) 734 adev->gfx.pfp_fw->data, 2, 0); 735 if (adev->gfx.rs64_enable) { 736 dev_info(adev->dev, "CP RS64 enable\n"); 737 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 738 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 739 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 740 } else { 741 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 742 } 743 744 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 745 AMDGPU_UCODE_REQUIRED, 746 "amdgpu/%s_me.bin", ucode_prefix); 747 if (err) 748 goto out; 749 if (adev->gfx.rs64_enable) { 750 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 751 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 752 
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 753 } else { 754 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 755 } 756 757 if (!amdgpu_sriov_vf(adev)) { 758 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 759 adev->pdev->revision == 0xCE) 760 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 761 AMDGPU_UCODE_REQUIRED, 762 "amdgpu/gc_11_0_0_rlc_1.bin"); 763 else if (amdgpu_is_kicker_fw(adev)) 764 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 765 AMDGPU_UCODE_REQUIRED, 766 "amdgpu/%s_rlc_kicker.bin", ucode_prefix); 767 else 768 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 769 AMDGPU_UCODE_REQUIRED, 770 "amdgpu/%s_rlc.bin", ucode_prefix); 771 if (err) 772 goto out; 773 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 774 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 775 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 776 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 777 if (err) 778 goto out; 779 } 780 781 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 782 AMDGPU_UCODE_REQUIRED, 783 "amdgpu/%s_mec.bin", ucode_prefix); 784 if (err) 785 goto out; 786 if (adev->gfx.rs64_enable) { 787 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 788 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 789 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 790 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 791 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 792 } else { 793 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 794 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 795 } 796 797 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 798 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 799 800 /* only one MEC for gfx 11.0.0. 
*/ 801 adev->gfx.mec2_fw = NULL; 802 803 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 804 805 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 806 err = adev->gfx.imu.funcs->init_microcode(adev); 807 if (err) 808 DRM_ERROR("Failed to init imu firmware!\n"); 809 return err; 810 } 811 812 out: 813 if (err) { 814 amdgpu_ucode_release(&adev->gfx.pfp_fw); 815 amdgpu_ucode_release(&adev->gfx.me_fw); 816 amdgpu_ucode_release(&adev->gfx.rlc_fw); 817 amdgpu_ucode_release(&adev->gfx.mec_fw); 818 } 819 820 return err; 821 } 822 823 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 824 { 825 u32 count = 0; 826 const struct cs_section_def *sect = NULL; 827 const struct cs_extent_def *ext = NULL; 828 829 /* begin clear state */ 830 count += 2; 831 /* context control state */ 832 count += 3; 833 834 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 835 for (ext = sect->section; ext->extent != NULL; ++ext) { 836 if (sect->id == SECT_CONTEXT) 837 count += 2 + ext->reg_count; 838 else 839 return 0; 840 } 841 } 842 843 /* set PA_SC_TILE_STEERING_OVERRIDE */ 844 count += 3; 845 /* end clear state */ 846 count += 2; 847 /* clear state */ 848 count += 2; 849 850 return count; 851 } 852 853 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 854 volatile u32 *buffer) 855 { 856 u32 count = 0; 857 int ctx_reg_offset; 858 859 if (adev->gfx.rlc.cs_data == NULL) 860 return; 861 if (buffer == NULL) 862 return; 863 864 count = amdgpu_gfx_csb_preamble_start(buffer); 865 count = amdgpu_gfx_csb_data_parser(adev, buffer, count); 866 867 ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 868 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 869 buffer[count++] = cpu_to_le32(ctx_reg_offset); 870 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 871 872 amdgpu_gfx_csb_preamble_end(buffer, count); 873 } 874 875 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 876 { 877 /* clear state block */ 878 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 879 &adev->gfx.rlc.clear_state_gpu_addr, 880 (void **)&adev->gfx.rlc.cs_ptr); 881 882 /* jump table block */ 883 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 884 &adev->gfx.rlc.cp_table_gpu_addr, 885 (void **)&adev->gfx.rlc.cp_table_ptr); 886 } 887 888 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 889 { 890 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 891 892 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 893 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 894 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 895 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 896 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 897 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 898 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 899 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 900 adev->gfx.rlc.rlcg_reg_access_supported = true; 901 } 902 903 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 904 { 905 const struct cs_section_def *cs_data; 906 int r; 907 908 adev->gfx.rlc.cs_data = gfx11_cs_data; 909 910 cs_data = adev->gfx.rlc.cs_data; 911 912 if (cs_data) { 913 /* init clear state block */ 914 r = amdgpu_gfx_rlc_init_csb(adev); 915 if (r) 916 return r; 917 } 918 919 /* init spm vmid with 0xf */ 920 if 
(adev->gfx.rlc.funcs->update_spm_vmid) 921 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); 922 923 return 0; 924 } 925 926 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 927 { 928 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 929 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 930 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 931 } 932 933 static void gfx_v11_0_me_init(struct amdgpu_device *adev) 934 { 935 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 936 937 amdgpu_gfx_graphics_queue_acquire(adev); 938 } 939 940 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 941 { 942 int r; 943 u32 *hpd; 944 size_t mec_hpd_size; 945 946 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 947 948 /* take ownership of the relevant compute queues */ 949 amdgpu_gfx_compute_queue_acquire(adev); 950 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 951 952 if (mec_hpd_size) { 953 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 954 AMDGPU_GEM_DOMAIN_GTT, 955 &adev->gfx.mec.hpd_eop_obj, 956 &adev->gfx.mec.hpd_eop_gpu_addr, 957 (void **)&hpd); 958 if (r) { 959 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 960 gfx_v11_0_mec_fini(adev); 961 return r; 962 } 963 964 memset(hpd, 0, mec_hpd_size); 965 966 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 967 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 968 } 969 970 return 0; 971 } 972 973 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 974 { 975 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 976 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 977 (address << SQ_IND_INDEX__INDEX__SHIFT)); 978 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 979 } 980 981 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 982 uint32_t thread, uint32_t regno, 983 uint32_t num, uint32_t *out) 984 { 985 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 986 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 987 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 988 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 989 (SQ_IND_INDEX__AUTO_INCR_MASK)); 990 while (num--) 991 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 992 } 993 994 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 995 { 996 /* in gfx11 the SIMD_ID is specified as part of the INSTANCE 997 * field when performing a select_se_sh so it should be 998 * zero here */ 999 WARN_ON(simd != 0); 1000 1001 /* type 3 wave data */ 1002 dst[(*no_fields)++] = 3; 1003 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1004 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1005 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1006 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1007 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1008 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1009 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1010 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1011 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1012 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1013 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1014 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1015 dst[(*no_fields)++] = wave_read_ind(adev, wave, 
ixSQ_WAVE_IB_DBG1); 1016 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1017 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1018 } 1019 1020 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1021 uint32_t wave, uint32_t start, 1022 uint32_t size, uint32_t *dst) 1023 { 1024 WARN_ON(simd != 0); 1025 1026 wave_read_regs( 1027 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1028 dst); 1029 } 1030 1031 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1032 uint32_t wave, uint32_t thread, 1033 uint32_t start, uint32_t size, 1034 uint32_t *dst) 1035 { 1036 wave_read_regs( 1037 adev, wave, thread, 1038 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1039 } 1040 1041 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1042 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1043 { 1044 soc21_grbm_select(adev, me, pipe, q, vm); 1045 } 1046 1047 /* all sizes are in bytes */ 1048 #define MQD_SHADOW_BASE_SIZE 73728 1049 #define MQD_SHADOW_BASE_ALIGNMENT 256 1050 #define MQD_FWWORKAREA_SIZE 484 1051 #define MQD_FWWORKAREA_ALIGNMENT 256 1052 1053 static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, 1054 struct amdgpu_gfx_shadow_info *shadow_info) 1055 { 1056 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1057 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1058 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1059 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1060 } 1061 1062 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1063 struct amdgpu_gfx_shadow_info *shadow_info, 1064 bool skip_check) 1065 { 1066 if (adev->gfx.cp_gfx_shadow || skip_check) { 1067 gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); 1068 return 0; 1069 } else { 1070 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1071 return -ENOTSUPP; 1072 } 1073 } 1074 1075 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1076 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1077 .select_se_sh = &gfx_v11_0_select_se_sh, 1078 .read_wave_data = &gfx_v11_0_read_wave_data, 1079 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1080 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1081 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1082 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1083 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1084 }; 1085 1086 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1087 { 1088 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1089 case IP_VERSION(11, 0, 0): 1090 case IP_VERSION(11, 0, 2): 1091 adev->gfx.config.max_hw_contexts = 8; 1092 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1093 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1094 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1095 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1096 break; 1097 case IP_VERSION(11, 0, 3): 1098 adev->gfx.ras = &gfx_v11_0_3_ras; 1099 adev->gfx.config.max_hw_contexts = 8; 1100 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1101 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1102 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1103 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1104 break; 1105 case IP_VERSION(11, 0, 1): 1106 case IP_VERSION(11, 0, 4): 1107 case IP_VERSION(11, 5, 0): 1108 case IP_VERSION(11, 5, 1): 1109 case IP_VERSION(11, 5, 2): 1110 case IP_VERSION(11, 5, 3): 1111 adev->gfx.config.max_hw_contexts = 8; 1112 
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1113 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1114 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1115 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1116 break; 1117 default: 1118 BUG(); 1119 break; 1120 } 1121 1122 return 0; 1123 } 1124 1125 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1126 int me, int pipe, int queue) 1127 { 1128 struct amdgpu_ring *ring; 1129 unsigned int irq_type; 1130 unsigned int hw_prio; 1131 1132 ring = &adev->gfx.gfx_ring[ring_id]; 1133 1134 ring->me = me; 1135 ring->pipe = pipe; 1136 ring->queue = queue; 1137 1138 ring->ring_obj = NULL; 1139 ring->use_doorbell = true; 1140 if (adev->gfx.disable_kq) { 1141 ring->no_scheduler = true; 1142 ring->no_user_submission = true; 1143 } 1144 1145 if (!ring_id) 1146 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1147 else 1148 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1149 ring->vm_hub = AMDGPU_GFXHUB(0); 1150 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1151 1152 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1153 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1154 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1155 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1156 hw_prio, NULL); 1157 } 1158 1159 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1160 int mec, int pipe, int queue) 1161 { 1162 int r; 1163 unsigned irq_type; 1164 struct amdgpu_ring *ring; 1165 unsigned int hw_prio; 1166 1167 ring = &adev->gfx.compute_ring[ring_id]; 1168 1169 /* mec0 is me1 */ 1170 ring->me = mec + 1; 1171 ring->pipe = pipe; 1172 ring->queue = queue; 1173 1174 ring->ring_obj = NULL; 1175 ring->use_doorbell = true; 1176 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1177 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1178 + (ring_id * GFX11_MEC_HPD_SIZE); 1179 ring->vm_hub = AMDGPU_GFXHUB(0); 1180 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1181 1182 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1183 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1184 + ring->pipe; 1185 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1186 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1187 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1188 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1189 hw_prio, NULL); 1190 if (r) 1191 return r; 1192 1193 return 0; 1194 } 1195 1196 static struct { 1197 SOC21_FIRMWARE_ID id; 1198 unsigned int offset; 1199 unsigned int size; 1200 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1201 1202 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1203 { 1204 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1205 1206 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1207 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1208 rlc_autoload_info[ucode->id].id = ucode->id; 1209 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1210 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1211 1212 ucode++; 1213 } 1214 } 1215 1216 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1217 { 1218 uint32_t total_size = 0; 1219 SOC21_FIRMWARE_ID id; 1220 1221 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1222 1223 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1224 total_size += rlc_autoload_info[id].size; 1225 1226 /* In case the offset in rlc toc ucode is aligned */ 1227 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1228 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1229 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1230 1231 return total_size; 1232 } 1233 1234 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1235 { 1236 int r; 1237 uint32_t total_size; 1238 1239 total_size = gfx_v11_0_calc_toc_total_size(adev); 1240 1241 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1242 AMDGPU_GEM_DOMAIN_VRAM | 1243 AMDGPU_GEM_DOMAIN_GTT, 1244 &adev->gfx.rlc.rlc_autoload_bo, 1245 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1246 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1247 1248 if (r) { 1249 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1250 return r; 1251 } 1252 1253 return 0; 1254 } 1255 1256 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1257 SOC21_FIRMWARE_ID id, 1258 const void *fw_data, 1259 uint32_t fw_size, 1260 uint32_t *fw_autoload_mask) 1261 { 1262 uint32_t toc_offset; 1263 uint32_t toc_fw_size; 1264 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1265 1266 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1267 return; 1268 1269 toc_offset = rlc_autoload_info[id].offset; 1270 toc_fw_size = rlc_autoload_info[id].size; 1271 1272 if (fw_size == 0) 1273 fw_size = toc_fw_size; 1274 1275 if (fw_size > toc_fw_size) 1276 fw_size = toc_fw_size; 1277 1278 memcpy(ptr + toc_offset, fw_data, fw_size); 1279 1280 if (fw_size < toc_fw_size) 1281 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1282 1283 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1284 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1285 } 1286 1287 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1288 uint32_t *fw_autoload_mask) 1289 { 1290 void *data; 1291 uint32_t size; 1292 uint64_t *toc_ptr; 1293 1294 *(uint64_t *)fw_autoload_mask |= 0x1; 1295 1296 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1297 1298 data = adev->psp.toc.start_addr; 1299 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1300 1301 toc_ptr = (uint64_t *)data + size / 8 - 1; 1302 *toc_ptr = 
*(uint64_t *)fw_autoload_mask; 1303 1304 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1305 data, size, fw_autoload_mask); 1306 } 1307 1308 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1309 uint32_t *fw_autoload_mask) 1310 { 1311 const __le32 *fw_data; 1312 uint32_t fw_size; 1313 const struct gfx_firmware_header_v1_0 *cp_hdr; 1314 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1315 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1316 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1317 uint16_t version_major, version_minor; 1318 1319 if (adev->gfx.rs64_enable) { 1320 /* pfp ucode */ 1321 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1322 adev->gfx.pfp_fw->data; 1323 /* instruction */ 1324 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1325 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1326 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1327 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1328 fw_data, fw_size, fw_autoload_mask); 1329 /* data */ 1330 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1331 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1332 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1333 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1334 fw_data, fw_size, fw_autoload_mask); 1335 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1336 fw_data, fw_size, fw_autoload_mask); 1337 /* me ucode */ 1338 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1339 adev->gfx.me_fw->data; 1340 /* instruction */ 1341 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1342 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1343 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1344 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1345 fw_data, fw_size, fw_autoload_mask); 1346 /* data */ 1347 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1348 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1349 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1350 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1351 fw_data, fw_size, fw_autoload_mask); 1352 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1353 fw_data, fw_size, fw_autoload_mask); 1354 /* mec ucode */ 1355 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1356 adev->gfx.mec_fw->data; 1357 /* instruction */ 1358 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1359 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1360 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1361 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1362 fw_data, fw_size, fw_autoload_mask); 1363 /* data */ 1364 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1365 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1366 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1367 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1368 fw_data, fw_size, fw_autoload_mask); 1369 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1370 fw_data, fw_size, fw_autoload_mask); 1371 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1372 fw_data, fw_size, fw_autoload_mask); 1373 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1374 fw_data, fw_size, fw_autoload_mask); 1375 } else { 1376 /* pfp ucode */ 1377 cp_hdr = (const struct 
gfx_firmware_header_v1_0 *) 1378 adev->gfx.pfp_fw->data; 1379 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1380 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1381 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1382 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1383 fw_data, fw_size, fw_autoload_mask); 1384 1385 /* me ucode */ 1386 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1387 adev->gfx.me_fw->data; 1388 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1389 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1390 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1391 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1392 fw_data, fw_size, fw_autoload_mask); 1393 1394 /* mec ucode */ 1395 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1396 adev->gfx.mec_fw->data; 1397 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1398 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1399 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1400 cp_hdr->jt_size * 4; 1401 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1402 fw_data, fw_size, fw_autoload_mask); 1403 } 1404 1405 /* rlc ucode */ 1406 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1407 adev->gfx.rlc_fw->data; 1408 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1409 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1410 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1411 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1412 fw_data, fw_size, fw_autoload_mask); 1413 1414 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1415 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1416 if (version_major == 2) { 1417 if (version_minor >= 2) { 1418 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1419 1420 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1421 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1422 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1423 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1424 fw_data, fw_size, fw_autoload_mask); 1425 1426 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1427 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1428 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1429 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1430 fw_data, fw_size, fw_autoload_mask); 1431 } 1432 } 1433 } 1434 1435 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1436 uint32_t *fw_autoload_mask) 1437 { 1438 const __le32 *fw_data; 1439 uint32_t fw_size; 1440 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1441 1442 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1443 adev->sdma.instance[0].fw->data; 1444 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1445 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1446 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1447 1448 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1449 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1450 1451 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1452 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1453 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1454 1455 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1456 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, 
fw_autoload_mask); 1457 } 1458 1459 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1460 uint32_t *fw_autoload_mask) 1461 { 1462 const __le32 *fw_data; 1463 unsigned fw_size; 1464 const struct mes_firmware_header_v1_0 *mes_hdr; 1465 int pipe, ucode_id, data_id; 1466 1467 for (pipe = 0; pipe < 2; pipe++) { 1468 if (pipe==0) { 1469 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1470 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1471 } else { 1472 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1473 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1474 } 1475 1476 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1477 adev->mes.fw[pipe]->data; 1478 1479 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1480 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1481 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1482 1483 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1484 ucode_id, fw_data, fw_size, fw_autoload_mask); 1485 1486 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1487 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1488 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1489 1490 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1491 data_id, fw_data, fw_size, fw_autoload_mask); 1492 } 1493 } 1494 1495 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1496 { 1497 uint32_t rlc_g_offset, rlc_g_size; 1498 uint64_t gpu_addr; 1499 uint32_t autoload_fw_id[2]; 1500 1501 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1502 1503 /* RLC autoload sequence 2: copy ucode */ 1504 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1505 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1506 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1507 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1508 1509 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1510 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1511 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1512 1513 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1514 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1515 1516 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1517 1518 /* RLC autoload sequence 3: load IMU fw */ 1519 if (adev->gfx.imu.funcs->load_microcode) 1520 adev->gfx.imu.funcs->load_microcode(adev); 1521 /* RLC autoload sequence 4 init IMU fw */ 1522 if (adev->gfx.imu.funcs->setup_imu) 1523 adev->gfx.imu.funcs->setup_imu(adev); 1524 if (adev->gfx.imu.funcs->start_imu) 1525 adev->gfx.imu.funcs->start_imu(adev); 1526 1527 /* RLC autoload sequence 5 disable gpa mode */ 1528 gfx_v11_0_disable_gpa_mode(adev); 1529 1530 return 0; 1531 } 1532 1533 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1534 { 1535 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1536 uint32_t *ptr; 1537 uint32_t inst; 1538 1539 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1540 if (!ptr) { 1541 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1542 adev->gfx.ip_dump_core = NULL; 1543 } else { 1544 adev->gfx.ip_dump_core = ptr; 1545 } 1546 1547 /* Allocate memory for compute queue registers for all the instances */ 1548 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1549 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1550 adev->gfx.mec.num_queue_per_pipe; 1551 1552 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), 
GFP_KERNEL); 1553 if (!ptr) { 1554 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1555 adev->gfx.ip_dump_compute_queues = NULL; 1556 } else { 1557 adev->gfx.ip_dump_compute_queues = ptr; 1558 } 1559 1560 /* Allocate memory for gfx queue registers for all the instances */ 1561 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1562 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1563 adev->gfx.me.num_queue_per_pipe; 1564 1565 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1566 if (!ptr) { 1567 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1568 adev->gfx.ip_dump_gfx_queues = NULL; 1569 } else { 1570 adev->gfx.ip_dump_gfx_queues = ptr; 1571 } 1572 } 1573 1574 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1575 { 1576 int i, j, k, r, ring_id; 1577 int xcc_id = 0; 1578 struct amdgpu_device *adev = ip_block->adev; 1579 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1580 1581 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1582 1583 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1584 case IP_VERSION(11, 0, 0): 1585 case IP_VERSION(11, 0, 1): 1586 case IP_VERSION(11, 0, 2): 1587 case IP_VERSION(11, 0, 3): 1588 case IP_VERSION(11, 0, 4): 1589 case IP_VERSION(11, 5, 0): 1590 case IP_VERSION(11, 5, 1): 1591 case IP_VERSION(11, 5, 2): 1592 case IP_VERSION(11, 5, 3): 1593 adev->gfx.me.num_me = 1; 1594 adev->gfx.me.num_pipe_per_me = 1; 1595 adev->gfx.me.num_queue_per_pipe = 2; 1596 adev->gfx.mec.num_mec = 1; 1597 adev->gfx.mec.num_pipe_per_mec = 4; 1598 adev->gfx.mec.num_queue_per_pipe = 4; 1599 break; 1600 default: 1601 adev->gfx.me.num_me = 1; 1602 adev->gfx.me.num_pipe_per_me = 1; 1603 adev->gfx.me.num_queue_per_pipe = 1; 1604 adev->gfx.mec.num_mec = 1; 1605 adev->gfx.mec.num_pipe_per_mec = 4; 1606 adev->gfx.mec.num_queue_per_pipe = 8; 1607 break; 1608 } 1609 1610 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1611 case IP_VERSION(11, 0, 0): 1612 case IP_VERSION(11, 0, 2): 1613 case IP_VERSION(11, 0, 3): 1614 if (!adev->gfx.disable_uq && 1615 adev->gfx.me_fw_version >= 2420 && 1616 adev->gfx.pfp_fw_version >= 2580 && 1617 adev->gfx.mec_fw_version >= 2650 && 1618 adev->mes.fw_version[0] >= 120) { 1619 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1620 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1621 } 1622 break; 1623 case IP_VERSION(11, 0, 1): 1624 case IP_VERSION(11, 0, 4): 1625 case IP_VERSION(11, 5, 0): 1626 case IP_VERSION(11, 5, 1): 1627 case IP_VERSION(11, 5, 2): 1628 case IP_VERSION(11, 5, 3): 1629 /* add firmware version checks here */ 1630 if (0 && !adev->gfx.disable_uq) { 1631 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1632 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1633 } 1634 break; 1635 default: 1636 break; 1637 } 1638 1639 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1640 case IP_VERSION(11, 0, 0): 1641 case IP_VERSION(11, 0, 2): 1642 case IP_VERSION(11, 0, 3): 1643 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1644 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1645 if (adev->gfx.me_fw_version >= 2280 && 1646 adev->gfx.pfp_fw_version >= 2370 && 1647 adev->gfx.mec_fw_version >= 2450 && 1648 adev->mes.fw_version[0] >= 99) { 1649 adev->gfx.enable_cleaner_shader = true; 1650 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1651 if (r) { 1652 adev->gfx.enable_cleaner_shader = false; 1653 dev_err(adev->dev, "Failed to initialize cleaner 
shader\n"); 1654 } 1655 } 1656 break; 1657 case IP_VERSION(11, 0, 1): 1658 case IP_VERSION(11, 0, 4): 1659 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 if (adev->gfx.pfp_fw_version >= 102 && 1662 adev->gfx.mec_fw_version >= 66 && 1663 adev->mes.fw_version[0] >= 128) { 1664 adev->gfx.enable_cleaner_shader = true; 1665 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1666 if (r) { 1667 adev->gfx.enable_cleaner_shader = false; 1668 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1669 } 1670 } 1671 break; 1672 case IP_VERSION(11, 5, 0): 1673 case IP_VERSION(11, 5, 1): 1674 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1675 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1676 if (adev->gfx.mec_fw_version >= 26 && 1677 adev->mes.fw_version[0] >= 114) { 1678 adev->gfx.enable_cleaner_shader = true; 1679 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1680 if (r) { 1681 adev->gfx.enable_cleaner_shader = false; 1682 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1683 } 1684 } 1685 break; 1686 case IP_VERSION(11, 5, 2): 1687 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1688 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1689 if (adev->gfx.me_fw_version >= 12 && 1690 adev->gfx.pfp_fw_version >= 15 && 1691 adev->gfx.mec_fw_version >= 15) { 1692 adev->gfx.enable_cleaner_shader = true; 1693 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1694 if (r) { 1695 adev->gfx.enable_cleaner_shader = false; 1696 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1697 } 1698 } 1699 break; 1700 case IP_VERSION(11, 5, 3): 1701 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1702 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1703 if (adev->gfx.me_fw_version >= 7 && 1704 adev->gfx.pfp_fw_version >= 8 && 1705 adev->gfx.mec_fw_version >= 8) { 1706 adev->gfx.enable_cleaner_shader = true; 1707 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1708 if (r) { 1709 adev->gfx.enable_cleaner_shader = false; 1710 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1711 } 1712 } 1713 break; 1714 default: 1715 adev->gfx.enable_cleaner_shader = false; 1716 break; 1717 } 1718 1719 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1720 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1721 amdgpu_sriov_is_pp_one_vf(adev)) 1722 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1723 1724 /* EOP Event */ 1725 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1726 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1727 &adev->gfx.eop_irq); 1728 if (r) 1729 return r; 1730 1731 /* Bad opcode Event */ 1732 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1733 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1734 &adev->gfx.bad_op_irq); 1735 if (r) 1736 return r; 1737 1738 /* Privileged reg */ 1739 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1740 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1741 &adev->gfx.priv_reg_irq); 1742 if (r) 1743 return r; 1744 1745 /* Privileged inst */ 1746 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1747 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1748 &adev->gfx.priv_inst_irq); 1749 if (r) 1750 return r; 1751 1752 /* FED error */ 1753 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1754 
GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1755 &adev->gfx.rlc_gc_fed_irq); 1756 if (r) 1757 return r; 1758 1759 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1760 1761 gfx_v11_0_me_init(adev); 1762 1763 r = gfx_v11_0_rlc_init(adev); 1764 if (r) { 1765 DRM_ERROR("Failed to init rlc BOs!\n"); 1766 return r; 1767 } 1768 1769 r = gfx_v11_0_mec_init(adev); 1770 if (r) { 1771 DRM_ERROR("Failed to init MEC BOs!\n"); 1772 return r; 1773 } 1774 1775 if (adev->gfx.num_gfx_rings) { 1776 ring_id = 0; 1777 /* set up the gfx ring */ 1778 for (i = 0; i < adev->gfx.me.num_me; i++) { 1779 for (j = 0; j < num_queue_per_pipe; j++) { 1780 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1781 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1782 continue; 1783 1784 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1785 i, k, j); 1786 if (r) 1787 return r; 1788 ring_id++; 1789 } 1790 } 1791 } 1792 } 1793 1794 if (adev->gfx.num_compute_rings) { 1795 ring_id = 0; 1796 /* set up the compute queues - allocate horizontally across pipes */ 1797 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1798 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1799 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1800 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1801 k, j)) 1802 continue; 1803 1804 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1805 i, k, j); 1806 if (r) 1807 return r; 1808 1809 ring_id++; 1810 } 1811 } 1812 } 1813 } 1814 1815 adev->gfx.gfx_supported_reset = 1816 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1817 adev->gfx.compute_supported_reset = 1818 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1819 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1820 case IP_VERSION(11, 0, 0): 1821 case IP_VERSION(11, 0, 2): 1822 case IP_VERSION(11, 0, 3): 1823 if ((adev->gfx.me_fw_version >= 2280) && 1824 (adev->gfx.mec_fw_version >= 2410) && 1825 !amdgpu_sriov_vf(adev)) { 1826 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1827 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1828 } 1829 break; 1830 default: 1831 if (!amdgpu_sriov_vf(adev)) { 1832 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1833 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1834 } 1835 break; 1836 } 1837 1838 if (!adev->enable_mes_kiq) { 1839 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1840 if (r) { 1841 DRM_ERROR("Failed to init KIQ BOs!\n"); 1842 return r; 1843 } 1844 1845 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1846 if (r) 1847 return r; 1848 } 1849 1850 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1851 if (r) 1852 return r; 1853 1854 /* allocate visible FB for rlc auto-loading fw */ 1855 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1856 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1857 if (r) 1858 return r; 1859 } 1860 1861 r = gfx_v11_0_gpu_early_init(adev); 1862 if (r) 1863 return r; 1864 1865 if (amdgpu_gfx_ras_sw_init(adev)) { 1866 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1867 return -EINVAL; 1868 } 1869 1870 gfx_v11_0_alloc_ip_dump(adev); 1871 1872 r = amdgpu_gfx_sysfs_init(adev); 1873 if (r) 1874 return r; 1875 1876 return 0; 1877 } 1878 1879 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1880 { 1881 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1882 &adev->gfx.pfp.pfp_fw_gpu_addr, 1883 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1884 1885 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1886 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1887 (void 
**)&adev->gfx.pfp.pfp_fw_data_ptr); 1888 } 1889 1890 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1891 { 1892 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1893 &adev->gfx.me.me_fw_gpu_addr, 1894 (void **)&adev->gfx.me.me_fw_ptr); 1895 1896 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1897 &adev->gfx.me.me_fw_data_gpu_addr, 1898 (void **)&adev->gfx.me.me_fw_data_ptr); 1899 } 1900 1901 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1902 { 1903 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1904 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1905 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1906 } 1907 1908 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1909 { 1910 int i; 1911 struct amdgpu_device *adev = ip_block->adev; 1912 1913 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1914 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1915 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1916 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1917 1918 amdgpu_gfx_mqd_sw_fini(adev, 0); 1919 1920 if (!adev->enable_mes_kiq) { 1921 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1922 amdgpu_gfx_kiq_fini(adev, 0); 1923 } 1924 1925 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1926 1927 gfx_v11_0_pfp_fini(adev); 1928 gfx_v11_0_me_fini(adev); 1929 gfx_v11_0_rlc_fini(adev); 1930 gfx_v11_0_mec_fini(adev); 1931 1932 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1933 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1934 1935 gfx_v11_0_free_microcode(adev); 1936 1937 amdgpu_gfx_sysfs_fini(adev); 1938 1939 kfree(adev->gfx.ip_dump_core); 1940 kfree(adev->gfx.ip_dump_compute_queues); 1941 kfree(adev->gfx.ip_dump_gfx_queues); 1942 1943 return 0; 1944 } 1945 1946 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1947 u32 sh_num, u32 instance, int xcc_id) 1948 { 1949 u32 data; 1950 1951 if (instance == 0xffffffff) 1952 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1953 INSTANCE_BROADCAST_WRITES, 1); 1954 else 1955 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1956 instance); 1957 1958 if (se_num == 0xffffffff) 1959 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1960 1); 1961 else 1962 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1963 1964 if (sh_num == 0xffffffff) 1965 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1966 1); 1967 else 1968 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1969 1970 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1971 } 1972 1973 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1974 { 1975 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1976 1977 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1978 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1979 CC_GC_SA_UNIT_DISABLE, 1980 SA_DISABLE); 1981 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1982 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1983 GC_USER_SA_UNIT_DISABLE, 1984 SA_DISABLE); 1985 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1986 adev->gfx.config.max_shader_engines); 1987 1988 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1989 } 1990 1991 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1992 { 1993 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1994 u32 rb_mask; 1995 1996 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1997 gc_disabled_rb_mask = 
REG_GET_FIELD(gc_disabled_rb_mask, 1998 CC_RB_BACKEND_DISABLE, 1999 BACKEND_DISABLE); 2000 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2001 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2002 GC_USER_RB_BACKEND_DISABLE, 2003 BACKEND_DISABLE); 2004 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2005 adev->gfx.config.max_shader_engines); 2006 2007 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2008 } 2009 2010 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2011 { 2012 u32 rb_bitmap_per_sa; 2013 u32 rb_bitmap_width_per_sa; 2014 u32 max_sa; 2015 u32 active_sa_bitmap; 2016 u32 global_active_rb_bitmap; 2017 u32 active_rb_bitmap = 0; 2018 u32 i; 2019 2020 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2021 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2022 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2023 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2024 2025 /* generate active rb bitmap according to active sa bitmap */ 2026 max_sa = adev->gfx.config.max_shader_engines * 2027 adev->gfx.config.max_sh_per_se; 2028 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2029 adev->gfx.config.max_sh_per_se; 2030 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2031 2032 for (i = 0; i < max_sa; i++) { 2033 if (active_sa_bitmap & (1 << i)) 2034 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2035 } 2036 2037 active_rb_bitmap &= global_active_rb_bitmap; 2038 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2039 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2040 } 2041 2042 #define DEFAULT_SH_MEM_BASES (0x6000) 2043 #define LDS_APP_BASE 0x1 2044 #define SCRATCH_APP_BASE 0x2 2045 2046 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2047 { 2048 int i; 2049 uint32_t sh_mem_bases; 2050 uint32_t data; 2051 2052 /* 2053 * Configure apertures: 2054 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2055 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2056 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2057 */ 2058 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2059 SCRATCH_APP_BASE; 2060 2061 mutex_lock(&adev->srbm_mutex); 2062 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2063 soc21_grbm_select(adev, 0, 0, 0, i); 2064 /* CP and shaders */ 2065 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2066 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2067 2068 /* Enable trap for each kfd vmid. */ 2069 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2070 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2071 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2072 } 2073 soc21_grbm_select(adev, 0, 0, 0, 0); 2074 mutex_unlock(&adev->srbm_mutex); 2075 2076 /* 2077 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2078 * access. These should be enabled by FW for target VMIDs. 
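 * Only the KFD VMIDs (first_kfd_vmid and up) are touched here; the
 * remaining non-zero VMIDs are cleared in gfx_v11_0_init_gds_vmid() below.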
2079 */ 2080 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2081 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2082 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2083 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2084 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2085 } 2086 } 2087 2088 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2089 { 2090 int vmid; 2091 2092 /* 2093 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2094 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2095 * the driver can enable them for graphics. VMID0 should maintain 2096 * access so that HWS firmware can save/restore entries. 2097 */ 2098 for (vmid = 1; vmid < 16; vmid++) { 2099 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2100 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2101 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2102 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2103 } 2104 } 2105 2106 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2107 { 2108 /* TODO: harvest feature to be added later. */ 2109 } 2110 2111 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2112 { 2113 /* TCCs are global (not instanced). */ 2114 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2115 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2116 2117 adev->gfx.config.tcc_disabled_mask = 2118 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2119 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2120 } 2121 2122 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2123 { 2124 u32 tmp; 2125 int i; 2126 2127 if (!amdgpu_sriov_vf(adev)) 2128 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2129 2130 gfx_v11_0_setup_rb(adev); 2131 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2132 gfx_v11_0_get_tcc_info(adev); 2133 adev->gfx.config.pa_sc_tile_steering_override = 0; 2134 2135 /* Set whether texture coordinate truncation is conformant. */ 2136 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2137 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2138 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2139 2140 /* XXX SH_MEM regs */ 2141 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2142 mutex_lock(&adev->srbm_mutex); 2143 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2144 soc21_grbm_select(adev, 0, 0, 0, i); 2145 /* CP and shaders */ 2146 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2147 if (i != 0) { 2148 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2149 (adev->gmc.private_aperture_start >> 48)); 2150 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2151 (adev->gmc.shared_aperture_start >> 48)); 2152 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2153 } 2154 } 2155 soc21_grbm_select(adev, 0, 0, 0, 0); 2156 2157 mutex_unlock(&adev->srbm_mutex); 2158 2159 gfx_v11_0_init_compute_vmid(adev); 2160 gfx_v11_0_init_gds_vmid(adev); 2161 } 2162 2163 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2164 int me, int pipe) 2165 { 2166 if (me != 0) 2167 return 0; 2168 2169 switch (pipe) { 2170 case 0: 2171 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2172 case 1: 2173 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2174 default: 2175 return 0; 2176 } 2177 } 2178 2179 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2180 int me, int pipe) 2181 { 2182 /* 2183 * amdgpu controls only the first MEC. 
That's why this function only 2184 * handles the setting of interrupts for this specific MEC. All other 2185 * pipes' interrupts are set by amdkfd. 2186 */ 2187 if (me != 1) 2188 return 0; 2189 2190 switch (pipe) { 2191 case 0: 2192 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2193 case 1: 2194 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2195 case 2: 2196 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2197 case 3: 2198 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2199 default: 2200 return 0; 2201 } 2202 } 2203 2204 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2205 bool enable) 2206 { 2207 u32 tmp, cp_int_cntl_reg; 2208 int i, j; 2209 2210 if (amdgpu_sriov_vf(adev)) 2211 return; 2212 2213 for (i = 0; i < adev->gfx.me.num_me; i++) { 2214 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2215 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2216 2217 if (cp_int_cntl_reg) { 2218 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2219 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2220 enable ? 1 : 0); 2221 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2222 enable ? 1 : 0); 2223 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2224 enable ? 1 : 0); 2225 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2226 enable ? 1 : 0); 2227 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2228 } 2229 } 2230 } 2231 } 2232 2233 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2234 { 2235 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2236 2237 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2238 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2239 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2240 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2241 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2242 2243 return 0; 2244 } 2245 2246 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2247 { 2248 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2249 2250 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2251 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2252 } 2253 2254 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2255 { 2256 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2257 udelay(50); 2258 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2259 udelay(50); 2260 } 2261 2262 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2263 bool enable) 2264 { 2265 uint32_t rlc_pg_cntl; 2266 2267 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2268 2269 if (!enable) { 2270 /* RLC_PG_CNTL[23] = 0 (default) 2271 * RLC will wait for handshake acks with SMU 2272 * GFXOFF will be enabled 2273 * RLC_PG_CNTL[23] = 1 2274 * RLC will not issue any message to SMU 2275 * hence no handshake between SMU & RLC 2276 * GFXOFF will be disabled 2277 */ 2278 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2279 } else 2280 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2281 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2282 } 2283 2284 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2285 { 2286 /* TODO: enable rlc & smu handshake until smu 2287 * and gfxoff feature works as expected */ 2288 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2289 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2290 2291 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2292 udelay(50); 2293 } 2294 2295 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device 
*adev) 2296 { 2297 uint32_t tmp; 2298 2299 /* enable Save Restore Machine */ 2300 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2301 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2302 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2303 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2304 } 2305 2306 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2307 { 2308 const struct rlc_firmware_header_v2_0 *hdr; 2309 const __le32 *fw_data; 2310 unsigned i, fw_size; 2311 2312 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2313 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2314 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2315 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2316 2317 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2318 RLCG_UCODE_LOADING_START_ADDRESS); 2319 2320 for (i = 0; i < fw_size; i++) 2321 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2322 le32_to_cpup(fw_data++)); 2323 2324 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2325 } 2326 2327 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2328 { 2329 const struct rlc_firmware_header_v2_2 *hdr; 2330 const __le32 *fw_data; 2331 unsigned i, fw_size; 2332 u32 tmp; 2333 2334 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2335 2336 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2337 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2338 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2339 2340 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2341 2342 for (i = 0; i < fw_size; i++) { 2343 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2344 msleep(1); 2345 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2346 le32_to_cpup(fw_data++)); 2347 } 2348 2349 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2350 2351 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2352 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2353 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2354 2355 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2356 for (i = 0; i < fw_size; i++) { 2357 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2358 msleep(1); 2359 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2360 le32_to_cpup(fw_data++)); 2361 } 2362 2363 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2364 2365 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2366 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2367 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2368 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2369 } 2370 2371 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2372 { 2373 const struct rlc_firmware_header_v2_3 *hdr; 2374 const __le32 *fw_data; 2375 unsigned i, fw_size; 2376 u32 tmp; 2377 2378 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2379 2380 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2381 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2382 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2383 2384 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2385 2386 for (i = 0; i < fw_size; i++) { 2387 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2388 msleep(1); 2389 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2390 le32_to_cpup(fw_data++)); 2391 } 2392 2393 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2394 2395 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2396 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2397 WREG32_SOC15(GC, 0, 
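/* enable GPM thread 1 now that the RLC-P image is in place */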
regRLC_GPM_THREAD_ENABLE, tmp); 2398 2399 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2400 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2401 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2402 2403 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2404 2405 for (i = 0; i < fw_size; i++) { 2406 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2407 msleep(1); 2408 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2409 le32_to_cpup(fw_data++)); 2410 } 2411 2412 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2413 2414 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2415 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2416 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2417 } 2418 2419 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2420 { 2421 const struct rlc_firmware_header_v2_0 *hdr; 2422 uint16_t version_major; 2423 uint16_t version_minor; 2424 2425 if (!adev->gfx.rlc_fw) 2426 return -EINVAL; 2427 2428 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2429 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2430 2431 version_major = le16_to_cpu(hdr->header.header_version_major); 2432 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2433 2434 if (version_major == 2) { 2435 gfx_v11_0_load_rlcg_microcode(adev); 2436 if (amdgpu_dpm == 1) { 2437 if (version_minor >= 2) 2438 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2439 if (version_minor == 3) 2440 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2441 } 2442 2443 return 0; 2444 } 2445 2446 return -EINVAL; 2447 } 2448 2449 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2450 { 2451 int r; 2452 2453 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2454 gfx_v11_0_init_csb(adev); 2455 2456 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2457 gfx_v11_0_rlc_enable_srm(adev); 2458 } else { 2459 if (amdgpu_sriov_vf(adev)) { 2460 gfx_v11_0_init_csb(adev); 2461 return 0; 2462 } 2463 2464 adev->gfx.rlc.funcs->stop(adev); 2465 2466 /* disable CG */ 2467 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2468 2469 /* disable PG */ 2470 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2471 2472 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2473 /* legacy rlc firmware loading */ 2474 r = gfx_v11_0_rlc_load_microcode(adev); 2475 if (r) 2476 return r; 2477 } 2478 2479 gfx_v11_0_init_csb(adev); 2480 2481 adev->gfx.rlc.funcs->start(adev); 2482 } 2483 return 0; 2484 } 2485 2486 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2487 { 2488 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2489 uint32_t tmp; 2490 int i; 2491 2492 /* Trigger an invalidation of the L1 instruction caches */ 2493 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2494 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2495 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2496 2497 /* Wait for invalidation complete */ 2498 for (i = 0; i < usec_timeout; i++) { 2499 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2500 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2501 INVALIDATE_CACHE_COMPLETE)) 2502 break; 2503 udelay(1); 2504 } 2505 2506 if (i >= usec_timeout) { 2507 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2508 return -EINVAL; 2509 } 2510 2511 if (amdgpu_emu_mode == 1) 2512 amdgpu_device_flush_hdp(adev, NULL); 2513 2514 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2515 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2516 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2517 tmp = 
REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2518 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2519 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2520 2521 /* Program me ucode address into intruction cache address register */ 2522 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2523 lower_32_bits(addr) & 0xFFFFF000); 2524 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2525 upper_32_bits(addr)); 2526 2527 return 0; 2528 } 2529 2530 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2531 { 2532 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2533 uint32_t tmp; 2534 int i; 2535 2536 /* Trigger an invalidation of the L1 instruction caches */ 2537 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2538 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2539 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2540 2541 /* Wait for invalidation complete */ 2542 for (i = 0; i < usec_timeout; i++) { 2543 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2544 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2545 INVALIDATE_CACHE_COMPLETE)) 2546 break; 2547 udelay(1); 2548 } 2549 2550 if (i >= usec_timeout) { 2551 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2552 return -EINVAL; 2553 } 2554 2555 if (amdgpu_emu_mode == 1) 2556 amdgpu_device_flush_hdp(adev, NULL); 2557 2558 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2559 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2560 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2561 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2562 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2563 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2564 2565 /* Program pfp ucode address into intruction cache address register */ 2566 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2567 lower_32_bits(addr) & 0xFFFFF000); 2568 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2569 upper_32_bits(addr)); 2570 2571 return 0; 2572 } 2573 2574 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2575 { 2576 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2577 uint32_t tmp; 2578 int i; 2579 2580 /* Trigger an invalidation of the L1 instruction caches */ 2581 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2582 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2583 2584 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2585 2586 /* Wait for invalidation complete */ 2587 for (i = 0; i < usec_timeout; i++) { 2588 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2589 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2590 INVALIDATE_CACHE_COMPLETE)) 2591 break; 2592 udelay(1); 2593 } 2594 2595 if (i >= usec_timeout) { 2596 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2597 return -EINVAL; 2598 } 2599 2600 if (amdgpu_emu_mode == 1) 2601 amdgpu_device_flush_hdp(adev, NULL); 2602 2603 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2604 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2605 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2606 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2607 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2608 2609 /* Program mec1 ucode address into intruction cache address register */ 2610 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2611 lower_32_bits(addr) & 0xFFFFF000); 2612 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2613 upper_32_bits(addr)); 2614 2615 return 0; 2616 } 2617 2618 static int 
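/*
 * RS64 PFP bring-up: point the PFP instruction cache at the ucode copied
 * into the autoload buffer (addr), wait for the invalidation triggered by
 * the base-register write, prime the I$, then program each PFP pipe's
 * start address and pulse its reset so the new address takes effect.
 * addr2 is the PFP stack/data region; it is programmed into the RS64
 * data cache base, which is invalidated at the end.
 */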
gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2619 { 2620 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2621 uint32_t tmp; 2622 unsigned i, pipe_id; 2623 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2624 2625 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2626 adev->gfx.pfp_fw->data; 2627 2628 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2629 lower_32_bits(addr)); 2630 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2631 upper_32_bits(addr)); 2632 2633 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2634 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2635 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2636 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2637 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2638 2639 /* 2640 * Programming any of the CP_PFP_IC_BASE registers 2641 * forces invalidation of the ME L1 I$. Wait for the 2642 * invalidation complete 2643 */ 2644 for (i = 0; i < usec_timeout; i++) { 2645 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2646 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2647 INVALIDATE_CACHE_COMPLETE)) 2648 break; 2649 udelay(1); 2650 } 2651 2652 if (i >= usec_timeout) { 2653 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2654 return -EINVAL; 2655 } 2656 2657 /* Prime the L1 instruction caches */ 2658 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2659 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2660 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2661 /* Waiting for cache primed*/ 2662 for (i = 0; i < usec_timeout; i++) { 2663 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2664 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2665 ICACHE_PRIMED)) 2666 break; 2667 udelay(1); 2668 } 2669 2670 if (i >= usec_timeout) { 2671 dev_err(adev->dev, "failed to prime instruction cache\n"); 2672 return -EINVAL; 2673 } 2674 2675 mutex_lock(&adev->srbm_mutex); 2676 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2677 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2678 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2679 (pfp_hdr->ucode_start_addr_hi << 30) | 2680 (pfp_hdr->ucode_start_addr_lo >> 2)); 2681 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2682 pfp_hdr->ucode_start_addr_hi >> 2); 2683 2684 /* 2685 * Program CP_ME_CNTL to reset given PIPE to take 2686 * effect of CP_PFP_PRGRM_CNTR_START. 2687 */ 2688 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2689 if (pipe_id == 0) 2690 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2691 PFP_PIPE0_RESET, 1); 2692 else 2693 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2694 PFP_PIPE1_RESET, 1); 2695 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2696 2697 /* Clear pfp pipe0 reset bit. 
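 * (pipe1 for pipe_id 1); once the reset bit is cleared the pipe restarts
 * from the CP_PFP_PRGRM_CNTR_START value programmed above.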
*/ 2698 if (pipe_id == 0) 2699 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2700 PFP_PIPE0_RESET, 0); 2701 else 2702 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2703 PFP_PIPE1_RESET, 0); 2704 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2705 2706 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2707 lower_32_bits(addr2)); 2708 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2709 upper_32_bits(addr2)); 2710 } 2711 soc21_grbm_select(adev, 0, 0, 0, 0); 2712 mutex_unlock(&adev->srbm_mutex); 2713 2714 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2715 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2716 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2717 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2718 2719 /* Invalidate the data caches */ 2720 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2721 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2722 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2723 2724 for (i = 0; i < usec_timeout; i++) { 2725 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2726 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2727 INVALIDATE_DCACHE_COMPLETE)) 2728 break; 2729 udelay(1); 2730 } 2731 2732 if (i >= usec_timeout) { 2733 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2734 return -EINVAL; 2735 } 2736 2737 return 0; 2738 } 2739 2740 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2741 { 2742 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2743 uint32_t tmp; 2744 unsigned i, pipe_id; 2745 const struct gfx_firmware_header_v2_0 *me_hdr; 2746 2747 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2748 adev->gfx.me_fw->data; 2749 2750 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2751 lower_32_bits(addr)); 2752 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2753 upper_32_bits(addr)); 2754 2755 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2756 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2757 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2758 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2759 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2760 2761 /* 2762 * Programming any of the CP_ME_IC_BASE registers 2763 * forces invalidation of the ME L1 I$. 
Wait for the 2764 * invalidation complete 2765 */ 2766 for (i = 0; i < usec_timeout; i++) { 2767 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2768 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2769 INVALIDATE_CACHE_COMPLETE)) 2770 break; 2771 udelay(1); 2772 } 2773 2774 if (i >= usec_timeout) { 2775 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2776 return -EINVAL; 2777 } 2778 2779 /* Prime the instruction caches */ 2780 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2781 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2782 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2783 2784 /* Waiting for instruction cache primed*/ 2785 for (i = 0; i < usec_timeout; i++) { 2786 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2787 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2788 ICACHE_PRIMED)) 2789 break; 2790 udelay(1); 2791 } 2792 2793 if (i >= usec_timeout) { 2794 dev_err(adev->dev, "failed to prime instruction cache\n"); 2795 return -EINVAL; 2796 } 2797 2798 mutex_lock(&adev->srbm_mutex); 2799 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2800 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2801 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2802 (me_hdr->ucode_start_addr_hi << 30) | 2803 (me_hdr->ucode_start_addr_lo >> 2) ); 2804 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2805 me_hdr->ucode_start_addr_hi>>2); 2806 2807 /* 2808 * Program CP_ME_CNTL to reset given PIPE to take 2809 * effect of CP_PFP_PRGRM_CNTR_START. 2810 */ 2811 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2812 if (pipe_id == 0) 2813 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2814 ME_PIPE0_RESET, 1); 2815 else 2816 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2817 ME_PIPE1_RESET, 1); 2818 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2819 2820 /* Clear pfp pipe0 reset bit. 
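 * For the ME this clears ME_PIPE0/1_RESET so the pipe restarts from the
 * CP_ME_PRGRM_CNTR_START value programmed above.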
*/ 2821 if (pipe_id == 0) 2822 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2823 ME_PIPE0_RESET, 0); 2824 else 2825 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2826 ME_PIPE1_RESET, 0); 2827 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2828 2829 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2830 lower_32_bits(addr2)); 2831 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2832 upper_32_bits(addr2)); 2833 } 2834 soc21_grbm_select(adev, 0, 0, 0, 0); 2835 mutex_unlock(&adev->srbm_mutex); 2836 2837 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2838 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2839 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2840 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2841 2842 /* Invalidate the data caches */ 2843 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2844 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2845 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2846 2847 for (i = 0; i < usec_timeout; i++) { 2848 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2849 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2850 INVALIDATE_DCACHE_COMPLETE)) 2851 break; 2852 udelay(1); 2853 } 2854 2855 if (i >= usec_timeout) { 2856 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2857 return -EINVAL; 2858 } 2859 2860 return 0; 2861 } 2862 2863 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2864 { 2865 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2866 uint32_t tmp; 2867 unsigned i; 2868 const struct gfx_firmware_header_v2_0 *mec_hdr; 2869 2870 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2871 adev->gfx.mec_fw->data; 2872 2873 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2874 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2875 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2876 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2877 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2878 2879 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2880 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2881 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2882 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2883 2884 mutex_lock(&adev->srbm_mutex); 2885 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2886 soc21_grbm_select(adev, 1, i, 0, 0); 2887 2888 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2889 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2890 upper_32_bits(addr2)); 2891 2892 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2893 mec_hdr->ucode_start_addr_lo >> 2 | 2894 mec_hdr->ucode_start_addr_hi << 30); 2895 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2896 mec_hdr->ucode_start_addr_hi >> 2); 2897 2898 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2899 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2900 upper_32_bits(addr)); 2901 } 2902 mutex_unlock(&adev->srbm_mutex); 2903 soc21_grbm_select(adev, 0, 0, 0, 0); 2904 2905 /* Trigger an invalidation of the L1 instruction caches */ 2906 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2907 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2908 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2909 2910 /* Wait for invalidation complete */ 2911 for (i = 0; i < usec_timeout; i++) { 2912 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2913 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2914 INVALIDATE_DCACHE_COMPLETE)) 2915 break; 2916 udelay(1); 2917 } 2918 2919 if (i >= 
usec_timeout) { 2920 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2921 return -EINVAL; 2922 } 2923 2924 /* Trigger an invalidation of the L1 instruction caches */ 2925 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2926 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2927 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2928 2929 /* Wait for invalidation complete */ 2930 for (i = 0; i < usec_timeout; i++) { 2931 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2932 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2933 INVALIDATE_CACHE_COMPLETE)) 2934 break; 2935 udelay(1); 2936 } 2937 2938 if (i >= usec_timeout) { 2939 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2940 return -EINVAL; 2941 } 2942 2943 return 0; 2944 } 2945 2946 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2947 { 2948 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2949 const struct gfx_firmware_header_v2_0 *me_hdr; 2950 const struct gfx_firmware_header_v2_0 *mec_hdr; 2951 uint32_t pipe_id, tmp; 2952 2953 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2954 adev->gfx.mec_fw->data; 2955 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2956 adev->gfx.me_fw->data; 2957 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2958 adev->gfx.pfp_fw->data; 2959 2960 /* config pfp program start addr */ 2961 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2962 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2963 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2964 (pfp_hdr->ucode_start_addr_hi << 30) | 2965 (pfp_hdr->ucode_start_addr_lo >> 2)); 2966 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2967 pfp_hdr->ucode_start_addr_hi >> 2); 2968 } 2969 soc21_grbm_select(adev, 0, 0, 0, 0); 2970 2971 /* reset pfp pipe */ 2972 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2973 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2974 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2975 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2976 2977 /* clear pfp pipe reset */ 2978 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2979 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2980 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2981 2982 /* config me program start addr */ 2983 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2984 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2985 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2986 (me_hdr->ucode_start_addr_hi << 30) | 2987 (me_hdr->ucode_start_addr_lo >> 2) ); 2988 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2989 me_hdr->ucode_start_addr_hi>>2); 2990 } 2991 soc21_grbm_select(adev, 0, 0, 0, 0); 2992 2993 /* reset me pipe */ 2994 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2995 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2996 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2997 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2998 2999 /* clear me pipe reset */ 3000 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 3001 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 3002 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3003 3004 /* config mec program start addr */ 3005 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 3006 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 3007 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3008 mec_hdr->ucode_start_addr_lo >> 2 | 3009 mec_hdr->ucode_start_addr_hi << 30); 3010 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3011 mec_hdr->ucode_start_addr_hi >> 2); 3012 } 3013 soc21_grbm_select(adev, 0, 0, 0, 0); 3014 3015 /* reset mec pipe */ 3016 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3017 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3018 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3019 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3020 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3021 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3022 3023 /* clear mec pipe reset */ 3024 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3025 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3026 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3027 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3028 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3029 } 3030 3031 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3032 { 3033 uint32_t cp_status; 3034 uint32_t bootload_status; 3035 int i, r; 3036 uint64_t addr, addr2; 3037 3038 for (i = 0; i < adev->usec_timeout; i++) { 3039 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3040 3041 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3042 IP_VERSION(11, 0, 1) || 3043 amdgpu_ip_version(adev, GC_HWIP, 0) == 3044 IP_VERSION(11, 0, 4) || 3045 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3046 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3047 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3048 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) 3049 bootload_status = RREG32_SOC15(GC, 0, 3050 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3051 else 3052 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3053 3054 if ((cp_status == 0) && 3055 (REG_GET_FIELD(bootload_status, 3056 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3057 break; 3058 } 3059 udelay(1); 3060 } 3061 3062 if (i >= adev->usec_timeout) { 3063 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3064 return -ETIMEDOUT; 3065 } 3066 3067 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3068 if (adev->gfx.rs64_enable) { 3069 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3070 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3071 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3072 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3073 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3074 if (r) 3075 return r; 3076 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3077 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3078 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3079 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3080 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3081 if (r) 3082 return r; 3083 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3084 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3085 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3086 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3087 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3088 if (r) 3089 return r; 3090 } else { 3091 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3092 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3093 r = gfx_v11_0_config_me_cache(adev, addr); 3094 if (r) 3095 return r; 3096 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3097 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3098 r = gfx_v11_0_config_pfp_cache(adev, addr); 3099 if (r) 3100 return r; 3101 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3102 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3103 r = gfx_v11_0_config_mec_cache(adev, addr); 3104 if (r) 
3105 return r; 3106 } 3107 } 3108 3109 return 0; 3110 } 3111 3112 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3113 { 3114 int i; 3115 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3116 3117 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3118 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3119 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3120 3121 for (i = 0; i < adev->usec_timeout; i++) { 3122 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3123 break; 3124 udelay(1); 3125 } 3126 3127 if (i >= adev->usec_timeout) 3128 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3129 3130 return 0; 3131 } 3132 3133 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3134 { 3135 int r; 3136 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3137 const __le32 *fw_data; 3138 unsigned i, fw_size; 3139 3140 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3141 adev->gfx.pfp_fw->data; 3142 3143 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3144 3145 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3146 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3147 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3148 3149 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3150 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3151 &adev->gfx.pfp.pfp_fw_obj, 3152 &adev->gfx.pfp.pfp_fw_gpu_addr, 3153 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3154 if (r) { 3155 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3156 gfx_v11_0_pfp_fini(adev); 3157 return r; 3158 } 3159 3160 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3161 3162 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3163 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3164 3165 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3166 3167 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3168 3169 for (i = 0; i < pfp_hdr->jt_size; i++) 3170 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3171 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3172 3173 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3174 3175 return 0; 3176 } 3177 3178 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3179 { 3180 int r; 3181 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3182 const __le32 *fw_ucode, *fw_data; 3183 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3184 uint32_t tmp; 3185 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3186 3187 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3188 adev->gfx.pfp_fw->data; 3189 3190 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3191 3192 /* instruction */ 3193 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3194 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3195 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3196 /* data */ 3197 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3198 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3199 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3200 3201 /* 64kb align */ 3202 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3203 64 * 1024, 3204 AMDGPU_GEM_DOMAIN_VRAM | 3205 AMDGPU_GEM_DOMAIN_GTT, 3206 &adev->gfx.pfp.pfp_fw_obj, 3207 &adev->gfx.pfp.pfp_fw_gpu_addr, 3208 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3209 if (r) { 3210 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3211 gfx_v11_0_pfp_fini(adev); 3212 return r; 3213 } 3214 3215 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3216 64 * 1024, 3217 AMDGPU_GEM_DOMAIN_VRAM | 3218 AMDGPU_GEM_DOMAIN_GTT, 3219 
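/* the RS64 data/stack image gets its own 64 KiB-aligned BO, separate
 * from the instruction BO created above
 */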
&adev->gfx.pfp.pfp_fw_data_obj, 3220 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3221 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3222 if (r) { 3223 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3224 gfx_v11_0_pfp_fini(adev); 3225 return r; 3226 } 3227 3228 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3229 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3230 3231 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3232 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3233 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3234 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3235 3236 if (amdgpu_emu_mode == 1) 3237 amdgpu_device_flush_hdp(adev, NULL); 3238 3239 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3240 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3241 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3242 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3243 3244 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3245 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3246 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3247 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3248 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3249 3250 /* 3251 * Programming any of the CP_PFP_IC_BASE registers 3252 * forces invalidation of the ME L1 I$. Wait for the 3253 * invalidation complete 3254 */ 3255 for (i = 0; i < usec_timeout; i++) { 3256 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3257 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3258 INVALIDATE_CACHE_COMPLETE)) 3259 break; 3260 udelay(1); 3261 } 3262 3263 if (i >= usec_timeout) { 3264 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3265 return -EINVAL; 3266 } 3267 3268 /* Prime the L1 instruction caches */ 3269 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3270 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3271 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3272 /* Waiting for cache primed*/ 3273 for (i = 0; i < usec_timeout; i++) { 3274 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3275 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3276 ICACHE_PRIMED)) 3277 break; 3278 udelay(1); 3279 } 3280 3281 if (i >= usec_timeout) { 3282 dev_err(adev->dev, "failed to prime instruction cache\n"); 3283 return -EINVAL; 3284 } 3285 3286 mutex_lock(&adev->srbm_mutex); 3287 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3288 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3289 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3290 (pfp_hdr->ucode_start_addr_hi << 30) | 3291 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3292 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3293 pfp_hdr->ucode_start_addr_hi>>2); 3294 3295 /* 3296 * Program CP_ME_CNTL to reset given PIPE to take 3297 * effect of CP_PFP_PRGRM_CNTR_START. 3298 */ 3299 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3300 if (pipe_id == 0) 3301 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3302 PFP_PIPE0_RESET, 1); 3303 else 3304 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3305 PFP_PIPE1_RESET, 1); 3306 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3307 3308 /* Clear pfp pipe0 reset bit. 
*/ 3309 if (pipe_id == 0) 3310 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3311 PFP_PIPE0_RESET, 0); 3312 else 3313 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3314 PFP_PIPE1_RESET, 0); 3315 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3316 3317 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3318 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3319 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3320 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3321 } 3322 soc21_grbm_select(adev, 0, 0, 0, 0); 3323 mutex_unlock(&adev->srbm_mutex); 3324 3325 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3326 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3327 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3328 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3329 3330 /* Invalidate the data caches */ 3331 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3332 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3333 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3334 3335 for (i = 0; i < usec_timeout; i++) { 3336 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3337 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3338 INVALIDATE_DCACHE_COMPLETE)) 3339 break; 3340 udelay(1); 3341 } 3342 3343 if (i >= usec_timeout) { 3344 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3345 return -EINVAL; 3346 } 3347 3348 return 0; 3349 } 3350 3351 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3352 { 3353 int r; 3354 const struct gfx_firmware_header_v1_0 *me_hdr; 3355 const __le32 *fw_data; 3356 unsigned i, fw_size; 3357 3358 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3359 adev->gfx.me_fw->data; 3360 3361 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3362 3363 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3364 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3365 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3366 3367 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3368 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3369 &adev->gfx.me.me_fw_obj, 3370 &adev->gfx.me.me_fw_gpu_addr, 3371 (void **)&adev->gfx.me.me_fw_ptr); 3372 if (r) { 3373 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3374 gfx_v11_0_me_fini(adev); 3375 return r; 3376 } 3377 3378 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3379 3380 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3381 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3382 3383 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3384 3385 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3386 3387 for (i = 0; i < me_hdr->jt_size; i++) 3388 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3389 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3390 3391 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3392 3393 return 0; 3394 } 3395 3396 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3397 { 3398 int r; 3399 const struct gfx_firmware_header_v2_0 *me_hdr; 3400 const __le32 *fw_ucode, *fw_data; 3401 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3402 uint32_t tmp; 3403 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3404 3405 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3406 adev->gfx.me_fw->data; 3407 3408 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3409 3410 /* instruction */ 3411 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3412 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3413 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3414 /* data */ 3415 
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64kb align */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset given PIPE to take
		 * effect of CP_ME_PRGRM_CNTR_START.
3517 */ 3518 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3519 if (pipe_id == 0) 3520 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3521 ME_PIPE0_RESET, 1); 3522 else 3523 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3524 ME_PIPE1_RESET, 1); 3525 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3526 3527 /* Clear pfp pipe0 reset bit. */ 3528 if (pipe_id == 0) 3529 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3530 ME_PIPE0_RESET, 0); 3531 else 3532 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3533 ME_PIPE1_RESET, 0); 3534 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3535 3536 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3537 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3538 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3539 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3540 } 3541 soc21_grbm_select(adev, 0, 0, 0, 0); 3542 mutex_unlock(&adev->srbm_mutex); 3543 3544 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3545 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3546 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3547 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3548 3549 /* Invalidate the data caches */ 3550 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3551 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3552 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3553 3554 for (i = 0; i < usec_timeout; i++) { 3555 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3556 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3557 INVALIDATE_DCACHE_COMPLETE)) 3558 break; 3559 udelay(1); 3560 } 3561 3562 if (i >= usec_timeout) { 3563 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3564 return -EINVAL; 3565 } 3566 3567 return 0; 3568 } 3569 3570 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3571 { 3572 int r; 3573 3574 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3575 return -EINVAL; 3576 3577 gfx_v11_0_cp_gfx_enable(adev, false); 3578 3579 if (adev->gfx.rs64_enable) 3580 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3581 else 3582 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3583 if (r) { 3584 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3585 return r; 3586 } 3587 3588 if (adev->gfx.rs64_enable) 3589 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3590 else 3591 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3592 if (r) { 3593 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3594 return r; 3595 } 3596 3597 return 0; 3598 } 3599 3600 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3601 { 3602 struct amdgpu_ring *ring; 3603 const struct cs_section_def *sect = NULL; 3604 const struct cs_extent_def *ext = NULL; 3605 int r, i; 3606 int ctx_reg_offset; 3607 3608 /* init the CP */ 3609 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3610 adev->gfx.config.max_hw_contexts - 1); 3611 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3612 3613 if (!amdgpu_async_gfx_ring) 3614 gfx_v11_0_cp_gfx_enable(adev, true); 3615 3616 ring = &adev->gfx.gfx_ring[0]; 3617 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3618 if (r) { 3619 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3620 return r; 3621 } 3622 3623 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3624 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3625 3626 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3627 amdgpu_ring_write(ring, 0x80000000); 3628 amdgpu_ring_write(ring, 0x80000000); 3629 3630 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3631 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3632 if (sect->id == SECT_CONTEXT) { 3633 amdgpu_ring_write(ring, 3634 PACKET3(PACKET3_SET_CONTEXT_REG, 3635 ext->reg_count)); 3636 amdgpu_ring_write(ring, ext->reg_index - 3637 PACKET3_SET_CONTEXT_REG_START); 3638 for (i = 0; i < ext->reg_count; i++) 3639 amdgpu_ring_write(ring, ext->extent[i]); 3640 } 3641 } 3642 } 3643 3644 ctx_reg_offset = 3645 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3646 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3647 amdgpu_ring_write(ring, ctx_reg_offset); 3648 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3649 3650 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3651 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3652 3653 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3654 amdgpu_ring_write(ring, 0); 3655 3656 amdgpu_ring_commit(ring); 3657 3658 /* submit cs packet to copy state 0 to next available state */ 3659 if (adev->gfx.num_gfx_rings > 1) { 3660 /* maximum supported gfx ring is 2 */ 3661 ring = &adev->gfx.gfx_ring[1]; 3662 r = amdgpu_ring_alloc(ring, 2); 3663 if (r) { 3664 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3665 return r; 3666 } 3667 3668 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3669 amdgpu_ring_write(ring, 0); 3670 3671 amdgpu_ring_commit(ring); 3672 } 3673 return 0; 3674 } 3675 3676 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3677 CP_PIPE_ID pipe) 3678 { 3679 u32 tmp; 3680 3681 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3682 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3683 3684 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3685 } 3686 3687 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3688 struct amdgpu_ring *ring) 3689 { 3690 u32 tmp; 3691 3692 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3693 if (ring->use_doorbell) { 3694 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3695 DOORBELL_OFFSET, ring->doorbell_index); 3696 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3697 DOORBELL_EN, 1); 3698 } else { 3699 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3700 DOORBELL_EN, 0); 3701 } 3702 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3703 3704 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3705 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3706 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3707 3708 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3709 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3710 } 3711 3712 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3713 { 3714 struct amdgpu_ring *ring; 3715 u32 tmp; 3716 u32 rb_bufsz; 3717 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3718 3719 /* Set the write pointer delay */ 3720 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3721 3722 /* set the RB to use vmid 0 */ 3723 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3724 3725 /* Init gfx ring 0 for pipe 0 */ 3726 mutex_lock(&adev->srbm_mutex); 3727 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3728 3729 /* Set ring buffer size */ 3730 ring = &adev->gfx.gfx_ring[0]; 3731 rb_bufsz = order_base_2(ring->ring_size / 8); 3732 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3733 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3734 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3735 3736 /* Initialize the ring buffer's write pointers */ 3737 ring->wptr = 0; 3738 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3739 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3740 3741 /* set the wb address whether it's enabled or not */ 3742 rptr_addr = ring->rptr_gpu_addr; 3743 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3744 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3745 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3746 3747 wptr_gpu_addr = ring->wptr_gpu_addr; 3748 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3749 lower_32_bits(wptr_gpu_addr)); 3750 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3751 upper_32_bits(wptr_gpu_addr)); 3752 3753 mdelay(1); 3754 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3755 3756 rb_addr = ring->gpu_addr >> 8; 3757 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3758 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3759 3760 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3761 3762 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3763 mutex_unlock(&adev->srbm_mutex); 3764 3765 /* Init gfx ring 1 for pipe 1 */ 3766 if (adev->gfx.num_gfx_rings > 1) { 3767 mutex_lock(&adev->srbm_mutex); 3768 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3769 /* maximum supported gfx ring is 2 */ 3770 ring = &adev->gfx.gfx_ring[1]; 3771 rb_bufsz = order_base_2(ring->ring_size / 8); 3772 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3773 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3774 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3775 /* Initialize the ring buffer's write pointers */ 3776 ring->wptr = 0; 3777 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3778 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3779 /* Set the wb address whether it's enabled or not */ 3780 rptr_addr = ring->rptr_gpu_addr; 3781 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3782 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3783 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3784 wptr_gpu_addr = ring->wptr_gpu_addr; 3785 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3786 lower_32_bits(wptr_gpu_addr)); 3787 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3788 upper_32_bits(wptr_gpu_addr)); 3789 3790 mdelay(1); 3791 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3792 3793 rb_addr = ring->gpu_addr >> 8; 3794 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3795 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3796 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3797 3798 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3799 mutex_unlock(&adev->srbm_mutex); 3800 } 3801 /* Switch to pipe 0 */ 3802 mutex_lock(&adev->srbm_mutex); 3803 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3804 mutex_unlock(&adev->srbm_mutex); 3805 3806 /* start the ring */ 3807 gfx_v11_0_cp_gfx_start(adev); 3808 3809 return 0; 3810 } 3811 3812 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3813 { 3814 u32 data; 3815 3816 if (adev->gfx.rs64_enable) { 3817 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3818 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3819 enable ? 0 : 1); 3820 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3821 enable ? 0 : 1); 3822 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3823 enable ? 0 : 1); 3824 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3825 enable ? 0 : 1); 3826 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3827 enable ? 0 : 1); 3828 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3829 enable ? 
1 : 0); 3830 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3831 enable ? 1 : 0); 3832 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3833 enable ? 1 : 0); 3834 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3835 enable ? 1 : 0); 3836 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3837 enable ? 0 : 1); 3838 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3839 } else { 3840 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3841 3842 if (enable) { 3843 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3844 if (!adev->enable_mes_kiq) 3845 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3846 MEC_ME2_HALT, 0); 3847 } else { 3848 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3849 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3850 } 3851 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3852 } 3853 3854 udelay(50); 3855 } 3856 3857 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3858 { 3859 const struct gfx_firmware_header_v1_0 *mec_hdr; 3860 const __le32 *fw_data; 3861 unsigned i, fw_size; 3862 u32 *fw = NULL; 3863 int r; 3864 3865 if (!adev->gfx.mec_fw) 3866 return -EINVAL; 3867 3868 gfx_v11_0_cp_compute_enable(adev, false); 3869 3870 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3871 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3872 3873 fw_data = (const __le32 *) 3874 (adev->gfx.mec_fw->data + 3875 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3876 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3877 3878 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3879 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3880 &adev->gfx.mec.mec_fw_obj, 3881 &adev->gfx.mec.mec_fw_gpu_addr, 3882 (void **)&fw); 3883 if (r) { 3884 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3885 gfx_v11_0_mec_fini(adev); 3886 return r; 3887 } 3888 3889 memcpy(fw, fw_data, fw_size); 3890 3891 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3892 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3893 3894 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3895 3896 /* MEC1 */ 3897 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3898 3899 for (i = 0; i < mec_hdr->jt_size; i++) 3900 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3901 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3902 3903 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3904 3905 return 0; 3906 } 3907 3908 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3909 { 3910 const struct gfx_firmware_header_v2_0 *mec_hdr; 3911 const __le32 *fw_ucode, *fw_data; 3912 u32 tmp, fw_ucode_size, fw_data_size; 3913 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3914 u32 *fw_ucode_ptr, *fw_data_ptr; 3915 int r; 3916 3917 if (!adev->gfx.mec_fw) 3918 return -EINVAL; 3919 3920 gfx_v11_0_cp_compute_enable(adev, false); 3921 3922 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3923 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3924 3925 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3926 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3927 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3928 3929 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3930 le32_to_cpu(mec_hdr->data_offset_bytes)); 3931 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3932 3933 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3934 64 * 1024, 3935 AMDGPU_GEM_DOMAIN_VRAM | 3936 AMDGPU_GEM_DOMAIN_GTT, 3937 &adev->gfx.mec.mec_fw_obj, 3938 
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* Trigger an invalidation of the L1 data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	return 0;
}

static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}

static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
{
	/* set graphics engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);

	/* set compute engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.kiq * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.userqueue_end * 2) << 2);
}

static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
					   struct v11_gfx_mqd *mqd,
					   struct amdgpu_mqd_prop *prop)
{
	bool priority = false;
	u32 tmp;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority
	 */
	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
		priority = true;

	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
	mqd->cp_gfx_hqd_queue_priority = tmp;
}

static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
				  struct amdgpu_mqd_prop *prop)
{
	struct v11_gfx_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set up mqd control */
	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
	tmp = regCP_GFX_HQD_VMID_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = 0;

	/* set up gfx queue priority */
	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);

	/* set up time quantum */
	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base.
	 * this is similar to CP_RB_BASE */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	if (prop->tmz_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
	if (!prop->kernel_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;

	/* activate the queue */
	mqd->cp_gfx_hqd_active = 1;

	/* set gfx UQ items */
	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
	mqd->fence_address_hi = upper_32_bits(prop->fence_address);

	return 0;
}

static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.gfx_ring[0];

	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else {
		/* restore mqd with the backup copy */
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset the ring */
		ring->wptr = 0;
		*ring->wptr_cpu_addr = 0;
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
	int r, i;
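	/*
	 * Initialize the MQD for each kernel gfx queue, map the queues,
	 * and then emit the initial clear state through
	 * gfx_v11_0_cp_gfx_start().
	 */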
4213 4214 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4215 r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4216 if (r) 4217 return r; 4218 } 4219 4220 r = amdgpu_gfx_enable_kgq(adev, 0); 4221 if (r) 4222 return r; 4223 4224 return gfx_v11_0_cp_gfx_start(adev); 4225 } 4226 4227 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4228 struct amdgpu_mqd_prop *prop) 4229 { 4230 struct v11_compute_mqd *mqd = m; 4231 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4232 uint32_t tmp; 4233 4234 mqd->header = 0xC0310800; 4235 mqd->compute_pipelinestat_enable = 0x00000001; 4236 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4237 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4238 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4239 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4240 mqd->compute_misc_reserved = 0x00000007; 4241 4242 eop_base_addr = prop->eop_gpu_addr >> 8; 4243 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4244 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4245 4246 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4247 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4248 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4249 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4250 4251 mqd->cp_hqd_eop_control = tmp; 4252 4253 /* enable doorbell? */ 4254 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4255 4256 if (prop->use_doorbell) { 4257 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4258 DOORBELL_OFFSET, prop->doorbell_index); 4259 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4260 DOORBELL_EN, 1); 4261 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4262 DOORBELL_SOURCE, 0); 4263 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4264 DOORBELL_HIT, 0); 4265 } else { 4266 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4267 DOORBELL_EN, 0); 4268 } 4269 4270 mqd->cp_hqd_pq_doorbell_control = tmp; 4271 4272 /* disable the queue if it's active */ 4273 mqd->cp_hqd_dequeue_request = 0; 4274 mqd->cp_hqd_pq_rptr = 0; 4275 mqd->cp_hqd_pq_wptr_lo = 0; 4276 mqd->cp_hqd_pq_wptr_hi = 0; 4277 4278 /* set the pointer to the MQD */ 4279 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4280 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4281 4282 /* set MQD vmid to 0 */ 4283 tmp = regCP_MQD_CONTROL_DEFAULT; 4284 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4285 mqd->cp_mqd_control = tmp; 4286 4287 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4288 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4289 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4290 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4291 4292 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4293 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4294 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4295 (order_base_2(prop->queue_size / 4) - 1)); 4296 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4297 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4298 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4299 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4300 prop->allow_tunneling); 4301 if (prop->kernel_queue) { 4302 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4303 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4304 } 4305 if (prop->tmz_queue) 4306 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4307 mqd->cp_hqd_pq_control = tmp; 4308 4309 /* set the wb address whether it's enabled or not */ 4310 wb_gpu_addr = 
prop->rptr_gpu_addr; 4311 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4312 mqd->cp_hqd_pq_rptr_report_addr_hi = 4313 upper_32_bits(wb_gpu_addr) & 0xffff; 4314 4315 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4316 wb_gpu_addr = prop->wptr_gpu_addr; 4317 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4318 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4319 4320 tmp = 0; 4321 /* enable the doorbell if requested */ 4322 if (prop->use_doorbell) { 4323 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4324 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4325 DOORBELL_OFFSET, prop->doorbell_index); 4326 4327 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4328 DOORBELL_EN, 1); 4329 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4330 DOORBELL_SOURCE, 0); 4331 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4332 DOORBELL_HIT, 0); 4333 } 4334 4335 mqd->cp_hqd_pq_doorbell_control = tmp; 4336 4337 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4338 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4339 4340 /* set the vmid for the queue */ 4341 mqd->cp_hqd_vmid = 0; 4342 4343 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4344 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4345 mqd->cp_hqd_persistent_state = tmp; 4346 4347 /* set MIN_IB_AVAIL_SIZE */ 4348 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4349 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4350 mqd->cp_hqd_ib_control = tmp; 4351 4352 /* set static priority for a compute queue/ring */ 4353 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4354 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4355 4356 mqd->cp_hqd_active = prop->hqd_active; 4357 4358 /* set UQ fenceaddress */ 4359 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4360 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4361 4362 return 0; 4363 } 4364 4365 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4366 { 4367 struct amdgpu_device *adev = ring->adev; 4368 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4369 int j; 4370 4371 /* inactivate the queue */ 4372 if (amdgpu_sriov_vf(adev)) 4373 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4374 4375 /* disable wptr polling */ 4376 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4377 4378 /* write the EOP addr */ 4379 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4380 mqd->cp_hqd_eop_base_addr_lo); 4381 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4382 mqd->cp_hqd_eop_base_addr_hi); 4383 4384 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4385 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4386 mqd->cp_hqd_eop_control); 4387 4388 /* enable doorbell? 
*/ 4389 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4390 mqd->cp_hqd_pq_doorbell_control); 4391 4392 /* disable the queue if it's active */ 4393 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4394 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4395 for (j = 0; j < adev->usec_timeout; j++) { 4396 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4397 break; 4398 udelay(1); 4399 } 4400 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4401 mqd->cp_hqd_dequeue_request); 4402 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4403 mqd->cp_hqd_pq_rptr); 4404 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4405 mqd->cp_hqd_pq_wptr_lo); 4406 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4407 mqd->cp_hqd_pq_wptr_hi); 4408 } 4409 4410 /* set the pointer to the MQD */ 4411 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4412 mqd->cp_mqd_base_addr_lo); 4413 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4414 mqd->cp_mqd_base_addr_hi); 4415 4416 /* set MQD vmid to 0 */ 4417 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4418 mqd->cp_mqd_control); 4419 4420 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4421 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4422 mqd->cp_hqd_pq_base_lo); 4423 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4424 mqd->cp_hqd_pq_base_hi); 4425 4426 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4427 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4428 mqd->cp_hqd_pq_control); 4429 4430 /* set the wb address whether it's enabled or not */ 4431 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4432 mqd->cp_hqd_pq_rptr_report_addr_lo); 4433 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4434 mqd->cp_hqd_pq_rptr_report_addr_hi); 4435 4436 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4437 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4438 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4439 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4440 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4441 4442 /* enable the doorbell if requested */ 4443 if (ring->use_doorbell) { 4444 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4445 (adev->doorbell_index.kiq * 2) << 2); 4446 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4447 (adev->doorbell_index.userqueue_end * 2) << 2); 4448 } 4449 4450 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4451 mqd->cp_hqd_pq_doorbell_control); 4452 4453 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4454 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4455 mqd->cp_hqd_pq_wptr_lo); 4456 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4457 mqd->cp_hqd_pq_wptr_hi); 4458 4459 /* set the vmid for the queue */ 4460 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4461 4462 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4463 mqd->cp_hqd_persistent_state); 4464 4465 /* activate the queue */ 4466 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4467 mqd->cp_hqd_active); 4468 4469 if (ring->use_doorbell) 4470 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4471 4472 return 0; 4473 } 4474 4475 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4476 { 4477 struct amdgpu_device *adev = ring->adev; 4478 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4479 4480 gfx_v11_0_kiq_setting(ring); 4481 4482 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4483 /* reset MQD to a clean status */ 4484 if (adev->gfx.kiq[0].mqd_backup) 4485 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4486 4487 /* reset ring buffer */ 4488 ring->wptr = 0; 4489 amdgpu_ring_clear_ring(ring); 4490 4491 mutex_lock(&adev->srbm_mutex); 4492 
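	/* Re-select the KIQ queue and reprogram its HQD registers from the restored MQD */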
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4493 gfx_v11_0_kiq_init_register(ring); 4494 soc21_grbm_select(adev, 0, 0, 0, 0); 4495 mutex_unlock(&adev->srbm_mutex); 4496 } else { 4497 memset((void *)mqd, 0, sizeof(*mqd)); 4498 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4499 amdgpu_ring_clear_ring(ring); 4500 mutex_lock(&adev->srbm_mutex); 4501 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4502 amdgpu_ring_init_mqd(ring); 4503 gfx_v11_0_kiq_init_register(ring); 4504 soc21_grbm_select(adev, 0, 0, 0, 0); 4505 mutex_unlock(&adev->srbm_mutex); 4506 4507 if (adev->gfx.kiq[0].mqd_backup) 4508 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4509 } 4510 4511 return 0; 4512 } 4513 4514 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4515 { 4516 struct amdgpu_device *adev = ring->adev; 4517 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4518 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4519 4520 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4521 memset((void *)mqd, 0, sizeof(*mqd)); 4522 mutex_lock(&adev->srbm_mutex); 4523 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4524 amdgpu_ring_init_mqd(ring); 4525 soc21_grbm_select(adev, 0, 0, 0, 0); 4526 mutex_unlock(&adev->srbm_mutex); 4527 4528 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4529 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4530 } else { 4531 /* restore MQD to a clean status */ 4532 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4533 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4534 /* reset ring buffer */ 4535 ring->wptr = 0; 4536 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4537 amdgpu_ring_clear_ring(ring); 4538 } 4539 4540 return 0; 4541 } 4542 4543 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4544 { 4545 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4546 return 0; 4547 } 4548 4549 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4550 { 4551 int i, r; 4552 4553 if (!amdgpu_async_gfx_ring) 4554 gfx_v11_0_cp_compute_enable(adev, true); 4555 4556 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4557 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4558 if (r) 4559 return r; 4560 } 4561 4562 return amdgpu_gfx_enable_kcq(adev, 0); 4563 } 4564 4565 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4566 { 4567 int r, i; 4568 struct amdgpu_ring *ring; 4569 4570 if (!(adev->flags & AMD_IS_APU)) 4571 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4572 4573 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4574 /* legacy firmware loading */ 4575 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4576 if (r) 4577 return r; 4578 4579 if (adev->gfx.rs64_enable) 4580 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4581 else 4582 r = gfx_v11_0_cp_compute_load_microcode(adev); 4583 if (r) 4584 return r; 4585 } 4586 4587 gfx_v11_0_cp_set_doorbell_range(adev); 4588 4589 if (amdgpu_async_gfx_ring) { 4590 gfx_v11_0_cp_compute_enable(adev, true); 4591 gfx_v11_0_cp_gfx_enable(adev, true); 4592 } 4593 4594 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4595 r = amdgpu_mes_kiq_hw_init(adev); 4596 else 4597 r = gfx_v11_0_kiq_resume(adev); 4598 if (r) 4599 return r; 4600 4601 r = gfx_v11_0_kcq_resume(adev); 4602 if (r) 4603 return r; 4604 4605 if (!amdgpu_async_gfx_ring) { 4606 r = gfx_v11_0_cp_gfx_resume(adev); 4607 if (r) 4608 return r; 4609 } else { 4610 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4611 if (r) 4612 return r; 4613 } 4614 4615 if 
(adev->gfx.disable_kq) { 4616 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4617 ring = &adev->gfx.gfx_ring[i]; 4618 /* we don't want to set ring->ready */ 4619 r = amdgpu_ring_test_ring(ring); 4620 if (r) 4621 return r; 4622 } 4623 if (amdgpu_async_gfx_ring) 4624 amdgpu_gfx_disable_kgq(adev, 0); 4625 } else { 4626 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4627 ring = &adev->gfx.gfx_ring[i]; 4628 r = amdgpu_ring_test_helper(ring); 4629 if (r) 4630 return r; 4631 } 4632 } 4633 4634 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4635 ring = &adev->gfx.compute_ring[i]; 4636 r = amdgpu_ring_test_helper(ring); 4637 if (r) 4638 return r; 4639 } 4640 4641 return 0; 4642 } 4643 4644 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4645 { 4646 gfx_v11_0_cp_gfx_enable(adev, enable); 4647 gfx_v11_0_cp_compute_enable(adev, enable); 4648 } 4649 4650 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4651 { 4652 int r; 4653 bool value; 4654 4655 r = adev->gfxhub.funcs->gart_enable(adev); 4656 if (r) 4657 return r; 4658 4659 amdgpu_device_flush_hdp(adev, NULL); 4660 4661 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4662 false : true; 4663 4664 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4665 /* TODO investigate why this and the hdp flush above is needed, 4666 * are we missing a flush somewhere else? */ 4667 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4668 4669 return 0; 4670 } 4671 4672 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4673 { 4674 u32 tmp; 4675 4676 /* select RS64 */ 4677 if (adev->gfx.rs64_enable) { 4678 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4679 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4680 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4681 4682 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4683 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4684 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4685 } 4686 4687 if (amdgpu_emu_mode == 1) 4688 msleep(100); 4689 } 4690 4691 static int get_gb_addr_config(struct amdgpu_device * adev) 4692 { 4693 u32 gb_addr_config; 4694 4695 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4696 if (gb_addr_config == 0) 4697 return -EINVAL; 4698 4699 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4700 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4701 4702 adev->gfx.config.gb_addr_config = gb_addr_config; 4703 4704 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4705 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4706 GB_ADDR_CONFIG, NUM_PIPES); 4707 4708 adev->gfx.config.max_tile_pipes = 4709 adev->gfx.config.gb_addr_config_fields.num_pipes; 4710 4711 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4712 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4713 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4714 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4715 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4716 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4717 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4718 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4719 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4720 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4721 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4722 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4723 4724 return 0; 4725 } 4726 4727 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4728 { 4729 uint32_t data; 4730 4731 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4732 data |= 
	CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);

	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}

static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
				       adev->gfx.cleaner_shader_ptr);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.imu.funcs) {
			/* RLC autoload sequence 1: Program rlc ram */
			if (adev->gfx.imu.funcs->program_rlc_ram)
				adev->gfx.imu.funcs->program_rlc_ram(adev);
			/* rlc autoload firmware */
			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}
	} else {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
				if (adev->gfx.imu.funcs->load_microcode)
					adev->gfx.imu.funcs->load_microcode(adev);
				if (adev->gfx.imu.funcs->setup_imu)
					adev->gfx.imu.funcs->setup_imu(adev);
				if (adev->gfx.imu.funcs->start_imu)
					adev->gfx.imu.funcs->start_imu(adev);
			}

			/* disable gpa mode in backdoor loading */
			gfx_v11_0_disable_gpa_mode(adev);
		}
	}

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so for direct loading the SMC ucode has
		 * to be loaded here before the RLC.
4805 */ 4806 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4807 if (r) 4808 return r; 4809 } 4810 4811 gfx_v11_0_constants_init(adev); 4812 4813 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4814 gfx_v11_0_select_cp_fw_arch(adev); 4815 4816 if (adev->nbio.funcs->gc_doorbell_init) 4817 adev->nbio.funcs->gc_doorbell_init(adev); 4818 4819 r = gfx_v11_0_rlc_resume(adev); 4820 if (r) 4821 return r; 4822 4823 /* 4824 * init golden registers and rlc resume may override some registers, 4825 * reconfig them here 4826 */ 4827 gfx_v11_0_tcp_harvest(adev); 4828 4829 r = gfx_v11_0_cp_resume(adev); 4830 if (r) 4831 return r; 4832 4833 /* get IMU version from HW if it's not set */ 4834 if (!adev->gfx.imu_fw_version) 4835 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4836 4837 return r; 4838 } 4839 4840 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, 4841 bool enable) 4842 { 4843 unsigned int irq_type; 4844 int m, p, r; 4845 4846 if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { 4847 for (m = 0; m < adev->gfx.me.num_me; m++) { 4848 for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { 4849 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4850 if (enable) 4851 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4852 irq_type); 4853 else 4854 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4855 irq_type); 4856 if (r) 4857 return r; 4858 } 4859 } 4860 } 4861 4862 if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { 4863 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 4864 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 4865 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4866 + (m * adev->gfx.mec.num_pipe_per_mec) 4867 + p; 4868 if (enable) 4869 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4870 irq_type); 4871 else 4872 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4873 irq_type); 4874 if (r) 4875 return r; 4876 } 4877 } 4878 } 4879 4880 return 0; 4881 } 4882 4883 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4884 { 4885 struct amdgpu_device *adev = ip_block->adev; 4886 4887 cancel_delayed_work_sync(&adev->gfx.idle_work); 4888 4889 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4890 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4891 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4892 gfx_v11_0_set_userq_eop_interrupts(adev, false); 4893 4894 if (!adev->no_hw_access) { 4895 if (amdgpu_async_gfx_ring && 4896 !adev->gfx.disable_kq) { 4897 if (amdgpu_gfx_disable_kgq(adev, 0)) 4898 DRM_ERROR("KGQ disable failed\n"); 4899 } 4900 4901 if (amdgpu_gfx_disable_kcq(adev, 0)) 4902 DRM_ERROR("KCQ disable failed\n"); 4903 4904 amdgpu_mes_kiq_hw_fini(adev); 4905 } 4906 4907 if (amdgpu_sriov_vf(adev)) 4908 /* Remove the steps disabling CPG and clearing KIQ position, 4909 * so that CP could perform IDLE-SAVE during switch. Those 4910 * steps are necessary to avoid a DMAR error in gfx9 but it is 4911 * not reproduced on gfx11. 
4912 */ 4913 return 0; 4914 4915 gfx_v11_0_cp_enable(adev, false); 4916 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4917 4918 adev->gfxhub.funcs->gart_disable(adev); 4919 4920 adev->gfx.is_poweron = false; 4921 4922 return 0; 4923 } 4924 4925 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4926 { 4927 return gfx_v11_0_hw_fini(ip_block); 4928 } 4929 4930 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4931 { 4932 return gfx_v11_0_hw_init(ip_block); 4933 } 4934 4935 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 4936 { 4937 struct amdgpu_device *adev = ip_block->adev; 4938 4939 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4940 GRBM_STATUS, GUI_ACTIVE)) 4941 return false; 4942 else 4943 return true; 4944 } 4945 4946 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4947 { 4948 unsigned i; 4949 u32 tmp; 4950 struct amdgpu_device *adev = ip_block->adev; 4951 4952 for (i = 0; i < adev->usec_timeout; i++) { 4953 /* read MC_STATUS */ 4954 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4955 GRBM_STATUS__GUI_ACTIVE_MASK; 4956 4957 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4958 return 0; 4959 udelay(1); 4960 } 4961 return -ETIMEDOUT; 4962 } 4963 4964 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4965 bool req) 4966 { 4967 u32 i, tmp, val; 4968 4969 for (i = 0; i < adev->usec_timeout; i++) { 4970 /* Request with MeId=2, PipeId=0 */ 4971 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4972 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4973 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4974 4975 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4976 if (req) { 4977 if (val == tmp) 4978 break; 4979 } else { 4980 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4981 REQUEST, 1); 4982 4983 /* unlocked or locked by firmware */ 4984 if (val != tmp) 4985 break; 4986 } 4987 udelay(1); 4988 } 4989 4990 if (i >= adev->usec_timeout) 4991 return -EINVAL; 4992 4993 return 0; 4994 } 4995 4996 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4997 { 4998 u32 grbm_soft_reset = 0; 4999 u32 tmp; 5000 int r, i, j, k; 5001 struct amdgpu_device *adev = ip_block->adev; 5002 5003 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5004 5005 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5006 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 5007 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 5008 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 5009 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 5010 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5011 5012 mutex_lock(&adev->srbm_mutex); 5013 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 5014 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 5015 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 5016 soc21_grbm_select(adev, i, k, j, 0); 5017 5018 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 5019 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 5020 } 5021 } 5022 } 5023 for (i = 0; i < adev->gfx.me.num_me; ++i) { 5024 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 5025 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 5026 soc21_grbm_select(adev, i, k, j, 0); 5027 5028 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5029 } 5030 } 5031 } 5032 soc21_grbm_select(adev, 0, 0, 0, 0); 5033 mutex_unlock(&adev->srbm_mutex); 5034 5035 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5036 mutex_lock(&adev->gfx.reset_sem_mutex); 5037 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 5038 if (r) { 5039 mutex_unlock(&adev->gfx.reset_sem_mutex); 5040 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 5041 return r; 5042 } 5043 5044 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 5045 5046 // Read CP_VMID_RESET register three times. 5047 // to get sufficient time for GFX_HQD_ACTIVE reach 0 5048 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5049 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5050 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5051 5052 /* release the gfx mutex */ 5053 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 5054 mutex_unlock(&adev->gfx.reset_sem_mutex); 5055 if (r) { 5056 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 5057 return r; 5058 } 5059 5060 for (i = 0; i < adev->usec_timeout; i++) { 5061 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 5062 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 5063 break; 5064 udelay(1); 5065 } 5066 if (i >= adev->usec_timeout) { 5067 printk("Failed to wait all pipes clean\n"); 5068 return -EINVAL; 5069 } 5070 5071 /********** trigger soft reset ***********/ 5072 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5073 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5074 SOFT_RESET_CP, 1); 5075 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5076 SOFT_RESET_GFX, 1); 5077 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5078 SOFT_RESET_CPF, 1); 5079 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5080 SOFT_RESET_CPC, 1); 5081 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5082 SOFT_RESET_CPG, 1); 5083 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5084 /********** exit soft reset ***********/ 5085 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5086 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5087 SOFT_RESET_CP, 0); 5088 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5089 SOFT_RESET_GFX, 0); 5090 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5091 SOFT_RESET_CPF, 0); 5092 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5093 SOFT_RESET_CPC, 0); 5094 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5095 SOFT_RESET_CPG, 0); 5096 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5097 5098 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 5099 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 5100 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 5101 5102 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 5103 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 5104 5105 for (i = 0; i < adev->usec_timeout; i++) { 5106 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 5107 break; 5108 udelay(1); 5109 } 5110 if (i >= adev->usec_timeout) { 5111 printk("Failed to wait CP_VMID_RESET to 0\n"); 5112 return -EINVAL; 5113 } 5114 5115 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5116 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5117 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5118 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5119 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5120 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5121 5122 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5123 5124 return gfx_v11_0_cp_resume(adev); 5125 } 5126 5127 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 5128 { 5129 int i, r; 5130 struct amdgpu_device *adev = ip_block->adev; 5131 
struct amdgpu_ring *ring; 5132 long tmo = msecs_to_jiffies(1000); 5133 5134 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5135 ring = &adev->gfx.gfx_ring[i]; 5136 r = amdgpu_ring_test_ib(ring, tmo); 5137 if (r) 5138 return true; 5139 } 5140 5141 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5142 ring = &adev->gfx.compute_ring[i]; 5143 r = amdgpu_ring_test_ib(ring, tmo); 5144 if (r) 5145 return true; 5146 } 5147 5148 return false; 5149 } 5150 5151 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5152 { 5153 struct amdgpu_device *adev = ip_block->adev; 5154 /** 5155 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5156 */ 5157 return amdgpu_mes_resume(adev); 5158 } 5159 5160 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5161 { 5162 uint64_t clock; 5163 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5164 5165 if (amdgpu_sriov_vf(adev)) { 5166 amdgpu_gfx_off_ctrl(adev, false); 5167 mutex_lock(&adev->gfx.gpu_clock_mutex); 5168 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5169 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5170 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5171 if (clock_counter_hi_pre != clock_counter_hi_after) 5172 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5173 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5174 amdgpu_gfx_off_ctrl(adev, true); 5175 } else { 5176 preempt_disable(); 5177 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5178 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5179 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5180 if (clock_counter_hi_pre != clock_counter_hi_after) 5181 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5182 preempt_enable(); 5183 } 5184 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5185 5186 return clock; 5187 } 5188 5189 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5190 uint32_t vmid, 5191 uint32_t gds_base, uint32_t gds_size, 5192 uint32_t gws_base, uint32_t gws_size, 5193 uint32_t oa_base, uint32_t oa_size) 5194 { 5195 struct amdgpu_device *adev = ring->adev; 5196 5197 /* GDS Base */ 5198 gfx_v11_0_write_data_to_reg(ring, 0, false, 5199 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5200 gds_base); 5201 5202 /* GDS Size */ 5203 gfx_v11_0_write_data_to_reg(ring, 0, false, 5204 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5205 gds_size); 5206 5207 /* GWS */ 5208 gfx_v11_0_write_data_to_reg(ring, 0, false, 5209 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5210 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5211 5212 /* OA */ 5213 gfx_v11_0_write_data_to_reg(ring, 0, false, 5214 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5215 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5216 } 5217 5218 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5219 { 5220 struct amdgpu_device *adev = ip_block->adev; 5221 5222 switch (amdgpu_user_queue) { 5223 case -1: 5224 case 0: 5225 default: 5226 adev->gfx.disable_kq = false; 5227 adev->gfx.disable_uq = true; 5228 break; 5229 case 1: 5230 adev->gfx.disable_kq = false; 5231 adev->gfx.disable_uq = false; 5232 break; 5233 case 2: 5234 adev->gfx.disable_kq = true; 5235 adev->gfx.disable_uq = false; 5236 break; 5237 } 5238 5239 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5240 
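	/*
	 * The disable_kq/disable_uq flags derived from amdgpu_user_queue
	 * above (-1/0: kernel queues only, 1: both kernel and user queues,
	 * 2: user queues only) determine the ring configuration below.
	 */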
5241 if (adev->gfx.disable_kq) { 5242 /* We need one GFX ring temporarily to set up 5243 * the clear state. 5244 */ 5245 adev->gfx.num_gfx_rings = 1; 5246 adev->gfx.num_compute_rings = 0; 5247 } else { 5248 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5249 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5250 AMDGPU_MAX_COMPUTE_RINGS); 5251 } 5252 5253 gfx_v11_0_set_kiq_pm4_funcs(adev); 5254 gfx_v11_0_set_ring_funcs(adev); 5255 gfx_v11_0_set_irq_funcs(adev); 5256 gfx_v11_0_set_gds_init(adev); 5257 gfx_v11_0_set_rlc_funcs(adev); 5258 gfx_v11_0_set_mqd_funcs(adev); 5259 gfx_v11_0_set_imu_funcs(adev); 5260 5261 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5262 5263 return gfx_v11_0_init_microcode(adev); 5264 } 5265 5266 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5267 { 5268 struct amdgpu_device *adev = ip_block->adev; 5269 int r; 5270 5271 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5272 if (r) 5273 return r; 5274 5275 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5276 if (r) 5277 return r; 5278 5279 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5280 if (r) 5281 return r; 5282 5283 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5284 if (r) 5285 return r; 5286 5287 return 0; 5288 } 5289 5290 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5291 { 5292 uint32_t rlc_cntl; 5293 5294 /* if RLC is not enabled, do nothing */ 5295 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5296 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5297 } 5298 5299 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5300 { 5301 uint32_t data; 5302 unsigned i; 5303 5304 data = RLC_SAFE_MODE__CMD_MASK; 5305 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5306 5307 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5308 5309 /* wait for RLC_SAFE_MODE */ 5310 for (i = 0; i < adev->usec_timeout; i++) { 5311 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5312 RLC_SAFE_MODE, CMD)) 5313 break; 5314 udelay(1); 5315 } 5316 } 5317 5318 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5319 { 5320 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5321 } 5322 5323 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5324 bool enable) 5325 { 5326 uint32_t def, data; 5327 5328 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5329 return; 5330 5331 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5332 5333 if (enable) 5334 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5335 else 5336 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5337 5338 if (def != data) 5339 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5340 } 5341 5342 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5343 bool enable) 5344 { 5345 uint32_t def, data; 5346 5347 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5348 return; 5349 5350 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5351 5352 if (enable) 5353 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5354 else 5355 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5356 5357 if (def != data) 5358 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5359 } 5360 5361 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5362 bool enable) 5363 { 5364 uint32_t def, data; 5365 5366 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5367 return; 5368 5369 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5370 
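	/*
	 * As with the other *_OVERRIDE updates in this file, the register is
	 * only written back further down when the computed value actually
	 * differs, avoiding redundant MMIO writes.
	 */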
5371 if (enable) 5372 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5373 else 5374 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5375 5376 if (def != data) 5377 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5378 } 5379 5380 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5381 bool enable) 5382 { 5383 uint32_t data, def; 5384 5385 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5386 return; 5387 5388 /* It is disabled by HW by default */ 5389 if (enable) { 5390 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5391 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5392 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5393 5394 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5395 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5396 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5397 5398 if (def != data) 5399 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5400 } 5401 } else { 5402 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5403 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5404 5405 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5406 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5407 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5408 5409 if (def != data) 5410 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5411 } 5412 } 5413 } 5414 5415 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5416 bool enable) 5417 { 5418 uint32_t def, data; 5419 5420 if (!(adev->cg_flags & 5421 (AMD_CG_SUPPORT_GFX_CGCG | 5422 AMD_CG_SUPPORT_GFX_CGLS | 5423 AMD_CG_SUPPORT_GFX_3D_CGCG | 5424 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5425 return; 5426 5427 if (enable) { 5428 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5429 5430 /* unset CGCG override */ 5431 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5432 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5433 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5434 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5435 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5436 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5437 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5438 5439 /* update CGCG override bits */ 5440 if (def != data) 5441 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5442 5443 /* enable cgcg FSM(0x0000363F) */ 5444 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5445 5446 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5447 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5448 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5449 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5450 } 5451 5452 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5453 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5454 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5455 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5456 } 5457 5458 if (def != data) 5459 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5460 5461 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5462 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5463 5464 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5465 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5466 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5467 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5468 } 5469 5470 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5471 data &= 
~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5472 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5473 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5474 } 5475 5476 if (def != data) 5477 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5478 5479 /* set IDLE_POLL_COUNT(0x00900100) */ 5480 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5481 5482 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5483 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5484 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5485 5486 if (def != data) 5487 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5488 5489 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5490 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5491 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5492 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5493 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5494 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5495 5496 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5497 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5498 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5499 5500 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5501 if (adev->sdma.num_instances > 1) { 5502 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5503 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5504 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5505 } 5506 } else { 5507 /* Program RLC_CGCG_CGLS_CTRL */ 5508 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5509 5510 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5511 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5512 5513 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5514 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5515 5516 if (def != data) 5517 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5518 5519 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5520 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5521 5522 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5523 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5524 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5525 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5526 5527 if (def != data) 5528 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5529 5530 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5531 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5532 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5533 5534 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5535 if (adev->sdma.num_instances > 1) { 5536 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5537 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5538 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5539 } 5540 } 5541 } 5542 5543 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5544 bool enable) 5545 { 5546 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5547 5548 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5549 5550 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5551 5552 gfx_v11_0_update_repeater_fgcg(adev, enable); 5553 5554 gfx_v11_0_update_sram_fgcg(adev, enable); 5555 5556 gfx_v11_0_update_perf_clk(adev, enable); 5557 5558 if (adev->cg_flags & 5559 (AMD_CG_SUPPORT_GFX_MGCG | 5560 AMD_CG_SUPPORT_GFX_CGLS | 5561 AMD_CG_SUPPORT_GFX_CGCG | 5562 AMD_CG_SUPPORT_GFX_3D_CGCG | 5563 AMD_CG_SUPPORT_GFX_3D_CGLS)) 
5564 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5565 5566 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5567 5568 return 0; 5569 } 5570 5571 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5572 { 5573 u32 reg, pre_data, data; 5574 5575 amdgpu_gfx_off_ctrl(adev, false); 5576 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5577 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5578 pre_data = RREG32_NO_KIQ(reg); 5579 else 5580 pre_data = RREG32(reg); 5581 5582 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5583 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5584 5585 if (pre_data != data) { 5586 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5587 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5588 } else 5589 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5590 } 5591 amdgpu_gfx_off_ctrl(adev, true); 5592 5593 if (ring 5594 && amdgpu_sriov_is_pp_one_vf(adev) 5595 && (pre_data != data) 5596 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5597 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5598 amdgpu_ring_emit_wreg(ring, reg, data); 5599 } 5600 } 5601 5602 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5603 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5604 .set_safe_mode = gfx_v11_0_set_safe_mode, 5605 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5606 .init = gfx_v11_0_rlc_init, 5607 .get_csb_size = gfx_v11_0_get_csb_size, 5608 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5609 .resume = gfx_v11_0_rlc_resume, 5610 .stop = gfx_v11_0_rlc_stop, 5611 .reset = gfx_v11_0_rlc_reset, 5612 .start = gfx_v11_0_rlc_start, 5613 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5614 }; 5615 5616 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5617 { 5618 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5619 5620 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5621 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5622 else 5623 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5624 5625 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5626 5627 // Program RLC_PG_DELAY3 for CGPG hysteresis 5628 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5629 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5630 case IP_VERSION(11, 0, 1): 5631 case IP_VERSION(11, 0, 4): 5632 case IP_VERSION(11, 5, 0): 5633 case IP_VERSION(11, 5, 1): 5634 case IP_VERSION(11, 5, 2): 5635 case IP_VERSION(11, 5, 3): 5636 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5637 break; 5638 default: 5639 break; 5640 } 5641 } 5642 } 5643 5644 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5645 { 5646 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5647 5648 gfx_v11_cntl_power_gating(adev, enable); 5649 5650 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5651 } 5652 5653 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5654 enum amd_powergating_state state) 5655 { 5656 struct amdgpu_device *adev = ip_block->adev; 5657 bool enable = (state == AMD_PG_STATE_GATE); 5658 5659 if (amdgpu_sriov_vf(adev)) 5660 return 0; 5661 5662 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5663 case IP_VERSION(11, 0, 0): 5664 case IP_VERSION(11, 0, 2): 5665 case IP_VERSION(11, 0, 3): 5666 amdgpu_gfx_off_ctrl(adev, enable); 5667 break; 5668 case IP_VERSION(11, 0, 1): 5669 case IP_VERSION(11, 0, 4): 5670 case IP_VERSION(11, 5, 0): 5671 case IP_VERSION(11, 5, 1): 5672 case IP_VERSION(11, 5, 
2): 5673 case IP_VERSION(11, 5, 3): 5674 if (!enable) 5675 amdgpu_gfx_off_ctrl(adev, false); 5676 5677 gfx_v11_cntl_pg(adev, enable); 5678 5679 if (enable) 5680 amdgpu_gfx_off_ctrl(adev, true); 5681 5682 break; 5683 default: 5684 break; 5685 } 5686 5687 return 0; 5688 } 5689 5690 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5691 enum amd_clockgating_state state) 5692 { 5693 struct amdgpu_device *adev = ip_block->adev; 5694 5695 if (amdgpu_sriov_vf(adev)) 5696 return 0; 5697 5698 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5699 case IP_VERSION(11, 0, 0): 5700 case IP_VERSION(11, 0, 1): 5701 case IP_VERSION(11, 0, 2): 5702 case IP_VERSION(11, 0, 3): 5703 case IP_VERSION(11, 0, 4): 5704 case IP_VERSION(11, 5, 0): 5705 case IP_VERSION(11, 5, 1): 5706 case IP_VERSION(11, 5, 2): 5707 case IP_VERSION(11, 5, 3): 5708 gfx_v11_0_update_gfx_clock_gating(adev, 5709 state == AMD_CG_STATE_GATE); 5710 break; 5711 default: 5712 break; 5713 } 5714 5715 return 0; 5716 } 5717 5718 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5719 { 5720 struct amdgpu_device *adev = ip_block->adev; 5721 int data; 5722 5723 /* AMD_CG_SUPPORT_GFX_MGCG */ 5724 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5725 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5726 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5727 5728 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5729 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5730 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5731 5732 /* AMD_CG_SUPPORT_GFX_FGCG */ 5733 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5734 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5735 5736 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5737 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5738 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5739 5740 /* AMD_CG_SUPPORT_GFX_CGCG */ 5741 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5742 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5743 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5744 5745 /* AMD_CG_SUPPORT_GFX_CGLS */ 5746 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5747 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5748 5749 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5750 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5751 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5752 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5753 5754 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5755 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5756 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5757 } 5758 5759 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5760 { 5761 /* gfx11 is 32bit rptr*/ 5762 return *(uint32_t *)ring->rptr_cpu_addr; 5763 } 5764 5765 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5766 { 5767 struct amdgpu_device *adev = ring->adev; 5768 u64 wptr; 5769 5770 /* XXX check if swapping is necessary on BE */ 5771 if (ring->use_doorbell) { 5772 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5773 } else { 5774 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5775 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5776 } 5777 5778 return wptr; 5779 } 5780 5781 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5782 { 5783 struct amdgpu_device *adev = ring->adev; 5784 5785 if (ring->use_doorbell) { 5786 /* XXX check if swapping is necessary on BE */ 5787 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5788 ring->wptr); 5789 WDOORBELL64(ring->doorbell_index, ring->wptr); 5790 } else { 5791 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5792 
lower_32_bits(ring->wptr)); 5793 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5794 upper_32_bits(ring->wptr)); 5795 } 5796 } 5797 5798 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5799 { 5800 /* gfx11 hardware is 32bit rptr */ 5801 return *(uint32_t *)ring->rptr_cpu_addr; 5802 } 5803 5804 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5805 { 5806 u64 wptr; 5807 5808 /* XXX check if swapping is necessary on BE */ 5809 if (ring->use_doorbell) 5810 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5811 else 5812 BUG(); 5813 return wptr; 5814 } 5815 5816 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5817 { 5818 struct amdgpu_device *adev = ring->adev; 5819 5820 /* XXX check if swapping is necessary on BE */ 5821 if (ring->use_doorbell) { 5822 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5823 ring->wptr); 5824 WDOORBELL64(ring->doorbell_index, ring->wptr); 5825 } else { 5826 BUG(); /* only DOORBELL method supported on gfx11 now */ 5827 } 5828 } 5829 5830 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5831 { 5832 struct amdgpu_device *adev = ring->adev; 5833 u32 ref_and_mask, reg_mem_engine; 5834 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5835 5836 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5837 switch (ring->me) { 5838 case 1: 5839 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5840 break; 5841 case 2: 5842 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5843 break; 5844 default: 5845 return; 5846 } 5847 reg_mem_engine = 0; 5848 } else { 5849 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5850 reg_mem_engine = 1; /* pfp */ 5851 } 5852 5853 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5854 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5855 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5856 ref_and_mask, ref_and_mask, 0x20); 5857 } 5858 5859 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5860 struct amdgpu_job *job, 5861 struct amdgpu_ib *ib, 5862 uint32_t flags) 5863 { 5864 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5865 u32 header, control = 0; 5866 5867 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5868 5869 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5870 5871 control |= ib->length_dw | (vmid << 24); 5872 5873 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5874 control |= INDIRECT_BUFFER_PRE_ENB(1); 5875 5876 if (flags & AMDGPU_IB_PREEMPTED) 5877 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5878 5879 if (vmid) 5880 gfx_v11_0_ring_emit_de_meta(ring, 5881 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5882 } 5883 5884 amdgpu_ring_write(ring, header); 5885 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5886 amdgpu_ring_write(ring, 5887 #ifdef __BIG_ENDIAN 5888 (2 << 0) | 5889 #endif 5890 lower_32_bits(ib->gpu_addr)); 5891 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5892 amdgpu_ring_write(ring, control); 5893 } 5894 5895 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5896 struct amdgpu_job *job, 5897 struct amdgpu_ib *ib, 5898 uint32_t flags) 5899 { 5900 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5901 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5902 5903 /* Currently, there is a high possibility to get wave ID mismatch 5904 * between ME and GDS, leading to a hw deadlock, because ME generates 5905 * different wave IDs than the GDS expects. 
This situation happens 5906 * randomly when at least 5 compute pipes use GDS ordered append. 5907 * The wave IDs generated by ME are also wrong after suspend/resume. 5908 * Those are probably bugs somewhere else in the kernel driver. 5909 * 5910 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5911 * GDS to 0 for this ring (me/pipe). 5912 */ 5913 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5914 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5915 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5916 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5917 } 5918 5919 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5920 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5921 amdgpu_ring_write(ring, 5922 #ifdef __BIG_ENDIAN 5923 (2 << 0) | 5924 #endif 5925 lower_32_bits(ib->gpu_addr)); 5926 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5927 amdgpu_ring_write(ring, control); 5928 } 5929 5930 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5931 u64 seq, unsigned flags) 5932 { 5933 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5934 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5935 5936 /* RELEASE_MEM - flush caches, send int */ 5937 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5938 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5939 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5940 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5941 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5942 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5943 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5944 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5945 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5946 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 
2 : 0))); 5947 5948 /* 5949 * the address should be Qword aligned if 64bit write, Dword 5950 * aligned if only send 32bit data low (discard data high) 5951 */ 5952 if (write64bit) 5953 BUG_ON(addr & 0x7); 5954 else 5955 BUG_ON(addr & 0x3); 5956 amdgpu_ring_write(ring, lower_32_bits(addr)); 5957 amdgpu_ring_write(ring, upper_32_bits(addr)); 5958 amdgpu_ring_write(ring, lower_32_bits(seq)); 5959 amdgpu_ring_write(ring, upper_32_bits(seq)); 5960 amdgpu_ring_write(ring, 0); 5961 } 5962 5963 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5964 { 5965 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5966 uint32_t seq = ring->fence_drv.sync_seq; 5967 uint64_t addr = ring->fence_drv.gpu_addr; 5968 5969 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5970 upper_32_bits(addr), seq, 0xffffffff, 4); 5971 } 5972 5973 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5974 uint16_t pasid, uint32_t flush_type, 5975 bool all_hub, uint8_t dst_sel) 5976 { 5977 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5978 amdgpu_ring_write(ring, 5979 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5980 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5981 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5982 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5983 } 5984 5985 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5986 unsigned vmid, uint64_t pd_addr) 5987 { 5988 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5989 5990 /* compute doesn't have PFP */ 5991 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5992 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5993 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5994 amdgpu_ring_write(ring, 0x0); 5995 } 5996 5997 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5998 * changed in any way. 
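	 * Clearing set_q_mode_offs and set_q_mode_ptr below ensures that
	 * gfx_v11_0_ring_emit_gfx_shadow() neither makes the next prefix
	 * SET_Q_MODE packet conditional nor patches it out.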
5999 */ 6000 ring->set_q_mode_offs = 0; 6001 ring->set_q_mode_ptr = NULL; 6002 } 6003 6004 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6005 u64 seq, unsigned int flags) 6006 { 6007 struct amdgpu_device *adev = ring->adev; 6008 6009 /* we only allocate 32bit for each seq wb address */ 6010 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6011 6012 /* write fence seq to the "addr" */ 6013 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6014 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6015 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6016 amdgpu_ring_write(ring, lower_32_bits(addr)); 6017 amdgpu_ring_write(ring, upper_32_bits(addr)); 6018 amdgpu_ring_write(ring, lower_32_bits(seq)); 6019 6020 if (flags & AMDGPU_FENCE_FLAG_INT) { 6021 /* set register to trigger INT */ 6022 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6023 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6024 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6025 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 6026 amdgpu_ring_write(ring, 0); 6027 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6028 } 6029 } 6030 6031 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 6032 uint32_t flags) 6033 { 6034 uint32_t dw2 = 0; 6035 6036 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6037 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6038 /* set load_global_config & load_global_uconfig */ 6039 dw2 |= 0x8001; 6040 /* set load_cs_sh_regs */ 6041 dw2 |= 0x01000000; 6042 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6043 dw2 |= 0x10002; 6044 } 6045 6046 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6047 amdgpu_ring_write(ring, dw2); 6048 amdgpu_ring_write(ring, 0); 6049 } 6050 6051 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 6052 uint64_t addr) 6053 { 6054 unsigned ret; 6055 6056 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6057 amdgpu_ring_write(ring, lower_32_bits(addr)); 6058 amdgpu_ring_write(ring, upper_32_bits(addr)); 6059 /* discard following DWs if *cond_exec_gpu_addr==0 */ 6060 amdgpu_ring_write(ring, 0); 6061 ret = ring->wptr & ring->buf_mask; 6062 /* patch dummy value later */ 6063 amdgpu_ring_write(ring, 0); 6064 6065 return ret; 6066 } 6067 6068 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, 6069 u64 shadow_va, u64 csa_va, 6070 u64 gds_va, bool init_shadow, 6071 int vmid) 6072 { 6073 struct amdgpu_device *adev = ring->adev; 6074 unsigned int offs, end; 6075 6076 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) 6077 return; 6078 6079 /* 6080 * The logic here isn't easy to understand because we need to keep state 6081 * accross multiple executions of the function as well as between the 6082 * CPU and GPU. The general idea is that the newly written GPU command 6083 * has a condition on the previous one and only executed if really 6084 * necessary. 6085 */ 6086 6087 /* 6088 * The dw in the NOP controls if the next SET_Q_MODE packet should be 6089 * executed or not. Reserve 64bits just to be on the save side. 6090 */ 6091 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); 6092 offs = ring->wptr & ring->buf_mask; 6093 6094 /* 6095 * We start with skipping the prefix SET_Q_MODE and always executing 6096 * the postfix SET_Q_MODE packet. This is changed below with a 6097 * WRITE_DATA command when the postfix executed. 6098 */ 6099 amdgpu_ring_write(ring, shadow_va ? 
1 : 0); 6100 amdgpu_ring_write(ring, 0); 6101 6102 if (ring->set_q_mode_offs) { 6103 uint64_t addr; 6104 6105 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6106 addr += ring->set_q_mode_offs << 2; 6107 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6108 } 6109 6110 /* 6111 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6112 * next prefix SET_Q_MODE packet executes as well. 6113 */ 6114 if (!shadow_va) { 6115 uint64_t addr; 6116 6117 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6118 addr += offs << 2; 6119 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6120 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6121 amdgpu_ring_write(ring, lower_32_bits(addr)); 6122 amdgpu_ring_write(ring, upper_32_bits(addr)); 6123 amdgpu_ring_write(ring, 0x1); 6124 } 6125 6126 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6127 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6128 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6129 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6130 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6131 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6132 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6133 amdgpu_ring_write(ring, shadow_va ? 6134 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6135 amdgpu_ring_write(ring, init_shadow ? 6136 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6137 6138 if (ring->set_q_mode_offs) 6139 amdgpu_ring_patch_cond_exec(ring, end); 6140 6141 if (shadow_va) { 6142 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6143 6144 /* 6145 * If the tokens match try to skip the last postfix SET_Q_MODE 6146 * packet to avoid saving/restoring the state all the time. 6147 */ 6148 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6149 *ring->set_q_mode_ptr = 0; 6150 6151 ring->set_q_mode_token = token; 6152 } else { 6153 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6154 } 6155 6156 ring->set_q_mode_offs = offs; 6157 } 6158 6159 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 6160 { 6161 int i, r = 0; 6162 struct amdgpu_device *adev = ring->adev; 6163 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6164 struct amdgpu_ring *kiq_ring = &kiq->ring; 6165 unsigned long flags; 6166 6167 if (adev->enable_mes) 6168 return -EINVAL; 6169 6170 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6171 return -EINVAL; 6172 6173 spin_lock_irqsave(&kiq->ring_lock, flags); 6174 6175 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6176 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6177 return -ENOMEM; 6178 } 6179 6180 /* assert preemption condition */ 6181 amdgpu_ring_set_preempt_cond_exec(ring, false); 6182 6183 /* assert IB preemption, emit the trailing fence */ 6184 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6185 ring->trail_fence_gpu_addr, 6186 ++ring->trail_seq); 6187 amdgpu_ring_commit(kiq_ring); 6188 6189 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6190 6191 /* poll the trailing fence */ 6192 for (i = 0; i < adev->usec_timeout; i++) { 6193 if (ring->trail_seq == 6194 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6195 break; 6196 udelay(1); 6197 } 6198 6199 if (i >= adev->usec_timeout) { 6200 r = -EINVAL; 6201 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6202 } 6203 6204 /* deassert preemption condition */ 6205 amdgpu_ring_set_preempt_cond_exec(ring, true); 6206 return r; 6207 } 6208 6209 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6210 { 6211 struct 
amdgpu_device *adev = ring->adev; 6212 struct v10_de_ib_state de_payload = {0}; 6213 uint64_t offset, gds_addr, de_payload_gpu_addr; 6214 void *de_payload_cpu_addr; 6215 int cnt; 6216 6217 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6218 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6219 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6220 6221 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6222 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6223 PAGE_SIZE); 6224 6225 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6226 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6227 6228 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6229 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6230 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6231 WRITE_DATA_DST_SEL(8) | 6232 WR_CONFIRM) | 6233 WRITE_DATA_CACHE_POLICY(0)); 6234 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6235 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6236 6237 if (resume) 6238 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6239 sizeof(de_payload) >> 2); 6240 else 6241 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6242 sizeof(de_payload) >> 2); 6243 } 6244 6245 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6246 bool secure) 6247 { 6248 uint32_t v = secure ? FRAME_TMZ : 0; 6249 6250 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6251 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6252 } 6253 6254 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6255 uint32_t reg_val_offs) 6256 { 6257 struct amdgpu_device *adev = ring->adev; 6258 6259 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6260 amdgpu_ring_write(ring, 0 | /* src: register*/ 6261 (5 << 8) | /* dst: memory */ 6262 (1 << 20)); /* write confirm */ 6263 amdgpu_ring_write(ring, reg); 6264 amdgpu_ring_write(ring, 0); 6265 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6266 reg_val_offs * 4)); 6267 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6268 reg_val_offs * 4)); 6269 } 6270 6271 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6272 uint32_t val) 6273 { 6274 uint32_t cmd = 0; 6275 6276 switch (ring->funcs->type) { 6277 case AMDGPU_RING_TYPE_GFX: 6278 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6279 break; 6280 case AMDGPU_RING_TYPE_KIQ: 6281 cmd = (1 << 16); /* no inc addr */ 6282 break; 6283 default: 6284 cmd = WR_CONFIRM; 6285 break; 6286 } 6287 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6288 amdgpu_ring_write(ring, cmd); 6289 amdgpu_ring_write(ring, reg); 6290 amdgpu_ring_write(ring, 0); 6291 amdgpu_ring_write(ring, val); 6292 } 6293 6294 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6295 uint32_t val, uint32_t mask) 6296 { 6297 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6298 } 6299 6300 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6301 uint32_t reg0, uint32_t reg1, 6302 uint32_t ref, uint32_t mask) 6303 { 6304 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6305 6306 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6307 ref, mask, 0x20); 6308 } 6309 6310 static void 6311 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6312 uint32_t me, uint32_t pipe, 6313 enum amdgpu_interrupt_state state) 6314 { 6315 uint32_t cp_int_cntl, cp_int_cntl_reg; 6316 6317 if (!me) { 6318 switch (pipe) { 6319 case 0: 6320 
cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6321 break; 6322 case 1: 6323 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6324 break; 6325 default: 6326 DRM_DEBUG("invalid pipe %d\n", pipe); 6327 return; 6328 } 6329 } else { 6330 DRM_DEBUG("invalid me %d\n", me); 6331 return; 6332 } 6333 6334 switch (state) { 6335 case AMDGPU_IRQ_STATE_DISABLE: 6336 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6337 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6338 TIME_STAMP_INT_ENABLE, 0); 6339 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6340 GENERIC0_INT_ENABLE, 0); 6341 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6342 break; 6343 case AMDGPU_IRQ_STATE_ENABLE: 6344 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6345 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6346 TIME_STAMP_INT_ENABLE, 1); 6347 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6348 GENERIC0_INT_ENABLE, 1); 6349 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6350 break; 6351 default: 6352 break; 6353 } 6354 } 6355 6356 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6357 int me, int pipe, 6358 enum amdgpu_interrupt_state state) 6359 { 6360 u32 mec_int_cntl, mec_int_cntl_reg; 6361 6362 /* 6363 * amdgpu controls only the first MEC. That's why this function only 6364 * handles the setting of interrupts for this specific MEC. All other 6365 * pipes' interrupts are set by amdkfd. 6366 */ 6367 6368 if (me == 1) { 6369 switch (pipe) { 6370 case 0: 6371 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6372 break; 6373 case 1: 6374 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6375 break; 6376 case 2: 6377 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6378 break; 6379 case 3: 6380 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6381 break; 6382 default: 6383 DRM_DEBUG("invalid pipe %d\n", pipe); 6384 return; 6385 } 6386 } else { 6387 DRM_DEBUG("invalid me %d\n", me); 6388 return; 6389 } 6390 6391 switch (state) { 6392 case AMDGPU_IRQ_STATE_DISABLE: 6393 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6394 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6395 TIME_STAMP_INT_ENABLE, 0); 6396 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6397 GENERIC0_INT_ENABLE, 0); 6398 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6399 break; 6400 case AMDGPU_IRQ_STATE_ENABLE: 6401 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6402 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6403 TIME_STAMP_INT_ENABLE, 1); 6404 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6405 GENERIC0_INT_ENABLE, 1); 6406 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6407 break; 6408 default: 6409 break; 6410 } 6411 } 6412 6413 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6414 struct amdgpu_irq_src *src, 6415 unsigned type, 6416 enum amdgpu_interrupt_state state) 6417 { 6418 switch (type) { 6419 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6420 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6421 break; 6422 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6423 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6424 break; 6425 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6426 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6427 break; 6428 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6429 
gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6430 break; 6431 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6432 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6433 break; 6434 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6435 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6436 break; 6437 default: 6438 break; 6439 } 6440 return 0; 6441 } 6442 6443 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6444 struct amdgpu_irq_src *source, 6445 struct amdgpu_iv_entry *entry) 6446 { 6447 u32 doorbell_offset = entry->src_data[0]; 6448 u8 me_id, pipe_id, queue_id; 6449 struct amdgpu_ring *ring; 6450 int i; 6451 6452 DRM_DEBUG("IH: CP EOP\n"); 6453 6454 if (adev->enable_mes && doorbell_offset) { 6455 struct amdgpu_userq_fence_driver *fence_drv = NULL; 6456 struct xarray *xa = &adev->userq_xa; 6457 unsigned long flags; 6458 6459 xa_lock_irqsave(xa, flags); 6460 fence_drv = xa_load(xa, doorbell_offset); 6461 if (fence_drv) 6462 amdgpu_userq_fence_driver_process(fence_drv); 6463 xa_unlock_irqrestore(xa, flags); 6464 } else { 6465 me_id = (entry->ring_id & 0x0c) >> 2; 6466 pipe_id = (entry->ring_id & 0x03) >> 0; 6467 queue_id = (entry->ring_id & 0x70) >> 4; 6468 6469 switch (me_id) { 6470 case 0: 6471 if (pipe_id == 0) 6472 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6473 else 6474 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6475 break; 6476 case 1: 6477 case 2: 6478 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6479 ring = &adev->gfx.compute_ring[i]; 6480 /* Per-queue interrupt is supported for MEC starting from VI. 6481 * The interrupt can only be enabled/disabled per pipe instead 6482 * of per queue. 6483 */ 6484 if ((ring->me == me_id) && 6485 (ring->pipe == pipe_id) && 6486 (ring->queue == queue_id)) 6487 amdgpu_fence_process(ring); 6488 } 6489 break; 6490 } 6491 } 6492 6493 return 0; 6494 } 6495 6496 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6497 struct amdgpu_irq_src *source, 6498 unsigned int type, 6499 enum amdgpu_interrupt_state state) 6500 { 6501 u32 cp_int_cntl_reg, cp_int_cntl; 6502 int i, j; 6503 6504 switch (state) { 6505 case AMDGPU_IRQ_STATE_DISABLE: 6506 case AMDGPU_IRQ_STATE_ENABLE: 6507 for (i = 0; i < adev->gfx.me.num_me; i++) { 6508 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6509 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6510 6511 if (cp_int_cntl_reg) { 6512 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6513 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6514 PRIV_REG_INT_ENABLE, 6515 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6516 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6517 } 6518 } 6519 } 6520 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6521 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6522 /* MECs start at 1 */ 6523 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6524 6525 if (cp_int_cntl_reg) { 6526 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6527 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6528 PRIV_REG_INT_ENABLE, 6529 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6530 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6531 } 6532 } 6533 } 6534 break; 6535 default: 6536 break; 6537 } 6538 6539 return 0; 6540 } 6541 6542 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6543 struct amdgpu_irq_src *source, 6544 unsigned type, 6545 enum amdgpu_interrupt_state state) 6546 { 6547 u32 cp_int_cntl_reg, cp_int_cntl; 6548 int i, j; 6549 6550 switch (state) { 6551 case AMDGPU_IRQ_STATE_DISABLE: 6552 case AMDGPU_IRQ_STATE_ENABLE: 6553 for (i = 0; i < adev->gfx.me.num_me; i++) { 6554 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6555 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6556 6557 if (cp_int_cntl_reg) { 6558 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6559 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6560 OPCODE_ERROR_INT_ENABLE, 6561 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6562 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6563 } 6564 } 6565 } 6566 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6567 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6568 /* MECs start at 1 */ 6569 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6570 6571 if (cp_int_cntl_reg) { 6572 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6573 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6574 OPCODE_ERROR_INT_ENABLE, 6575 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6576 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6577 } 6578 } 6579 } 6580 break; 6581 default: 6582 break; 6583 } 6584 return 0; 6585 } 6586 6587 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6588 struct amdgpu_irq_src *source, 6589 unsigned int type, 6590 enum amdgpu_interrupt_state state) 6591 { 6592 u32 cp_int_cntl_reg, cp_int_cntl; 6593 int i, j; 6594 6595 switch (state) { 6596 case AMDGPU_IRQ_STATE_DISABLE: 6597 case AMDGPU_IRQ_STATE_ENABLE: 6598 for (i = 0; i < adev->gfx.me.num_me; i++) { 6599 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6600 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6601 6602 if (cp_int_cntl_reg) { 6603 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6604 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6605 PRIV_INSTR_INT_ENABLE, 6606 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6607 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6608 } 6609 } 6610 } 6611 break; 6612 default: 6613 break; 6614 } 6615 6616 return 0; 6617 } 6618 6619 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6620 struct amdgpu_iv_entry *entry) 6621 { 6622 u8 me_id, pipe_id, queue_id; 6623 struct amdgpu_ring *ring; 6624 int i; 6625 6626 me_id = (entry->ring_id & 0x0c) >> 2; 6627 pipe_id = (entry->ring_id & 0x03) >> 0; 6628 queue_id = (entry->ring_id & 0x70) >> 4; 6629 6630 if (!adev->gfx.disable_kq) { 6631 switch (me_id) { 6632 case 0: 6633 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6634 ring = &adev->gfx.gfx_ring[i]; 6635 if (ring->me == me_id && ring->pipe == pipe_id && 6636 ring->queue == queue_id) 6637 drm_sched_fault(&ring->sched); 6638 } 6639 break; 6640 case 1: 6641 case 2: 6642 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6643 ring = &adev->gfx.compute_ring[i]; 6644 if (ring->me == me_id && ring->pipe == pipe_id && 6645 ring->queue == queue_id) 6646 drm_sched_fault(&ring->sched); 6647 } 6648 break; 6649 default: 6650 BUG(); 6651 break; 6652 } 6653 } 6654 } 6655 6656 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6657 struct amdgpu_irq_src *source, 6658 struct amdgpu_iv_entry *entry) 6659 { 6660 DRM_ERROR("Illegal register access in command stream\n"); 6661 gfx_v11_0_handle_priv_fault(adev, entry); 6662 return 0; 6663 } 6664 6665 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6666 struct amdgpu_irq_src *source, 6667 struct amdgpu_iv_entry *entry) 6668 { 6669 DRM_ERROR("Illegal opcode in command stream \n"); 6670 gfx_v11_0_handle_priv_fault(adev, entry); 6671 return 0; 6672 } 6673 6674 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6675 struct amdgpu_irq_src *source, 6676 struct amdgpu_iv_entry *entry) 6677 { 6678 DRM_ERROR("Illegal instruction in command stream\n"); 6679 gfx_v11_0_handle_priv_fault(adev, entry); 6680 return 0; 6681 } 6682 6683 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6684 struct amdgpu_irq_src *source, 6685 struct amdgpu_iv_entry *entry) 6686 { 6687 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6688 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6689 6690 return 0; 6691 } 6692 6693 #if 0 6694 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6695 struct amdgpu_irq_src *src, 6696 unsigned int type, 6697 enum amdgpu_interrupt_state state) 6698 { 6699 uint32_t tmp, target; 6700 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6701 6702 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6703 target += ring->pipe; 6704 6705 switch (type) { 6706 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6707 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6708 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6709 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6710 GENERIC2_INT_ENABLE, 0); 6711 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6712 6713 tmp = RREG32_SOC15_IP(GC, target); 6714 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6715 GENERIC2_INT_ENABLE, 0); 6716 WREG32_SOC15_IP(GC, target, tmp); 6717 } else { 6718 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6719 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6720 GENERIC2_INT_ENABLE, 1); 6721 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6722 6723 tmp = RREG32_SOC15_IP(GC, target); 6724 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6725 GENERIC2_INT_ENABLE, 1); 6726 WREG32_SOC15_IP(GC, target, tmp); 6727 } 6728 break; 6729 default: 6730 BUG(); /* kiq only support GENERIC2_INT now */ 6731 break; 6732 } 6733 return 0; 
6734 } 6735 #endif 6736 6737 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6738 { 6739 const unsigned int gcr_cntl = 6740 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6741 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6742 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6743 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6744 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6745 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6746 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6747 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6748 6749 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6750 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6751 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6752 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6753 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6754 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6755 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6756 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6757 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6758 } 6759 6760 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6761 { 6762 /* Disable the pipe reset until the CPFW fully support it.*/ 6763 dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); 6764 return false; 6765 } 6766 6767 6768 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6769 { 6770 struct amdgpu_device *adev = ring->adev; 6771 uint32_t reset_pipe = 0, clean_pipe = 0; 6772 int r; 6773 6774 if (!gfx_v11_pipe_reset_support(adev)) 6775 return -EOPNOTSUPP; 6776 6777 gfx_v11_0_set_safe_mode(adev, 0); 6778 mutex_lock(&adev->srbm_mutex); 6779 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6780 6781 switch (ring->pipe) { 6782 case 0: 6783 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6784 PFP_PIPE0_RESET, 1); 6785 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6786 ME_PIPE0_RESET, 1); 6787 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6788 PFP_PIPE0_RESET, 0); 6789 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6790 ME_PIPE0_RESET, 0); 6791 break; 6792 case 1: 6793 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6794 PFP_PIPE1_RESET, 1); 6795 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6796 ME_PIPE1_RESET, 1); 6797 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6798 PFP_PIPE1_RESET, 0); 6799 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6800 ME_PIPE1_RESET, 0); 6801 break; 6802 default: 6803 break; 6804 } 6805 6806 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6807 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6808 6809 r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6810 RS64_FW_UC_START_ADDR_LO; 6811 soc21_grbm_select(adev, 0, 0, 0, 0); 6812 mutex_unlock(&adev->srbm_mutex); 6813 gfx_v11_0_unset_safe_mode(adev, 0); 6814 6815 dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6816 r == 0 ? "successfully" : "failed"); 6817 /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, 6818 * so the pipe reset status relies on the later gfx ring test result. 
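	 * For that reason r is only used for the log message above and this
	 * function returns 0 unconditionally.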
6819 */ 6820 return 0; 6821 } 6822 6823 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, 6824 unsigned int vmid, 6825 struct amdgpu_fence *timedout_fence) 6826 { 6827 struct amdgpu_device *adev = ring->adev; 6828 int r; 6829 6830 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 6831 6832 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); 6833 if (r) { 6834 6835 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6836 r = gfx_v11_reset_gfx_pipe(ring); 6837 if (r) 6838 return r; 6839 } 6840 6841 r = gfx_v11_0_kgq_init_queue(ring, true); 6842 if (r) { 6843 dev_err(adev->dev, "failed to init kgq\n"); 6844 return r; 6845 } 6846 6847 r = amdgpu_mes_map_legacy_queue(adev, ring); 6848 if (r) { 6849 dev_err(adev->dev, "failed to remap kgq\n"); 6850 return r; 6851 } 6852 6853 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 6854 } 6855 6856 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6857 { 6858 6859 struct amdgpu_device *adev = ring->adev; 6860 uint32_t reset_pipe = 0, clean_pipe = 0; 6861 int r; 6862 6863 if (!gfx_v11_pipe_reset_support(adev)) 6864 return -EOPNOTSUPP; 6865 6866 gfx_v11_0_set_safe_mode(adev, 0); 6867 mutex_lock(&adev->srbm_mutex); 6868 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6869 6870 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6871 clean_pipe = reset_pipe; 6872 6873 if (adev->gfx.rs64_enable) { 6874 6875 switch (ring->pipe) { 6876 case 0: 6877 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6878 MEC_PIPE0_RESET, 1); 6879 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6880 MEC_PIPE0_RESET, 0); 6881 break; 6882 case 1: 6883 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6884 MEC_PIPE1_RESET, 1); 6885 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6886 MEC_PIPE1_RESET, 0); 6887 break; 6888 case 2: 6889 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6890 MEC_PIPE2_RESET, 1); 6891 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6892 MEC_PIPE2_RESET, 0); 6893 break; 6894 case 3: 6895 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6896 MEC_PIPE3_RESET, 1); 6897 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6898 MEC_PIPE3_RESET, 0); 6899 break; 6900 default: 6901 break; 6902 } 6903 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6904 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6905 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6906 RS64_FW_UC_START_ADDR_LO; 6907 } else { 6908 if (ring->me == 1) { 6909 switch (ring->pipe) { 6910 case 0: 6911 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6912 MEC_ME1_PIPE0_RESET, 1); 6913 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6914 MEC_ME1_PIPE0_RESET, 0); 6915 break; 6916 case 1: 6917 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6918 MEC_ME1_PIPE1_RESET, 1); 6919 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6920 MEC_ME1_PIPE1_RESET, 0); 6921 break; 6922 case 2: 6923 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6924 MEC_ME1_PIPE2_RESET, 1); 6925 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6926 MEC_ME1_PIPE2_RESET, 0); 6927 break; 6928 case 3: 6929 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6930 MEC_ME1_PIPE3_RESET, 1); 6931 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6932 MEC_ME1_PIPE3_RESET, 0); 6933 break; 6934 default: 6935 break; 6936 } 6937 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 6938 } else { 6939 switch (ring->pipe) { 6940 case 0: 6941 reset_pipe = 
REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6942 MEC_ME2_PIPE0_RESET, 1); 6943 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6944 MEC_ME2_PIPE0_RESET, 0); 6945 break; 6946 case 1: 6947 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6948 MEC_ME2_PIPE1_RESET, 1); 6949 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6950 MEC_ME2_PIPE1_RESET, 0); 6951 break; 6952 case 2: 6953 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6954 MEC_ME2_PIPE2_RESET, 1); 6955 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6956 MEC_ME2_PIPE2_RESET, 0); 6957 break; 6958 case 3: 6959 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6960 MEC_ME2_PIPE3_RESET, 1); 6961 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6962 MEC_ME2_PIPE3_RESET, 0); 6963 break; 6964 default: 6965 break; 6966 } 6967 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ 6968 } 6969 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); 6970 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); 6971 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); 6972 } 6973 6974 soc21_grbm_select(adev, 0, 0, 0, 0); 6975 mutex_unlock(&adev->srbm_mutex); 6976 gfx_v11_0_unset_safe_mode(adev, 0); 6977 6978 dev_info(adev->dev, "The ring %s pipe reset to the MEC firmware start PC %s\n", ring->name, 6979 r == 0 ? "succeeded" : "failed"); 6980 /* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly, so the pipe 6981 * reset status relies on the compute ring test result. 6982 */ 6983 return 0; 6984 } 6985 6986 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, 6987 unsigned int vmid, 6988 struct amdgpu_fence *timedout_fence) 6989 { 6990 struct amdgpu_device *adev = ring->adev; 6991 int r = 0; 6992 6993 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 6994 6995 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); 6996 if (r) { 6997 dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r); 6998 r = gfx_v11_0_reset_compute_pipe(ring); 6999 if (r) 7000 return r; 7001 } 7002 7003 r = gfx_v11_0_kcq_init_queue(ring, true); 7004 if (r) { 7005 dev_err(adev->dev, "failed to init kcq\n"); 7006 return r; 7007 } 7008 r = amdgpu_mes_map_legacy_queue(adev, ring); 7009 if (r) { 7010 dev_err(adev->dev, "failed to remap kcq\n"); 7011 return r; 7012 } 7013 7014 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 7015 } 7016 7017 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7018 { 7019 struct amdgpu_device *adev = ip_block->adev; 7020 uint32_t i, j, k, reg, index = 0; 7021 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7022 7023 if (!adev->gfx.ip_dump_core) 7024 return; 7025 7026 for (i = 0; i < reg_count; i++) 7027 drm_printf(p, "%-50s \t 0x%08x\n", 7028 gc_reg_list_11_0[i].reg_name, 7029 adev->gfx.ip_dump_core[i]); 7030 7031 /* print compute queue registers for all instances */ 7032 if (!adev->gfx.ip_dump_compute_queues) 7033 return; 7034 7035 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7036 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7037 adev->gfx.mec.num_mec, 7038 adev->gfx.mec.num_pipe_per_mec, 7039 adev->gfx.mec.num_queue_per_pipe); 7040 7041 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7042 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7043 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7044 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7045 for (reg = 0; reg < reg_count; reg++) { 7046 if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) 7047 drm_printf(p, "%-50s \t 0x%08x\n", 7048
"regCP_MEC_ME2_HEADER_DUMP", 7049 adev->gfx.ip_dump_compute_queues[index + reg]); 7050 else 7051 drm_printf(p, "%-50s \t 0x%08x\n", 7052 gc_cp_reg_list_11[reg].reg_name, 7053 adev->gfx.ip_dump_compute_queues[index + reg]); 7054 } 7055 index += reg_count; 7056 } 7057 } 7058 } 7059 7060 /* print gfx queue registers for all instances */ 7061 if (!adev->gfx.ip_dump_gfx_queues) 7062 return; 7063 7064 index = 0; 7065 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7066 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7067 adev->gfx.me.num_me, 7068 adev->gfx.me.num_pipe_per_me, 7069 adev->gfx.me.num_queue_per_pipe); 7070 7071 for (i = 0; i < adev->gfx.me.num_me; i++) { 7072 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7073 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7074 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7075 for (reg = 0; reg < reg_count; reg++) { 7076 drm_printf(p, "%-50s \t 0x%08x\n", 7077 gc_gfx_queue_reg_list_11[reg].reg_name, 7078 adev->gfx.ip_dump_gfx_queues[index + reg]); 7079 } 7080 index += reg_count; 7081 } 7082 } 7083 } 7084 } 7085 7086 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7087 { 7088 struct amdgpu_device *adev = ip_block->adev; 7089 uint32_t i, j, k, reg, index = 0; 7090 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7091 7092 if (!adev->gfx.ip_dump_core) 7093 return; 7094 7095 amdgpu_gfx_off_ctrl(adev, false); 7096 for (i = 0; i < reg_count; i++) 7097 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7098 amdgpu_gfx_off_ctrl(adev, true); 7099 7100 /* dump compute queue registers for all instances */ 7101 if (!adev->gfx.ip_dump_compute_queues) 7102 return; 7103 7104 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7105 amdgpu_gfx_off_ctrl(adev, false); 7106 mutex_lock(&adev->srbm_mutex); 7107 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7108 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7109 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7110 /* ME0 is for GFX so start from 1 for CP */ 7111 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7112 for (reg = 0; reg < reg_count; reg++) { 7113 if (i && 7114 gc_cp_reg_list_11[reg].reg_offset == 7115 regCP_MEC_ME1_HEADER_DUMP) 7116 adev->gfx.ip_dump_compute_queues[index + reg] = 7117 RREG32(SOC15_REG_OFFSET(GC, 0, 7118 regCP_MEC_ME2_HEADER_DUMP)); 7119 else 7120 adev->gfx.ip_dump_compute_queues[index + reg] = 7121 RREG32(SOC15_REG_ENTRY_OFFSET( 7122 gc_cp_reg_list_11[reg])); 7123 } 7124 index += reg_count; 7125 } 7126 } 7127 } 7128 soc21_grbm_select(adev, 0, 0, 0, 0); 7129 mutex_unlock(&adev->srbm_mutex); 7130 amdgpu_gfx_off_ctrl(adev, true); 7131 7132 /* dump gfx queue registers for all instances */ 7133 if (!adev->gfx.ip_dump_gfx_queues) 7134 return; 7135 7136 index = 0; 7137 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7138 amdgpu_gfx_off_ctrl(adev, false); 7139 mutex_lock(&adev->srbm_mutex); 7140 for (i = 0; i < adev->gfx.me.num_me; i++) { 7141 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7142 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7143 soc21_grbm_select(adev, i, j, k, 0); 7144 7145 for (reg = 0; reg < reg_count; reg++) { 7146 adev->gfx.ip_dump_gfx_queues[index + reg] = 7147 RREG32(SOC15_REG_ENTRY_OFFSET( 7148 gc_gfx_queue_reg_list_11[reg])); 7149 } 7150 index += reg_count; 7151 } 7152 } 7153 } 7154 soc21_grbm_select(adev, 0, 0, 0, 0); 7155 mutex_unlock(&adev->srbm_mutex); 7156 amdgpu_gfx_off_ctrl(adev, true); 7157 } 7158 7159 static void 
gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7160 { 7161 /* Emit the cleaner shader */ 7162 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7163 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7164 } 7165 7166 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 7167 { 7168 amdgpu_gfx_profile_ring_begin_use(ring); 7169 7170 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7171 } 7172 7173 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 7174 { 7175 amdgpu_gfx_profile_ring_end_use(ring); 7176 7177 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7178 } 7179 7180 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 7181 .name = "gfx_v11_0", 7182 .early_init = gfx_v11_0_early_init, 7183 .late_init = gfx_v11_0_late_init, 7184 .sw_init = gfx_v11_0_sw_init, 7185 .sw_fini = gfx_v11_0_sw_fini, 7186 .hw_init = gfx_v11_0_hw_init, 7187 .hw_fini = gfx_v11_0_hw_fini, 7188 .suspend = gfx_v11_0_suspend, 7189 .resume = gfx_v11_0_resume, 7190 .is_idle = gfx_v11_0_is_idle, 7191 .wait_for_idle = gfx_v11_0_wait_for_idle, 7192 .soft_reset = gfx_v11_0_soft_reset, 7193 .check_soft_reset = gfx_v11_0_check_soft_reset, 7194 .post_soft_reset = gfx_v11_0_post_soft_reset, 7195 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 7196 .set_powergating_state = gfx_v11_0_set_powergating_state, 7197 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 7198 .dump_ip_state = gfx_v11_ip_dump, 7199 .print_ip_state = gfx_v11_ip_print, 7200 }; 7201 7202 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7203 .type = AMDGPU_RING_TYPE_GFX, 7204 .align_mask = 0xff, 7205 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7206 .support_64bit_ptrs = true, 7207 .secure_submission_supported = true, 7208 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 7209 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 7210 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 7211 .emit_frame_size = /* totally 247 maximum if 16 IBs */ 7212 5 + /* update_spm_vmid */ 7213 5 + /* COND_EXEC */ 7214 22 + /* SET_Q_PREEMPTION_MODE */ 7215 7 + /* PIPELINE_SYNC */ 7216 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7217 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7218 4 + /* VM_FLUSH */ 7219 8 + /* FENCE for VM_FLUSH */ 7220 20 + /* GDS switch */ 7221 5 + /* COND_EXEC */ 7222 7 + /* HDP_flush */ 7223 4 + /* VGT_flush */ 7224 31 + /* DE_META */ 7225 3 + /* CNTX_CTRL */ 7226 5 + /* HDP_INVL */ 7227 22 + /* SET_Q_PREEMPTION_MODE */ 7228 8 + 8 + /* FENCE x2 */ 7229 8 + /* gfx_v11_0_emit_mem_sync */ 7230 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7231 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 7232 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 7233 .emit_fence = gfx_v11_0_ring_emit_fence, 7234 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7235 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7236 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7237 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7238 .test_ring = gfx_v11_0_ring_test_ring, 7239 .test_ib = gfx_v11_0_ring_test_ib, 7240 .insert_nop = gfx_v11_ring_insert_nop, 7241 .pad_ib = amdgpu_ring_generic_pad_ib, 7242 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 7243 .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, 7244 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 7245 .preempt_ib = gfx_v11_0_ring_preempt_ib, 7246 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 7247 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7248 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7249 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7250 
.emit_mem_sync = gfx_v11_0_emit_mem_sync, 7251 .reset = gfx_v11_0_reset_kgq, 7252 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7253 .begin_use = gfx_v11_0_ring_begin_use, 7254 .end_use = gfx_v11_0_ring_end_use, 7255 }; 7256 7257 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 7258 .type = AMDGPU_RING_TYPE_COMPUTE, 7259 .align_mask = 0xff, 7260 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7261 .support_64bit_ptrs = true, 7262 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7263 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7264 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7265 .emit_frame_size = 7266 5 + /* update_spm_vmid */ 7267 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7268 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7269 5 + /* hdp invalidate */ 7270 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7271 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7272 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7273 2 + /* gfx_v11_0_ring_emit_vm_flush */ 7274 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 7275 8 + /* gfx_v11_0_emit_mem_sync */ 7276 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7277 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7278 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7279 .emit_fence = gfx_v11_0_ring_emit_fence, 7280 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7281 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7282 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7283 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7284 .test_ring = gfx_v11_0_ring_test_ring, 7285 .test_ib = gfx_v11_0_ring_test_ib, 7286 .insert_nop = gfx_v11_ring_insert_nop, 7287 .pad_ib = amdgpu_ring_generic_pad_ib, 7288 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7289 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7290 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7291 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7292 .reset = gfx_v11_0_reset_kcq, 7293 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7294 .begin_use = gfx_v11_0_ring_begin_use, 7295 .end_use = gfx_v11_0_ring_end_use, 7296 }; 7297 7298 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7299 .type = AMDGPU_RING_TYPE_KIQ, 7300 .align_mask = 0xff, 7301 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7302 .support_64bit_ptrs = true, 7303 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7304 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7305 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7306 .emit_frame_size = 7307 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7308 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7309 5 + /*hdp invalidate */ 7310 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7311 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7312 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7313 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7314 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7315 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7316 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 7317 .test_ring = gfx_v11_0_ring_test_ring, 7318 .test_ib = gfx_v11_0_ring_test_ib, 7319 .insert_nop = amdgpu_ring_insert_nop, 7320 .pad_ib = amdgpu_ring_generic_pad_ib, 7321 .emit_rreg = gfx_v11_0_ring_emit_rreg, 7322 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7323 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7324 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7325 }; 7326 7327 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 7328 { 7329 int i; 7330 7331 adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; 7332 7333 for (i = 0; i < 
adev->gfx.num_gfx_rings; i++) 7334 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 7335 7336 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7337 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 7338 } 7339 7340 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 7341 .set = gfx_v11_0_set_eop_interrupt_state, 7342 .process = gfx_v11_0_eop_irq, 7343 }; 7344 7345 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 7346 .set = gfx_v11_0_set_priv_reg_fault_state, 7347 .process = gfx_v11_0_priv_reg_irq, 7348 }; 7349 7350 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { 7351 .set = gfx_v11_0_set_bad_op_fault_state, 7352 .process = gfx_v11_0_bad_op_irq, 7353 }; 7354 7355 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 7356 .set = gfx_v11_0_set_priv_inst_fault_state, 7357 .process = gfx_v11_0_priv_inst_irq, 7358 }; 7359 7360 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 7361 .process = gfx_v11_0_rlc_gc_fed_irq, 7362 }; 7363 7364 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 7365 { 7366 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7367 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 7368 7369 adev->gfx.priv_reg_irq.num_types = 1; 7370 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 7371 7372 adev->gfx.bad_op_irq.num_types = 1; 7373 adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; 7374 7375 adev->gfx.priv_inst_irq.num_types = 1; 7376 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 7377 7378 adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 7379 adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 7380 7381 } 7382 7383 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 7384 { 7385 if (adev->flags & AMD_IS_APU) 7386 adev->gfx.imu.mode = MISSION_MODE; 7387 else 7388 adev->gfx.imu.mode = DEBUG_MODE; 7389 7390 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 7391 } 7392 7393 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 7394 { 7395 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 7396 } 7397 7398 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 7399 { 7400 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 7401 adev->gfx.config.max_sh_per_se * 7402 adev->gfx.config.max_shader_engines; 7403 7404 adev->gds.gds_size = 0x1000; 7405 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 7406 adev->gds.gws_size = 64; 7407 adev->gds.oa_size = 16; 7408 } 7409 7410 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 7411 { 7412 /* set gfx eng mqd */ 7413 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 7414 sizeof(struct v11_gfx_mqd); 7415 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 7416 gfx_v11_0_gfx_mqd_init; 7417 /* set compute eng mqd */ 7418 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 7419 sizeof(struct v11_compute_mqd); 7420 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 7421 gfx_v11_0_compute_mqd_init; 7422 } 7423 7424 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 7425 u32 bitmap) 7426 { 7427 u32 data; 7428 7429 if (!bitmap) 7430 return; 7431 7432 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7433 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7434 7435 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 7436 } 7437 7438 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 7439 { 7440 u32 data, wgp_bitmask; 7441 data = RREG32_SOC15(GC, 0, 
regCC_GC_SHADER_ARRAY_CONFIG); 7442 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 7443 7444 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7445 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7446 7447 wgp_bitmask = 7448 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 7449 7450 return (~data) & wgp_bitmask; 7451 } 7452 7453 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 7454 { 7455 u32 wgp_idx, wgp_active_bitmap; 7456 u32 cu_bitmap_per_wgp, cu_active_bitmap; 7457 7458 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 7459 cu_active_bitmap = 0; 7460 7461 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 7462 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 7463 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 7464 if (wgp_active_bitmap & (1 << wgp_idx)) 7465 cu_active_bitmap |= cu_bitmap_per_wgp; 7466 } 7467 7468 return cu_active_bitmap; 7469 } 7470 7471 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 7472 struct amdgpu_cu_info *cu_info) 7473 { 7474 int i, j, k, counter, active_cu_number = 0; 7475 u32 mask, bitmap; 7476 unsigned disable_masks[8 * 2]; 7477 7478 if (!adev || !cu_info) 7479 return -EINVAL; 7480 7481 amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2); 7482 7483 mutex_lock(&adev->grbm_idx_mutex); 7484 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7485 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7486 bitmap = i * adev->gfx.config.max_sh_per_se + j; 7487 if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) 7488 continue; 7489 mask = 1; 7490 counter = 0; 7491 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); 7492 if (i < 8 && j < 2) 7493 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 7494 adev, disable_masks[i * 2 + j]); 7495 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 7496 7497 /** 7498 * GFX11 could support more than 4 SEs, while the bitmap 7499 * in cu_info struct is 4x4 and ioctl interface struct 7500 * drm_amdgpu_info_device should keep stable. 7501 * So we use last two columns of bitmap to store cu mask for 7502 * SEs 4 to 7, the layout of the bitmap is as below: 7503 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 7504 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 7505 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 7506 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 7507 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 7508 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 7509 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 7510 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 7511 */ 7512 cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; 7513 7514 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7515 if (bitmap & mask) 7516 counter++; 7517 7518 mask <<= 1; 7519 } 7520 active_cu_number += counter; 7521 } 7522 } 7523 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7524 mutex_unlock(&adev->grbm_idx_mutex); 7525 7526 cu_info->number = active_cu_number; 7527 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7528 7529 return 0; 7530 } 7531 7532 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 7533 { 7534 .type = AMD_IP_BLOCK_TYPE_GFX, 7535 .major = 11, 7536 .minor = 0, 7537 .rev = 0, 7538 .funcs = &gfx_v11_0_ip_funcs, 7539 }; 7540