/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS	1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL				0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX			1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1		0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1				0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX			1

#define regCP_GFX_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT			0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT		0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT			0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT			0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT			0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT			0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT			0x00308509
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_HQD_PQ_RPTR_DEFAULT			0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT		0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT			0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
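/*
 * gc_11_0_0_rlc_1.bin is an alternate RLC image for GC 11.0.0: it is
 * requested in place of the default RLC firmware for boards with PCI
 * revision 0xCE (see gfx_v11_0_init_microcode() below).
 */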
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");

static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
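	/* gds/utcl1 fault and status registers */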
SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 162 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 163 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), 164 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 165 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 166 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 167 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 168 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 169 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 170 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 171 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 172 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 173 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 174 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 175 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 176 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 177 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 178 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 179 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 180 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 181 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), 182 /* cp header registers */ 183 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 184 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 185 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 186 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 187 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 188 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 189 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 190 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 191 /* SE status registers */ 192 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), 193 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), 194 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), 195 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), 196 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), 197 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) 198 }; 199 200 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { 201 /* compute registers */ 202 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), 203 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), 204 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), 205 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), 206 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), 207 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), 208 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), 209 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), 210 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), 211 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), 212 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), 213 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), 214 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), 215 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), 216 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), 217 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), 218 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), 219 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), 220 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), 221 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), 222 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), 223 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), 224 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), 225 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), 226 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), 227 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), 228 SOC15_REG_ENTRY_STR(GC, 
0, regCP_HQD_CNTL_STACK_OFFSET), 229 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), 230 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), 231 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), 232 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), 233 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), 234 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), 235 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), 236 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), 237 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), 238 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), 239 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), 240 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), 241 /* cp header registers */ 242 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 243 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 244 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 245 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 246 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 247 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 248 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 249 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 250 }; 251 252 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { 253 /* gfx queue registers */ 254 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), 255 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), 256 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), 257 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), 258 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), 259 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), 260 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), 261 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), 262 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), 263 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), 264 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), 265 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), 266 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), 267 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), 268 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), 269 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), 270 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), 271 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), 272 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), 273 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), 274 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 275 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 276 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 277 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 278 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 279 /* cp header registers */ 280 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 281 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 282 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 283 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 284 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 285 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 286 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 287 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 288 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 289 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 290 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 291 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 292 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 293 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 294 SOC15_REG_ENTRY_STR(GC, 0, 
regCP_ME_HEADER_DUMP), 295 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 296 }; 297 298 static const struct soc15_reg_golden golden_settings_gc_11_0[] = { 299 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) 300 }; 301 302 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 303 { 304 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), 305 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010), 306 SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), 307 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988), 308 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007), 309 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008), 310 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100), 311 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), 312 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) 313 }; 314 315 #define DEFAULT_SH_MEM_CONFIG \ 316 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 317 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 318 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 319 320 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev); 321 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev); 322 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev); 323 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev); 324 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev); 325 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev); 326 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev); 327 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 328 struct amdgpu_cu_info *cu_info); 329 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); 330 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 331 u32 sh_num, u32 instance, int xcc_id); 332 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); 333 334 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 335 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 336 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 337 uint32_t val); 338 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 339 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 340 uint16_t pasid, uint32_t flush_type, 341 bool all_hub, uint8_t dst_sel); 342 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 343 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 344 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 345 bool enable); 346 347 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 348 { 349 struct amdgpu_device *adev = kiq_ring->adev; 350 u64 shader_mc_addr; 351 352 /* Cleaner shader MC address */ 353 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 354 355 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 356 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 357 PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ 358 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 359 amdgpu_ring_write(kiq_ring, 
lower_32_bits(queue_mask)); /* queue mask lo */ 360 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 361 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ 362 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ 363 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 364 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 365 } 366 367 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 368 struct amdgpu_ring *ring) 369 { 370 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 371 uint64_t wptr_addr = ring->wptr_gpu_addr; 372 uint32_t me = 0, eng_sel = 0; 373 374 switch (ring->funcs->type) { 375 case AMDGPU_RING_TYPE_COMPUTE: 376 me = 1; 377 eng_sel = 0; 378 break; 379 case AMDGPU_RING_TYPE_GFX: 380 me = 0; 381 eng_sel = 4; 382 break; 383 case AMDGPU_RING_TYPE_MES: 384 me = 2; 385 eng_sel = 5; 386 break; 387 default: 388 WARN_ON(1); 389 } 390 391 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 392 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 393 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 394 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 395 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 396 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 397 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 398 PACKET3_MAP_QUEUES_ME((me)) | 399 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 400 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 401 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 402 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 403 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 404 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 405 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 406 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 407 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 408 } 409 410 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 411 struct amdgpu_ring *ring, 412 enum amdgpu_unmap_queues_action action, 413 u64 gpu_addr, u64 seq) 414 { 415 struct amdgpu_device *adev = kiq_ring->adev; 416 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 417 418 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 419 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq); 420 return; 421 } 422 423 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 424 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 425 PACKET3_UNMAP_QUEUES_ACTION(action) | 426 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 427 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 428 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 429 amdgpu_ring_write(kiq_ring, 430 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 431 432 if (action == PREEMPT_QUEUES_NO_UNMAP) { 433 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 434 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 435 amdgpu_ring_write(kiq_ring, seq); 436 } else { 437 amdgpu_ring_write(kiq_ring, 0); 438 amdgpu_ring_write(kiq_ring, 0); 439 amdgpu_ring_write(kiq_ring, 0); 440 } 441 } 442 443 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 444 struct amdgpu_ring *ring, 445 u64 addr, 446 u64 seq) 447 { 448 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 449 450 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 451 amdgpu_ring_write(kiq_ring, 452 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 453 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 454 PACKET3_QUERY_STATUS_COMMAND(2)); 455 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 456 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 457 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 458 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 459 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 460 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 461 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 462 } 463 464 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 465 uint16_t pasid, uint32_t flush_type, 466 bool all_hub) 467 { 468 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 469 } 470 471 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 472 .kiq_set_resources = gfx11_kiq_set_resources, 473 .kiq_map_queues = gfx11_kiq_map_queues, 474 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 475 .kiq_query_status = gfx11_kiq_query_status, 476 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 477 .set_resources_size = 8, 478 .map_queues_size = 7, 479 .unmap_queues_size = 6, 480 .query_status_size = 7, 481 .invalidate_tlbs_size = 2, 482 }; 483 484 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 485 { 486 adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; 487 } 488 489 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 490 { 491 if (amdgpu_sriov_vf(adev)) 492 return; 493 494 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 495 case IP_VERSION(11, 0, 1): 496 case IP_VERSION(11, 0, 4): 497 soc15_program_register_sequence(adev, 498 golden_settings_gc_11_0_1, 499 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 500 break; 501 default: 502 break; 503 } 504 soc15_program_register_sequence(adev, 505 golden_settings_gc_11_0, 506 (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); 507 508 } 509 510 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 511 bool wc, uint32_t reg, uint32_t val) 512 { 513 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 514 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 515 WRITE_DATA_DST_SEL(0) | (wc ? 
WR_CONFIRM : 0)); 516 amdgpu_ring_write(ring, reg); 517 amdgpu_ring_write(ring, 0); 518 amdgpu_ring_write(ring, val); 519 } 520 521 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 522 int mem_space, int opt, uint32_t addr0, 523 uint32_t addr1, uint32_t ref, uint32_t mask, 524 uint32_t inv) 525 { 526 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 527 amdgpu_ring_write(ring, 528 /* memory (1) or register (0) */ 529 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 530 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 531 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 532 WAIT_REG_MEM_ENGINE(eng_sel))); 533 534 if (mem_space) 535 BUG_ON(addr0 & 0x3); /* Dword align */ 536 amdgpu_ring_write(ring, addr0); 537 amdgpu_ring_write(ring, addr1); 538 amdgpu_ring_write(ring, ref); 539 amdgpu_ring_write(ring, mask); 540 amdgpu_ring_write(ring, inv); /* poll interval */ 541 } 542 543 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 544 { 545 /* Header itself is a NOP packet */ 546 if (num_nop == 1) { 547 amdgpu_ring_write(ring, ring->funcs->nop); 548 return; 549 } 550 551 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 552 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 553 554 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 555 amdgpu_ring_insert_nop(ring, num_nop - 1); 556 } 557 558 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) 559 { 560 struct amdgpu_device *adev = ring->adev; 561 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 562 uint32_t tmp = 0; 563 unsigned i; 564 int r; 565 566 WREG32(scratch, 0xCAFEDEAD); 567 r = amdgpu_ring_alloc(ring, 5); 568 if (r) { 569 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 570 ring->idx, r); 571 return r; 572 } 573 574 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 575 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); 576 } else { 577 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 578 amdgpu_ring_write(ring, scratch - 579 PACKET3_SET_UCONFIG_REG_START); 580 amdgpu_ring_write(ring, 0xDEADBEEF); 581 } 582 amdgpu_ring_commit(ring); 583 584 for (i = 0; i < adev->usec_timeout; i++) { 585 tmp = RREG32(scratch); 586 if (tmp == 0xDEADBEEF) 587 break; 588 if (amdgpu_emu_mode == 1) 589 msleep(1); 590 else 591 udelay(1); 592 } 593 594 if (i >= adev->usec_timeout) 595 r = -ETIMEDOUT; 596 return r; 597 } 598 599 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 600 { 601 struct amdgpu_device *adev = ring->adev; 602 struct amdgpu_ib ib; 603 struct dma_fence *f = NULL; 604 unsigned index; 605 uint64_t gpu_addr; 606 volatile uint32_t *cpu_ptr; 607 long r; 608 609 /* MES KIQ fw hasn't indirect buffer support for now */ 610 if (adev->enable_mes_kiq && 611 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 612 return 0; 613 614 memset(&ib, 0, sizeof(ib)); 615 616 r = amdgpu_device_wb_get(adev, &index); 617 if (r) 618 return r; 619 620 gpu_addr = adev->wb.gpu_addr + (index * 4); 621 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 622 cpu_ptr = &adev->wb.wb[index]; 623 624 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 625 if (r) { 626 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 627 goto err1; 628 } 629 630 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 631 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 632 ib.ptr[2] = lower_32_bits(gpu_addr); 633 ib.ptr[3] = upper_32_bits(gpu_addr); 634 ib.ptr[4] = 0xDEADBEEF; 635 ib.length_dw = 5; 636 637 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, 
&f); 638 if (r) 639 goto err2; 640 641 r = dma_fence_wait_timeout(f, false, timeout); 642 if (r == 0) { 643 r = -ETIMEDOUT; 644 goto err2; 645 } else if (r < 0) { 646 goto err2; 647 } 648 649 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 650 r = 0; 651 else 652 r = -EINVAL; 653 err2: 654 amdgpu_ib_free(&ib, NULL); 655 dma_fence_put(f); 656 err1: 657 amdgpu_device_wb_free(adev, index); 658 return r; 659 } 660 661 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 662 { 663 amdgpu_ucode_release(&adev->gfx.pfp_fw); 664 amdgpu_ucode_release(&adev->gfx.me_fw); 665 amdgpu_ucode_release(&adev->gfx.rlc_fw); 666 amdgpu_ucode_release(&adev->gfx.mec_fw); 667 668 kfree(adev->gfx.rlc.register_list_format); 669 } 670 671 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 672 { 673 const struct psp_firmware_header_v1_0 *toc_hdr; 674 int err = 0; 675 676 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 677 AMDGPU_UCODE_REQUIRED, 678 "amdgpu/%s_toc.bin", ucode_prefix); 679 if (err) 680 goto out; 681 682 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 683 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 684 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 685 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 686 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 687 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 688 return 0; 689 out: 690 amdgpu_ucode_release(&adev->psp.toc_fw); 691 return err; 692 } 693 694 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 695 { 696 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 697 case IP_VERSION(11, 0, 0): 698 case IP_VERSION(11, 0, 2): 699 case IP_VERSION(11, 0, 3): 700 if ((adev->gfx.me_fw_version >= 1505) && 701 (adev->gfx.pfp_fw_version >= 1600) && 702 (adev->gfx.mec_fw_version >= 512)) { 703 if (amdgpu_sriov_vf(adev)) 704 adev->gfx.cp_gfx_shadow = true; 705 else 706 adev->gfx.cp_gfx_shadow = false; 707 } 708 break; 709 default: 710 adev->gfx.cp_gfx_shadow = false; 711 break; 712 } 713 } 714 715 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 716 { 717 char ucode_prefix[25]; 718 int err; 719 const struct rlc_firmware_header_v2_0 *rlc_hdr; 720 uint16_t version_major; 721 uint16_t version_minor; 722 723 DRM_DEBUG("\n"); 724 725 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 726 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 727 AMDGPU_UCODE_REQUIRED, 728 "amdgpu/%s_pfp.bin", ucode_prefix); 729 if (err) 730 goto out; 731 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 732 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 733 (union amdgpu_firmware_header *) 734 adev->gfx.pfp_fw->data, 2, 0); 735 if (adev->gfx.rs64_enable) { 736 dev_info(adev->dev, "CP RS64 enable\n"); 737 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 738 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 739 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 740 } else { 741 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 742 } 743 744 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 745 AMDGPU_UCODE_REQUIRED, 746 "amdgpu/%s_me.bin", ucode_prefix); 747 if (err) 748 goto out; 749 if (adev->gfx.rs64_enable) { 750 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 751 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 752 
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 753 } else { 754 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 755 } 756 757 if (!amdgpu_sriov_vf(adev)) { 758 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 759 adev->pdev->revision == 0xCE) 760 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 761 AMDGPU_UCODE_REQUIRED, 762 "amdgpu/gc_11_0_0_rlc_1.bin"); 763 else if (amdgpu_is_kicker_fw(adev)) 764 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 765 AMDGPU_UCODE_REQUIRED, 766 "amdgpu/%s_rlc_kicker.bin", ucode_prefix); 767 else 768 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 769 AMDGPU_UCODE_REQUIRED, 770 "amdgpu/%s_rlc.bin", ucode_prefix); 771 if (err) 772 goto out; 773 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 774 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 775 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 776 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 777 if (err) 778 goto out; 779 } 780 781 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 782 AMDGPU_UCODE_REQUIRED, 783 "amdgpu/%s_mec.bin", ucode_prefix); 784 if (err) 785 goto out; 786 if (adev->gfx.rs64_enable) { 787 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 788 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 789 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 790 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 791 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 792 } else { 793 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 794 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 795 } 796 797 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 798 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 799 800 /* only one MEC for gfx 11.0.0. 
*/ 801 adev->gfx.mec2_fw = NULL; 802 803 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 804 805 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 806 err = adev->gfx.imu.funcs->init_microcode(adev); 807 if (err) 808 DRM_ERROR("Failed to init imu firmware!\n"); 809 return err; 810 } 811 812 out: 813 if (err) { 814 amdgpu_ucode_release(&adev->gfx.pfp_fw); 815 amdgpu_ucode_release(&adev->gfx.me_fw); 816 amdgpu_ucode_release(&adev->gfx.rlc_fw); 817 amdgpu_ucode_release(&adev->gfx.mec_fw); 818 } 819 820 return err; 821 } 822 823 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 824 { 825 u32 count = 0; 826 const struct cs_section_def *sect = NULL; 827 const struct cs_extent_def *ext = NULL; 828 829 /* begin clear state */ 830 count += 2; 831 /* context control state */ 832 count += 3; 833 834 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 835 for (ext = sect->section; ext->extent != NULL; ++ext) { 836 if (sect->id == SECT_CONTEXT) 837 count += 2 + ext->reg_count; 838 else 839 return 0; 840 } 841 } 842 843 /* set PA_SC_TILE_STEERING_OVERRIDE */ 844 count += 3; 845 /* end clear state */ 846 count += 2; 847 /* clear state */ 848 count += 2; 849 850 return count; 851 } 852 853 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, 854 volatile u32 *buffer) 855 { 856 u32 count = 0; 857 int ctx_reg_offset; 858 859 if (adev->gfx.rlc.cs_data == NULL) 860 return; 861 if (buffer == NULL) 862 return; 863 864 count = amdgpu_gfx_csb_preamble_start(buffer); 865 count = amdgpu_gfx_csb_data_parser(adev, buffer, count); 866 867 ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 868 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 869 buffer[count++] = cpu_to_le32(ctx_reg_offset); 870 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 871 872 amdgpu_gfx_csb_preamble_end(buffer, count); 873 } 874 875 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 876 { 877 /* clear state block */ 878 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 879 &adev->gfx.rlc.clear_state_gpu_addr, 880 (void **)&adev->gfx.rlc.cs_ptr); 881 882 /* jump table block */ 883 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 884 &adev->gfx.rlc.cp_table_gpu_addr, 885 (void **)&adev->gfx.rlc.cp_table_ptr); 886 } 887 888 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 889 { 890 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 891 892 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 893 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 894 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 895 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 896 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 897 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 898 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 899 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 900 adev->gfx.rlc.rlcg_reg_access_supported = true; 901 } 902 903 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 904 { 905 const struct cs_section_def *cs_data; 906 int r; 907 908 adev->gfx.rlc.cs_data = gfx11_cs_data; 909 910 cs_data = adev->gfx.rlc.cs_data; 911 912 if (cs_data) { 913 /* init clear state block */ 914 r = amdgpu_gfx_rlc_init_csb(adev); 915 if (r) 916 return r; 917 } 918 919 /* init spm vmid with 0xf */ 920 if 
(adev->gfx.rlc.funcs->update_spm_vmid) 921 adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); 922 923 return 0; 924 } 925 926 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 927 { 928 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 929 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 930 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 931 } 932 933 static void gfx_v11_0_me_init(struct amdgpu_device *adev) 934 { 935 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 936 937 amdgpu_gfx_graphics_queue_acquire(adev); 938 } 939 940 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 941 { 942 int r; 943 u32 *hpd; 944 size_t mec_hpd_size; 945 946 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 947 948 /* take ownership of the relevant compute queues */ 949 amdgpu_gfx_compute_queue_acquire(adev); 950 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 951 952 if (mec_hpd_size) { 953 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 954 AMDGPU_GEM_DOMAIN_GTT, 955 &adev->gfx.mec.hpd_eop_obj, 956 &adev->gfx.mec.hpd_eop_gpu_addr, 957 (void **)&hpd); 958 if (r) { 959 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 960 gfx_v11_0_mec_fini(adev); 961 return r; 962 } 963 964 memset(hpd, 0, mec_hpd_size); 965 966 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 967 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 968 } 969 970 return 0; 971 } 972 973 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 974 { 975 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 976 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 977 (address << SQ_IND_INDEX__INDEX__SHIFT)); 978 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 979 } 980 981 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 982 uint32_t thread, uint32_t regno, 983 uint32_t num, uint32_t *out) 984 { 985 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 986 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 987 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 988 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 989 (SQ_IND_INDEX__AUTO_INCR_MASK)); 990 while (num--) 991 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 992 } 993 994 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 995 { 996 /* in gfx11 the SIMD_ID is specified as part of the INSTANCE 997 * field when performing a select_se_sh so it should be 998 * zero here */ 999 WARN_ON(simd != 0); 1000 1001 /* type 3 wave data */ 1002 dst[(*no_fields)++] = 3; 1003 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1004 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1005 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1006 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1007 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1008 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1009 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1010 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1011 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1012 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1013 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1014 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1015 dst[(*no_fields)++] = wave_read_ind(adev, wave, 
ixSQ_WAVE_IB_DBG1); 1016 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1017 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1018 } 1019 1020 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1021 uint32_t wave, uint32_t start, 1022 uint32_t size, uint32_t *dst) 1023 { 1024 WARN_ON(simd != 0); 1025 1026 wave_read_regs( 1027 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1028 dst); 1029 } 1030 1031 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1032 uint32_t wave, uint32_t thread, 1033 uint32_t start, uint32_t size, 1034 uint32_t *dst) 1035 { 1036 wave_read_regs( 1037 adev, wave, thread, 1038 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1039 } 1040 1041 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1042 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1043 { 1044 soc21_grbm_select(adev, me, pipe, q, vm); 1045 } 1046 1047 /* all sizes are in bytes */ 1048 #define MQD_SHADOW_BASE_SIZE 73728 1049 #define MQD_SHADOW_BASE_ALIGNMENT 256 1050 #define MQD_FWWORKAREA_SIZE 484 1051 #define MQD_FWWORKAREA_ALIGNMENT 256 1052 1053 static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, 1054 struct amdgpu_gfx_shadow_info *shadow_info) 1055 { 1056 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1057 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1058 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1059 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1060 } 1061 1062 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1063 struct amdgpu_gfx_shadow_info *shadow_info, 1064 bool skip_check) 1065 { 1066 if (adev->gfx.cp_gfx_shadow || skip_check) { 1067 gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); 1068 return 0; 1069 } else { 1070 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1071 return -ENOTSUPP; 1072 } 1073 } 1074 1075 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1076 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1077 .select_se_sh = &gfx_v11_0_select_se_sh, 1078 .read_wave_data = &gfx_v11_0_read_wave_data, 1079 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1080 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1081 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1082 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1083 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1084 }; 1085 1086 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1087 { 1088 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1089 case IP_VERSION(11, 0, 0): 1090 case IP_VERSION(11, 0, 2): 1091 adev->gfx.config.max_hw_contexts = 8; 1092 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1093 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1094 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1095 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1096 break; 1097 case IP_VERSION(11, 0, 3): 1098 adev->gfx.ras = &gfx_v11_0_3_ras; 1099 adev->gfx.config.max_hw_contexts = 8; 1100 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1101 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1102 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1103 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1104 break; 1105 case IP_VERSION(11, 0, 1): 1106 case IP_VERSION(11, 0, 4): 1107 case IP_VERSION(11, 5, 0): 1108 case IP_VERSION(11, 5, 1): 1109 case IP_VERSION(11, 5, 2): 1110 case IP_VERSION(11, 5, 3): 1111 adev->gfx.config.max_hw_contexts = 8; 1112 
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1113 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1114 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1115 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1116 break; 1117 default: 1118 BUG(); 1119 break; 1120 } 1121 1122 return 0; 1123 } 1124 1125 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1126 int me, int pipe, int queue) 1127 { 1128 struct amdgpu_ring *ring; 1129 unsigned int irq_type; 1130 unsigned int hw_prio; 1131 1132 ring = &adev->gfx.gfx_ring[ring_id]; 1133 1134 ring->me = me; 1135 ring->pipe = pipe; 1136 ring->queue = queue; 1137 1138 ring->ring_obj = NULL; 1139 ring->use_doorbell = true; 1140 if (adev->gfx.disable_kq) { 1141 ring->no_scheduler = true; 1142 ring->no_user_submission = true; 1143 } 1144 1145 if (!ring_id) 1146 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1147 else 1148 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1149 ring->vm_hub = AMDGPU_GFXHUB(0); 1150 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1151 1152 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1153 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1154 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1155 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1156 hw_prio, NULL); 1157 } 1158 1159 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1160 int mec, int pipe, int queue) 1161 { 1162 int r; 1163 unsigned irq_type; 1164 struct amdgpu_ring *ring; 1165 unsigned int hw_prio; 1166 1167 ring = &adev->gfx.compute_ring[ring_id]; 1168 1169 /* mec0 is me1 */ 1170 ring->me = mec + 1; 1171 ring->pipe = pipe; 1172 ring->queue = queue; 1173 1174 ring->ring_obj = NULL; 1175 ring->use_doorbell = true; 1176 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1177 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1178 + (ring_id * GFX11_MEC_HPD_SIZE); 1179 ring->vm_hub = AMDGPU_GFXHUB(0); 1180 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1181 1182 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1183 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1184 + ring->pipe; 1185 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1186 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1187 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1188 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1189 hw_prio, NULL); 1190 if (r) 1191 return r; 1192 1193 return 0; 1194 } 1195 1196 static struct { 1197 SOC21_FIRMWARE_ID id; 1198 unsigned int offset; 1199 unsigned int size; 1200 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1201 1202 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1203 { 1204 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1205 1206 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1207 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1208 rlc_autoload_info[ucode->id].id = ucode->id; 1209 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1210 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1211 1212 ucode++; 1213 } 1214 } 1215 1216 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1217 { 1218 uint32_t total_size = 0; 1219 SOC21_FIRMWARE_ID id; 1220 1221 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1222 1223 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1224 total_size += rlc_autoload_info[id].size; 1225 1226 /* In case the offset in rlc toc ucode is aligned */ 1227 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1228 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1229 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1230 1231 return total_size; 1232 } 1233 1234 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1235 { 1236 int r; 1237 uint32_t total_size; 1238 1239 total_size = gfx_v11_0_calc_toc_total_size(adev); 1240 1241 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1242 AMDGPU_GEM_DOMAIN_VRAM | 1243 AMDGPU_GEM_DOMAIN_GTT, 1244 &adev->gfx.rlc.rlc_autoload_bo, 1245 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1246 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1247 1248 if (r) { 1249 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1250 return r; 1251 } 1252 1253 return 0; 1254 } 1255 1256 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1257 SOC21_FIRMWARE_ID id, 1258 const void *fw_data, 1259 uint32_t fw_size, 1260 uint32_t *fw_autoload_mask) 1261 { 1262 uint32_t toc_offset; 1263 uint32_t toc_fw_size; 1264 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1265 1266 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1267 return; 1268 1269 toc_offset = rlc_autoload_info[id].offset; 1270 toc_fw_size = rlc_autoload_info[id].size; 1271 1272 if (fw_size == 0) 1273 fw_size = toc_fw_size; 1274 1275 if (fw_size > toc_fw_size) 1276 fw_size = toc_fw_size; 1277 1278 memcpy(ptr + toc_offset, fw_data, fw_size); 1279 1280 if (fw_size < toc_fw_size) 1281 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1282 1283 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1284 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1285 } 1286 1287 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1288 uint32_t *fw_autoload_mask) 1289 { 1290 void *data; 1291 uint32_t size; 1292 uint64_t *toc_ptr; 1293 1294 *(uint64_t *)fw_autoload_mask |= 0x1; 1295 1296 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1297 1298 data = adev->psp.toc.start_addr; 1299 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1300 1301 toc_ptr = (uint64_t *)data + size / 8 - 1; 1302 *toc_ptr = 
*(uint64_t *)fw_autoload_mask; 1303 1304 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1305 data, size, fw_autoload_mask); 1306 } 1307 1308 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1309 uint32_t *fw_autoload_mask) 1310 { 1311 const __le32 *fw_data; 1312 uint32_t fw_size; 1313 const struct gfx_firmware_header_v1_0 *cp_hdr; 1314 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1315 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1316 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1317 uint16_t version_major, version_minor; 1318 1319 if (adev->gfx.rs64_enable) { 1320 /* pfp ucode */ 1321 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1322 adev->gfx.pfp_fw->data; 1323 /* instruction */ 1324 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1325 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1326 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1327 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1328 fw_data, fw_size, fw_autoload_mask); 1329 /* data */ 1330 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1331 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1332 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1333 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1334 fw_data, fw_size, fw_autoload_mask); 1335 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1336 fw_data, fw_size, fw_autoload_mask); 1337 /* me ucode */ 1338 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1339 adev->gfx.me_fw->data; 1340 /* instruction */ 1341 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1342 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1343 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1344 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1345 fw_data, fw_size, fw_autoload_mask); 1346 /* data */ 1347 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1348 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1349 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1350 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1351 fw_data, fw_size, fw_autoload_mask); 1352 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1353 fw_data, fw_size, fw_autoload_mask); 1354 /* mec ucode */ 1355 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1356 adev->gfx.mec_fw->data; 1357 /* instruction */ 1358 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1359 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1360 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1361 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1362 fw_data, fw_size, fw_autoload_mask); 1363 /* data */ 1364 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1365 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1366 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1367 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1368 fw_data, fw_size, fw_autoload_mask); 1369 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1370 fw_data, fw_size, fw_autoload_mask); 1371 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1372 fw_data, fw_size, fw_autoload_mask); 1373 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1374 fw_data, fw_size, fw_autoload_mask); 1375 } else { 1376 /* pfp ucode */ 1377 cp_hdr = (const struct 
gfx_firmware_header_v1_0 *) 1378 adev->gfx.pfp_fw->data; 1379 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1380 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1381 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1382 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1383 fw_data, fw_size, fw_autoload_mask); 1384 1385 /* me ucode */ 1386 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1387 adev->gfx.me_fw->data; 1388 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1389 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1390 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1391 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1392 fw_data, fw_size, fw_autoload_mask); 1393 1394 /* mec ucode */ 1395 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1396 adev->gfx.mec_fw->data; 1397 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1398 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1399 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1400 cp_hdr->jt_size * 4; 1401 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1402 fw_data, fw_size, fw_autoload_mask); 1403 } 1404 1405 /* rlc ucode */ 1406 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1407 adev->gfx.rlc_fw->data; 1408 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1409 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1410 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1411 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1412 fw_data, fw_size, fw_autoload_mask); 1413 1414 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1415 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1416 if (version_major == 2) { 1417 if (version_minor >= 2) { 1418 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1419 1420 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1421 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1422 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1423 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1424 fw_data, fw_size, fw_autoload_mask); 1425 1426 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1427 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1428 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1429 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1430 fw_data, fw_size, fw_autoload_mask); 1431 } 1432 } 1433 } 1434 1435 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1436 uint32_t *fw_autoload_mask) 1437 { 1438 const __le32 *fw_data; 1439 uint32_t fw_size; 1440 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1441 1442 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1443 adev->sdma.instance[0].fw->data; 1444 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1445 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1446 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1447 1448 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1449 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1450 1451 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1452 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1453 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1454 1455 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1456 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, 
fw_autoload_mask); 1457 } 1458 1459 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1460 uint32_t *fw_autoload_mask) 1461 { 1462 const __le32 *fw_data; 1463 unsigned fw_size; 1464 const struct mes_firmware_header_v1_0 *mes_hdr; 1465 int pipe, ucode_id, data_id; 1466 1467 for (pipe = 0; pipe < 2; pipe++) { 1468 if (pipe==0) { 1469 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1470 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1471 } else { 1472 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1473 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1474 } 1475 1476 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1477 adev->mes.fw[pipe]->data; 1478 1479 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1480 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1481 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1482 1483 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1484 ucode_id, fw_data, fw_size, fw_autoload_mask); 1485 1486 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1487 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1488 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1489 1490 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1491 data_id, fw_data, fw_size, fw_autoload_mask); 1492 } 1493 } 1494 1495 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1496 { 1497 uint32_t rlc_g_offset, rlc_g_size; 1498 uint64_t gpu_addr; 1499 uint32_t autoload_fw_id[2]; 1500 1501 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1502 1503 /* RLC autoload sequence 2: copy ucode */ 1504 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1505 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1506 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1507 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1508 1509 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1510 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1511 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1512 1513 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1514 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1515 1516 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1517 1518 /* RLC autoload sequence 3: load IMU fw */ 1519 if (adev->gfx.imu.funcs->load_microcode) 1520 adev->gfx.imu.funcs->load_microcode(adev); 1521 /* RLC autoload sequence 4 init IMU fw */ 1522 if (adev->gfx.imu.funcs->setup_imu) 1523 adev->gfx.imu.funcs->setup_imu(adev); 1524 if (adev->gfx.imu.funcs->start_imu) 1525 adev->gfx.imu.funcs->start_imu(adev); 1526 1527 /* RLC autoload sequence 5 disable gpa mode */ 1528 gfx_v11_0_disable_gpa_mode(adev); 1529 1530 return 0; 1531 } 1532 1533 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1534 { 1535 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1536 uint32_t *ptr; 1537 uint32_t inst; 1538 1539 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1540 if (!ptr) { 1541 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1542 adev->gfx.ip_dump_core = NULL; 1543 } else { 1544 adev->gfx.ip_dump_core = ptr; 1545 } 1546 1547 /* Allocate memory for compute queue registers for all the instances */ 1548 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1549 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1550 adev->gfx.mec.num_queue_per_pipe; 1551 1552 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), 
GFP_KERNEL); 1553 if (!ptr) { 1554 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1555 adev->gfx.ip_dump_compute_queues = NULL; 1556 } else { 1557 adev->gfx.ip_dump_compute_queues = ptr; 1558 } 1559 1560 /* Allocate memory for gfx queue registers for all the instances */ 1561 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1562 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1563 adev->gfx.me.num_queue_per_pipe; 1564 1565 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1566 if (!ptr) { 1567 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1568 adev->gfx.ip_dump_gfx_queues = NULL; 1569 } else { 1570 adev->gfx.ip_dump_gfx_queues = ptr; 1571 } 1572 } 1573 1574 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1575 { 1576 int i, j, k, r, ring_id; 1577 int xcc_id = 0; 1578 struct amdgpu_device *adev = ip_block->adev; 1579 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1580 1581 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1582 1583 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1584 case IP_VERSION(11, 0, 0): 1585 case IP_VERSION(11, 0, 1): 1586 case IP_VERSION(11, 0, 2): 1587 case IP_VERSION(11, 0, 3): 1588 case IP_VERSION(11, 0, 4): 1589 case IP_VERSION(11, 5, 0): 1590 case IP_VERSION(11, 5, 1): 1591 case IP_VERSION(11, 5, 2): 1592 case IP_VERSION(11, 5, 3): 1593 adev->gfx.me.num_me = 1; 1594 adev->gfx.me.num_pipe_per_me = 1; 1595 adev->gfx.me.num_queue_per_pipe = 2; 1596 adev->gfx.mec.num_mec = 1; 1597 adev->gfx.mec.num_pipe_per_mec = 4; 1598 adev->gfx.mec.num_queue_per_pipe = 4; 1599 break; 1600 default: 1601 adev->gfx.me.num_me = 1; 1602 adev->gfx.me.num_pipe_per_me = 1; 1603 adev->gfx.me.num_queue_per_pipe = 1; 1604 adev->gfx.mec.num_mec = 1; 1605 adev->gfx.mec.num_pipe_per_mec = 4; 1606 adev->gfx.mec.num_queue_per_pipe = 8; 1607 break; 1608 } 1609 1610 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1611 case IP_VERSION(11, 0, 0): 1612 case IP_VERSION(11, 0, 2): 1613 case IP_VERSION(11, 0, 3): 1614 if (!adev->gfx.disable_uq && 1615 adev->gfx.me_fw_version >= 2390 && 1616 adev->gfx.pfp_fw_version >= 2530 && 1617 adev->gfx.mec_fw_version >= 2600 && 1618 adev->mes.fw_version[0] >= 120) { 1619 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1620 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1621 } 1622 break; 1623 case IP_VERSION(11, 0, 1): 1624 case IP_VERSION(11, 0, 4): 1625 case IP_VERSION(11, 5, 0): 1626 case IP_VERSION(11, 5, 1): 1627 case IP_VERSION(11, 5, 2): 1628 case IP_VERSION(11, 5, 3): 1629 /* add firmware version checks here */ 1630 if (0 && !adev->gfx.disable_uq) { 1631 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1632 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1633 } 1634 break; 1635 default: 1636 break; 1637 } 1638 1639 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1640 case IP_VERSION(11, 0, 0): 1641 case IP_VERSION(11, 0, 2): 1642 case IP_VERSION(11, 0, 3): 1643 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1644 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1645 if (adev->gfx.me_fw_version >= 2280 && 1646 adev->gfx.pfp_fw_version >= 2370 && 1647 adev->gfx.mec_fw_version >= 2450 && 1648 adev->mes.fw_version[0] >= 99) { 1649 adev->gfx.enable_cleaner_shader = true; 1650 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1651 if (r) { 1652 adev->gfx.enable_cleaner_shader = false; 1653 dev_err(adev->dev, "Failed to initialize cleaner 
shader\n"); 1654 } 1655 } 1656 break; 1657 case IP_VERSION(11, 5, 0): 1658 case IP_VERSION(11, 5, 1): 1659 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 if (adev->gfx.mec_fw_version >= 26 && 1662 adev->mes.fw_version[0] >= 114) { 1663 adev->gfx.enable_cleaner_shader = true; 1664 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1665 if (r) { 1666 adev->gfx.enable_cleaner_shader = false; 1667 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1668 } 1669 } 1670 break; 1671 case IP_VERSION(11, 5, 2): 1672 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1673 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1674 if (adev->gfx.me_fw_version >= 12 && 1675 adev->gfx.pfp_fw_version >= 15 && 1676 adev->gfx.mec_fw_version >= 15) { 1677 adev->gfx.enable_cleaner_shader = true; 1678 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1679 if (r) { 1680 adev->gfx.enable_cleaner_shader = false; 1681 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1682 } 1683 } 1684 break; 1685 case IP_VERSION(11, 5, 3): 1686 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1687 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1688 if (adev->gfx.me_fw_version >= 7 && 1689 adev->gfx.pfp_fw_version >= 8 && 1690 adev->gfx.mec_fw_version >= 8) { 1691 adev->gfx.enable_cleaner_shader = true; 1692 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1693 if (r) { 1694 adev->gfx.enable_cleaner_shader = false; 1695 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1696 } 1697 } 1698 break; 1699 default: 1700 adev->gfx.enable_cleaner_shader = false; 1701 break; 1702 } 1703 1704 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1705 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1706 amdgpu_sriov_is_pp_one_vf(adev)) 1707 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1708 1709 /* EOP Event */ 1710 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1711 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1712 &adev->gfx.eop_irq); 1713 if (r) 1714 return r; 1715 1716 /* Bad opcode Event */ 1717 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1718 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1719 &adev->gfx.bad_op_irq); 1720 if (r) 1721 return r; 1722 1723 /* Privileged reg */ 1724 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1725 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1726 &adev->gfx.priv_reg_irq); 1727 if (r) 1728 return r; 1729 1730 /* Privileged inst */ 1731 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1732 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1733 &adev->gfx.priv_inst_irq); 1734 if (r) 1735 return r; 1736 1737 /* FED error */ 1738 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1739 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1740 &adev->gfx.rlc_gc_fed_irq); 1741 if (r) 1742 return r; 1743 1744 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1745 1746 gfx_v11_0_me_init(adev); 1747 1748 r = gfx_v11_0_rlc_init(adev); 1749 if (r) { 1750 DRM_ERROR("Failed to init rlc BOs!\n"); 1751 return r; 1752 } 1753 1754 r = gfx_v11_0_mec_init(adev); 1755 if (r) { 1756 DRM_ERROR("Failed to init MEC BOs!\n"); 1757 return r; 1758 } 1759 1760 if (adev->gfx.num_gfx_rings) { 1761 ring_id = 0; 1762 /* set up the gfx ring */ 1763 for (i = 0; i < adev->gfx.me.num_me; i++) { 1764 for (j = 0; j < num_queue_per_pipe; j++) { 1765 
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1766 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1767 continue; 1768 1769 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1770 i, k, j); 1771 if (r) 1772 return r; 1773 ring_id++; 1774 } 1775 } 1776 } 1777 } 1778 1779 if (adev->gfx.num_compute_rings) { 1780 ring_id = 0; 1781 /* set up the compute queues - allocate horizontally across pipes */ 1782 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1783 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1784 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1785 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1786 k, j)) 1787 continue; 1788 1789 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1790 i, k, j); 1791 if (r) 1792 return r; 1793 1794 ring_id++; 1795 } 1796 } 1797 } 1798 } 1799 1800 adev->gfx.gfx_supported_reset = 1801 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1802 adev->gfx.compute_supported_reset = 1803 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1804 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1805 case IP_VERSION(11, 0, 0): 1806 case IP_VERSION(11, 0, 2): 1807 case IP_VERSION(11, 0, 3): 1808 if ((adev->gfx.me_fw_version >= 2280) && 1809 (adev->gfx.mec_fw_version >= 2410)) { 1810 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1811 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1812 } 1813 break; 1814 default: 1815 break; 1816 } 1817 1818 if (!adev->enable_mes_kiq) { 1819 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1820 if (r) { 1821 DRM_ERROR("Failed to init KIQ BOs!\n"); 1822 return r; 1823 } 1824 1825 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1826 if (r) 1827 return r; 1828 } 1829 1830 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1831 if (r) 1832 return r; 1833 1834 /* allocate visible FB for rlc auto-loading fw */ 1835 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1836 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1837 if (r) 1838 return r; 1839 } 1840 1841 r = gfx_v11_0_gpu_early_init(adev); 1842 if (r) 1843 return r; 1844 1845 if (amdgpu_gfx_ras_sw_init(adev)) { 1846 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1847 return -EINVAL; 1848 } 1849 1850 gfx_v11_0_alloc_ip_dump(adev); 1851 1852 r = amdgpu_gfx_sysfs_init(adev); 1853 if (r) 1854 return r; 1855 1856 return 0; 1857 } 1858 1859 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1860 { 1861 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1862 &adev->gfx.pfp.pfp_fw_gpu_addr, 1863 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1864 1865 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1866 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1867 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1868 } 1869 1870 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1871 { 1872 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1873 &adev->gfx.me.me_fw_gpu_addr, 1874 (void **)&adev->gfx.me.me_fw_ptr); 1875 1876 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1877 &adev->gfx.me.me_fw_data_gpu_addr, 1878 (void **)&adev->gfx.me.me_fw_data_ptr); 1879 } 1880 1881 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1882 { 1883 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1884 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1885 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1886 } 1887 1888 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1889 { 1890 int i; 1891 struct amdgpu_device *adev = ip_block->adev; 1892 1893 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 
1894 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1895 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1896 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1897 1898 amdgpu_gfx_mqd_sw_fini(adev, 0); 1899 1900 if (!adev->enable_mes_kiq) { 1901 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1902 amdgpu_gfx_kiq_fini(adev, 0); 1903 } 1904 1905 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1906 1907 gfx_v11_0_pfp_fini(adev); 1908 gfx_v11_0_me_fini(adev); 1909 gfx_v11_0_rlc_fini(adev); 1910 gfx_v11_0_mec_fini(adev); 1911 1912 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1913 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1914 1915 gfx_v11_0_free_microcode(adev); 1916 1917 amdgpu_gfx_sysfs_fini(adev); 1918 1919 kfree(adev->gfx.ip_dump_core); 1920 kfree(adev->gfx.ip_dump_compute_queues); 1921 kfree(adev->gfx.ip_dump_gfx_queues); 1922 1923 return 0; 1924 } 1925 1926 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1927 u32 sh_num, u32 instance, int xcc_id) 1928 { 1929 u32 data; 1930 1931 if (instance == 0xffffffff) 1932 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1933 INSTANCE_BROADCAST_WRITES, 1); 1934 else 1935 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1936 instance); 1937 1938 if (se_num == 0xffffffff) 1939 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1940 1); 1941 else 1942 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1943 1944 if (sh_num == 0xffffffff) 1945 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1946 1); 1947 else 1948 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1949 1950 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1951 } 1952 1953 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1954 { 1955 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1956 1957 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1958 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1959 CC_GC_SA_UNIT_DISABLE, 1960 SA_DISABLE); 1961 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1962 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1963 GC_USER_SA_UNIT_DISABLE, 1964 SA_DISABLE); 1965 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1966 adev->gfx.config.max_shader_engines); 1967 1968 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1969 } 1970 1971 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1972 { 1973 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1974 u32 rb_mask; 1975 1976 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1977 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1978 CC_RB_BACKEND_DISABLE, 1979 BACKEND_DISABLE); 1980 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1981 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1982 GC_USER_RB_BACKEND_DISABLE, 1983 BACKEND_DISABLE); 1984 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1985 adev->gfx.config.max_shader_engines); 1986 1987 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1988 } 1989 1990 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1991 { 1992 u32 rb_bitmap_per_sa; 1993 u32 rb_bitmap_width_per_sa; 1994 u32 max_sa; 1995 u32 active_sa_bitmap; 1996 u32 global_active_rb_bitmap; 1997 u32 active_rb_bitmap = 0; 1998 u32 i; 1999 2000 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2001 active_sa_bitmap = 
gfx_v11_0_get_sa_active_bitmap(adev); 2002 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2003 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2004 2005 /* generate active rb bitmap according to active sa bitmap */ 2006 max_sa = adev->gfx.config.max_shader_engines * 2007 adev->gfx.config.max_sh_per_se; 2008 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2009 adev->gfx.config.max_sh_per_se; 2010 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2011 2012 for (i = 0; i < max_sa; i++) { 2013 if (active_sa_bitmap & (1 << i)) 2014 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2015 } 2016 2017 active_rb_bitmap &= global_active_rb_bitmap; 2018 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2019 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2020 } 2021 2022 #define DEFAULT_SH_MEM_BASES (0x6000) 2023 #define LDS_APP_BASE 0x1 2024 #define SCRATCH_APP_BASE 0x2 2025 2026 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2027 { 2028 int i; 2029 uint32_t sh_mem_bases; 2030 uint32_t data; 2031 2032 /* 2033 * Configure apertures: 2034 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2035 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2036 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2037 */ 2038 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2039 SCRATCH_APP_BASE; 2040 2041 mutex_lock(&adev->srbm_mutex); 2042 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2043 soc21_grbm_select(adev, 0, 0, 0, i); 2044 /* CP and shaders */ 2045 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2046 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2047 2048 /* Enable trap for each kfd vmid. */ 2049 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2050 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2051 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2052 } 2053 soc21_grbm_select(adev, 0, 0, 0, 0); 2054 mutex_unlock(&adev->srbm_mutex); 2055 2056 /* 2057 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2058 * access. These should be enabled by FW for target VMIDs. 2059 */ 2060 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2061 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2062 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2063 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2064 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2065 } 2066 } 2067 2068 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2069 { 2070 int vmid; 2071 2072 /* 2073 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2074 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2075 * the driver can enable them for graphics. VMID0 should maintain 2076 * access so that HWS firmware can save/restore entries. 2077 */ 2078 for (vmid = 1; vmid < 16; vmid++) { 2079 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2080 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2081 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2082 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2083 } 2084 } 2085 2086 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2087 { 2088 /* TODO: harvest feature to be added later. */ 2089 } 2090 2091 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2092 { 2093 /* TCCs are global (not instanced). 
*/ 2094 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2095 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2096 2097 adev->gfx.config.tcc_disabled_mask = 2098 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2099 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2100 } 2101 2102 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2103 { 2104 u32 tmp; 2105 int i; 2106 2107 if (!amdgpu_sriov_vf(adev)) 2108 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2109 2110 gfx_v11_0_setup_rb(adev); 2111 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2112 gfx_v11_0_get_tcc_info(adev); 2113 adev->gfx.config.pa_sc_tile_steering_override = 0; 2114 2115 /* Set whether texture coordinate truncation is conformant. */ 2116 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2117 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2118 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2119 2120 /* XXX SH_MEM regs */ 2121 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2122 mutex_lock(&adev->srbm_mutex); 2123 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2124 soc21_grbm_select(adev, 0, 0, 0, i); 2125 /* CP and shaders */ 2126 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2127 if (i != 0) { 2128 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2129 (adev->gmc.private_aperture_start >> 48)); 2130 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2131 (adev->gmc.shared_aperture_start >> 48)); 2132 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2133 } 2134 } 2135 soc21_grbm_select(adev, 0, 0, 0, 0); 2136 2137 mutex_unlock(&adev->srbm_mutex); 2138 2139 gfx_v11_0_init_compute_vmid(adev); 2140 gfx_v11_0_init_gds_vmid(adev); 2141 } 2142 2143 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2144 int me, int pipe) 2145 { 2146 if (me != 0) 2147 return 0; 2148 2149 switch (pipe) { 2150 case 0: 2151 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2152 case 1: 2153 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2154 default: 2155 return 0; 2156 } 2157 } 2158 2159 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2160 int me, int pipe) 2161 { 2162 /* 2163 * amdgpu controls only the first MEC. That's why this function only 2164 * handles the setting of interrupts for this specific MEC. All other 2165 * pipes' interrupts are set by amdkfd. 2166 */ 2167 if (me != 1) 2168 return 0; 2169 2170 switch (pipe) { 2171 case 0: 2172 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2173 case 1: 2174 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2175 case 2: 2176 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2177 case 3: 2178 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2179 default: 2180 return 0; 2181 } 2182 } 2183 2184 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2185 bool enable) 2186 { 2187 u32 tmp, cp_int_cntl_reg; 2188 int i, j; 2189 2190 if (amdgpu_sriov_vf(adev)) 2191 return; 2192 2193 for (i = 0; i < adev->gfx.me.num_me; i++) { 2194 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2195 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2196 2197 if (cp_int_cntl_reg) { 2198 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2199 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2200 enable ? 1 : 0); 2201 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2202 enable ? 1 : 0); 2203 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2204 enable ? 
1 : 0); 2205 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2206 enable ? 1 : 0); 2207 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2208 } 2209 } 2210 } 2211 } 2212 2213 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2214 { 2215 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2216 2217 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2218 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2219 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2220 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2221 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2222 2223 return 0; 2224 } 2225 2226 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2227 { 2228 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2229 2230 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2231 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2232 } 2233 2234 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2235 { 2236 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2237 udelay(50); 2238 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2239 udelay(50); 2240 } 2241 2242 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2243 bool enable) 2244 { 2245 uint32_t rlc_pg_cntl; 2246 2247 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2248 2249 if (!enable) { 2250 /* RLC_PG_CNTL[23] = 0 (default) 2251 * RLC will wait for handshake acks with SMU 2252 * GFXOFF will be enabled 2253 * RLC_PG_CNTL[23] = 1 2254 * RLC will not issue any message to SMU 2255 * hence no handshake between SMU & RLC 2256 * GFXOFF will be disabled 2257 */ 2258 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2259 } else 2260 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2261 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2262 } 2263 2264 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2265 { 2266 /* TODO: enable rlc & smu handshake until smu 2267 * and gfxoff feature works as expected */ 2268 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2269 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2270 2271 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2272 udelay(50); 2273 } 2274 2275 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2276 { 2277 uint32_t tmp; 2278 2279 /* enable Save Restore Machine */ 2280 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2281 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2282 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2283 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2284 } 2285 2286 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2287 { 2288 const struct rlc_firmware_header_v2_0 *hdr; 2289 const __le32 *fw_data; 2290 unsigned i, fw_size; 2291 2292 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2293 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2294 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2295 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2296 2297 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2298 RLCG_UCODE_LOADING_START_ADDRESS); 2299 2300 for (i = 0; i < fw_size; i++) 2301 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2302 le32_to_cpup(fw_data++)); 2303 2304 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2305 } 2306 2307 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2308 { 2309 const struct rlc_firmware_header_v2_2 *hdr; 2310 const __le32 *fw_data; 2311 unsigned i, fw_size; 2312 u32 tmp; 2313 2314 hdr = (const struct 
rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2315 2316 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2317 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2318 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2319 2320 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2321 2322 for (i = 0; i < fw_size; i++) { 2323 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2324 msleep(1); 2325 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2326 le32_to_cpup(fw_data++)); 2327 } 2328 2329 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2330 2331 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2332 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2333 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2334 2335 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2336 for (i = 0; i < fw_size; i++) { 2337 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2338 msleep(1); 2339 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2340 le32_to_cpup(fw_data++)); 2341 } 2342 2343 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2344 2345 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2346 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2347 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2348 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2349 } 2350 2351 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2352 { 2353 const struct rlc_firmware_header_v2_3 *hdr; 2354 const __le32 *fw_data; 2355 unsigned i, fw_size; 2356 u32 tmp; 2357 2358 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2359 2360 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2361 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2362 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2363 2364 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2365 2366 for (i = 0; i < fw_size; i++) { 2367 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2368 msleep(1); 2369 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2370 le32_to_cpup(fw_data++)); 2371 } 2372 2373 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2374 2375 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2376 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2377 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2378 2379 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2380 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2381 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2382 2383 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2384 2385 for (i = 0; i < fw_size; i++) { 2386 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2387 msleep(1); 2388 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2389 le32_to_cpup(fw_data++)); 2390 } 2391 2392 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2393 2394 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2395 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2396 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2397 } 2398 2399 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2400 { 2401 const struct rlc_firmware_header_v2_0 *hdr; 2402 uint16_t version_major; 2403 uint16_t version_minor; 2404 2405 if (!adev->gfx.rlc_fw) 2406 return -EINVAL; 2407 2408 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2409 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2410 2411 version_major = le16_to_cpu(hdr->header.header_version_major); 2412 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2413 2414 if (version_major == 2) { 2415 
		gfx_v11_0_load_rlcg_microcode(adev);
		if (amdgpu_dpm == 1) {
			if (version_minor >= 2)
				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
			if (version_minor == 3)
				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
		}

		return 0;
	}

	return -EINVAL;
}

static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		gfx_v11_0_init_csb(adev);

		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
			gfx_v11_0_rlc_enable_srm(adev);
	} else {
		if (amdgpu_sriov_vf(adev)) {
			gfx_v11_0_init_csb(adev);
			return 0;
		}

		adev->gfx.rlc.funcs->stop(adev);

		/* disable CG */
		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);

		/* disable PG */
		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);

		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			/* legacy rlc firmware loading */
			r = gfx_v11_0_rlc_load_microcode(adev);
			if (r)
				return r;
		}

		gfx_v11_0_init_csb(adev);

		adev->gfx.rlc.funcs->start(adev);
	}
	return 0;
}

static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/* Program me ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}
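
	/*
	 * In emulation mode, flush the HDP cache below so that ucode written
	 * by the CPU is visible in memory before the PFP instruction-cache
	 * base is reprogrammed.
	 */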
	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/* Program pfp ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);

	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	/* Program mec1 ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	unsigned i, pipe_id;
	const struct gfx_firmware_header_v2_0 *pfp_hdr;

	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.pfp_fw->data;

	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
		lower_32_bits(addr));
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
		upper_32_bits(addr));

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the ME L1 I$.
Wait for the 2622 * invalidation complete 2623 */ 2624 for (i = 0; i < usec_timeout; i++) { 2625 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2626 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2627 INVALIDATE_CACHE_COMPLETE)) 2628 break; 2629 udelay(1); 2630 } 2631 2632 if (i >= usec_timeout) { 2633 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2634 return -EINVAL; 2635 } 2636 2637 /* Prime the L1 instruction caches */ 2638 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2639 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2640 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2641 /* Waiting for cache primed*/ 2642 for (i = 0; i < usec_timeout; i++) { 2643 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2644 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2645 ICACHE_PRIMED)) 2646 break; 2647 udelay(1); 2648 } 2649 2650 if (i >= usec_timeout) { 2651 dev_err(adev->dev, "failed to prime instruction cache\n"); 2652 return -EINVAL; 2653 } 2654 2655 mutex_lock(&adev->srbm_mutex); 2656 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2657 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2658 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2659 (pfp_hdr->ucode_start_addr_hi << 30) | 2660 (pfp_hdr->ucode_start_addr_lo >> 2)); 2661 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2662 pfp_hdr->ucode_start_addr_hi >> 2); 2663 2664 /* 2665 * Program CP_ME_CNTL to reset given PIPE to take 2666 * effect of CP_PFP_PRGRM_CNTR_START. 2667 */ 2668 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2669 if (pipe_id == 0) 2670 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2671 PFP_PIPE0_RESET, 1); 2672 else 2673 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2674 PFP_PIPE1_RESET, 1); 2675 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2676 2677 /* Clear pfp pipe0 reset bit. 
*/ 2678 if (pipe_id == 0) 2679 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2680 PFP_PIPE0_RESET, 0); 2681 else 2682 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2683 PFP_PIPE1_RESET, 0); 2684 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2685 2686 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2687 lower_32_bits(addr2)); 2688 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2689 upper_32_bits(addr2)); 2690 } 2691 soc21_grbm_select(adev, 0, 0, 0, 0); 2692 mutex_unlock(&adev->srbm_mutex); 2693 2694 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2695 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2696 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2697 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2698 2699 /* Invalidate the data caches */ 2700 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2701 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2702 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2703 2704 for (i = 0; i < usec_timeout; i++) { 2705 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2706 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2707 INVALIDATE_DCACHE_COMPLETE)) 2708 break; 2709 udelay(1); 2710 } 2711 2712 if (i >= usec_timeout) { 2713 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2714 return -EINVAL; 2715 } 2716 2717 return 0; 2718 } 2719 2720 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2721 { 2722 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2723 uint32_t tmp; 2724 unsigned i, pipe_id; 2725 const struct gfx_firmware_header_v2_0 *me_hdr; 2726 2727 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2728 adev->gfx.me_fw->data; 2729 2730 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2731 lower_32_bits(addr)); 2732 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2733 upper_32_bits(addr)); 2734 2735 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2736 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2737 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2738 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2739 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2740 2741 /* 2742 * Programming any of the CP_ME_IC_BASE registers 2743 * forces invalidation of the ME L1 I$. 
Wait for the
	 * invalidation complete
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
					INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Waiting for instruction cache primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
					ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset given PIPE to take
		 * effect of CP_ME_PRGRM_CNTR_START.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear me pipe reset bit. */
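		/*
		 * Releasing the reset restarts this ME pipe from the
		 * CP_ME_PRGRM_CNTR_START value programmed above.
		 */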
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					ME_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
			lower_32_bits(addr2));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
			upper_32_bits(addr2));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
					INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
	uint32_t usec_timeout = 50000;  /* wait for 50ms */
	uint32_t tmp;
	unsigned i;
	const struct gfx_firmware_header_v2_0 *mec_hdr;

	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.mec_fw->data;

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(addr2));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(addr));
	}
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* Trigger an invalidation of the MEC data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >=
usec_timeout) { 2900 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2901 return -EINVAL; 2902 } 2903 2904 /* Trigger an invalidation of the L1 instruction caches */ 2905 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2906 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2907 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2908 2909 /* Wait for invalidation complete */ 2910 for (i = 0; i < usec_timeout; i++) { 2911 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2912 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2913 INVALIDATE_CACHE_COMPLETE)) 2914 break; 2915 udelay(1); 2916 } 2917 2918 if (i >= usec_timeout) { 2919 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2920 return -EINVAL; 2921 } 2922 2923 return 0; 2924 } 2925 2926 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2927 { 2928 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2929 const struct gfx_firmware_header_v2_0 *me_hdr; 2930 const struct gfx_firmware_header_v2_0 *mec_hdr; 2931 uint32_t pipe_id, tmp; 2932 2933 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2934 adev->gfx.mec_fw->data; 2935 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2936 adev->gfx.me_fw->data; 2937 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2938 adev->gfx.pfp_fw->data; 2939 2940 /* config pfp program start addr */ 2941 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2942 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2943 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2944 (pfp_hdr->ucode_start_addr_hi << 30) | 2945 (pfp_hdr->ucode_start_addr_lo >> 2)); 2946 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2947 pfp_hdr->ucode_start_addr_hi >> 2); 2948 } 2949 soc21_grbm_select(adev, 0, 0, 0, 0); 2950 2951 /* reset pfp pipe */ 2952 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2953 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2954 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2955 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2956 2957 /* clear pfp pipe reset */ 2958 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2959 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2960 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2961 2962 /* config me program start addr */ 2963 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2964 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2965 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2966 (me_hdr->ucode_start_addr_hi << 30) | 2967 (me_hdr->ucode_start_addr_lo >> 2) ); 2968 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2969 me_hdr->ucode_start_addr_hi>>2); 2970 } 2971 soc21_grbm_select(adev, 0, 0, 0, 0); 2972 2973 /* reset me pipe */ 2974 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2975 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2976 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2977 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2978 2979 /* clear me pipe reset */ 2980 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2981 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2982 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2983 2984 /* config mec program start addr */ 2985 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2986 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2987 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2988 mec_hdr->ucode_start_addr_lo >> 2 | 2989 mec_hdr->ucode_start_addr_hi << 30); 2990 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2991 mec_hdr->ucode_start_addr_hi >> 2); 2992 } 2993 soc21_grbm_select(adev, 0, 0, 0, 0); 2994 2995 /* reset mec pipe */ 2996 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2997 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2998 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2999 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3000 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3001 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3002 3003 /* clear mec pipe reset */ 3004 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3005 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3006 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3007 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3008 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3009 } 3010 3011 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3012 { 3013 uint32_t cp_status; 3014 uint32_t bootload_status; 3015 int i, r; 3016 uint64_t addr, addr2; 3017 3018 for (i = 0; i < adev->usec_timeout; i++) { 3019 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3020 3021 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3022 IP_VERSION(11, 0, 1) || 3023 amdgpu_ip_version(adev, GC_HWIP, 0) == 3024 IP_VERSION(11, 0, 4) || 3025 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3026 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3027 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3028 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) 3029 bootload_status = RREG32_SOC15(GC, 0, 3030 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3031 else 3032 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3033 3034 if ((cp_status == 0) && 3035 (REG_GET_FIELD(bootload_status, 3036 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3037 break; 3038 } 3039 udelay(1); 3040 } 3041 3042 if (i >= adev->usec_timeout) { 3043 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3044 return -ETIMEDOUT; 3045 } 3046 3047 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3048 if (adev->gfx.rs64_enable) { 3049 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3050 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3051 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3052 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3053 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3054 if (r) 3055 return r; 3056 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3057 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3058 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3059 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3060 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3061 if (r) 3062 return r; 3063 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3064 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3065 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3066 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3067 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3068 if (r) 3069 return r; 3070 } else { 3071 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3072 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3073 r = gfx_v11_0_config_me_cache(adev, addr); 3074 if (r) 3075 return r; 3076 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3077 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3078 r = gfx_v11_0_config_pfp_cache(adev, addr); 3079 if (r) 3080 return r; 3081 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3082 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3083 r = gfx_v11_0_config_mec_cache(adev, addr); 3084 if (r) 
3085 return r; 3086 } 3087 } 3088 3089 return 0; 3090 } 3091 3092 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3093 { 3094 int i; 3095 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3096 3097 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3098 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3099 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3100 3101 for (i = 0; i < adev->usec_timeout; i++) { 3102 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3103 break; 3104 udelay(1); 3105 } 3106 3107 if (i >= adev->usec_timeout) 3108 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3109 3110 return 0; 3111 } 3112 3113 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3114 { 3115 int r; 3116 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3117 const __le32 *fw_data; 3118 unsigned i, fw_size; 3119 3120 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3121 adev->gfx.pfp_fw->data; 3122 3123 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3124 3125 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3126 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3127 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3128 3129 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3130 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3131 &adev->gfx.pfp.pfp_fw_obj, 3132 &adev->gfx.pfp.pfp_fw_gpu_addr, 3133 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3134 if (r) { 3135 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3136 gfx_v11_0_pfp_fini(adev); 3137 return r; 3138 } 3139 3140 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3141 3142 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3143 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3144 3145 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3146 3147 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3148 3149 for (i = 0; i < pfp_hdr->jt_size; i++) 3150 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3151 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3152 3153 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3154 3155 return 0; 3156 } 3157 3158 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3159 { 3160 int r; 3161 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3162 const __le32 *fw_ucode, *fw_data; 3163 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3164 uint32_t tmp; 3165 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3166 3167 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3168 adev->gfx.pfp_fw->data; 3169 3170 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3171 3172 /* instruction */ 3173 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3174 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3175 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3176 /* data */ 3177 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3178 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3179 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3180 3181 /* 64kb align */ 3182 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3183 64 * 1024, 3184 AMDGPU_GEM_DOMAIN_VRAM | 3185 AMDGPU_GEM_DOMAIN_GTT, 3186 &adev->gfx.pfp.pfp_fw_obj, 3187 &adev->gfx.pfp.pfp_fw_gpu_addr, 3188 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3189 if (r) { 3190 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3191 gfx_v11_0_pfp_fini(adev); 3192 return r; 3193 } 3194 3195 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3196 64 * 1024, 3197 AMDGPU_GEM_DOMAIN_VRAM | 3198 AMDGPU_GEM_DOMAIN_GTT, 3199 
&adev->gfx.pfp.pfp_fw_data_obj, 3200 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3201 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3202 if (r) { 3203 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3204 gfx_v11_0_pfp_fini(adev); 3205 return r; 3206 } 3207 3208 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3209 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3210 3211 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3212 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3213 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3214 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3215 3216 if (amdgpu_emu_mode == 1) 3217 amdgpu_device_flush_hdp(adev, NULL); 3218 3219 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3220 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3221 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3222 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3223 3224 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3225 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3226 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3227 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3228 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3229 3230 /* 3231 * Programming any of the CP_PFP_IC_BASE registers 3232 * forces invalidation of the ME L1 I$. Wait for the 3233 * invalidation complete 3234 */ 3235 for (i = 0; i < usec_timeout; i++) { 3236 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3237 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3238 INVALIDATE_CACHE_COMPLETE)) 3239 break; 3240 udelay(1); 3241 } 3242 3243 if (i >= usec_timeout) { 3244 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3245 return -EINVAL; 3246 } 3247 3248 /* Prime the L1 instruction caches */ 3249 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3250 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3251 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3252 /* Waiting for cache primed*/ 3253 for (i = 0; i < usec_timeout; i++) { 3254 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3255 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3256 ICACHE_PRIMED)) 3257 break; 3258 udelay(1); 3259 } 3260 3261 if (i >= usec_timeout) { 3262 dev_err(adev->dev, "failed to prime instruction cache\n"); 3263 return -EINVAL; 3264 } 3265 3266 mutex_lock(&adev->srbm_mutex); 3267 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3268 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3269 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3270 (pfp_hdr->ucode_start_addr_hi << 30) | 3271 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3272 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3273 pfp_hdr->ucode_start_addr_hi>>2); 3274 3275 /* 3276 * Program CP_ME_CNTL to reset given PIPE to take 3277 * effect of CP_PFP_PRGRM_CNTR_START. 3278 */ 3279 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3280 if (pipe_id == 0) 3281 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3282 PFP_PIPE0_RESET, 1); 3283 else 3284 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3285 PFP_PIPE1_RESET, 1); 3286 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3287 3288 /* Clear pfp pipe0 reset bit. 
*/ 3289 if (pipe_id == 0) 3290 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3291 PFP_PIPE0_RESET, 0); 3292 else 3293 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3294 PFP_PIPE1_RESET, 0); 3295 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3296 3297 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3298 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3299 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3300 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3301 } 3302 soc21_grbm_select(adev, 0, 0, 0, 0); 3303 mutex_unlock(&adev->srbm_mutex); 3304 3305 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3306 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3307 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3308 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3309 3310 /* Invalidate the data caches */ 3311 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3312 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3313 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3314 3315 for (i = 0; i < usec_timeout; i++) { 3316 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3317 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3318 INVALIDATE_DCACHE_COMPLETE)) 3319 break; 3320 udelay(1); 3321 } 3322 3323 if (i >= usec_timeout) { 3324 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3325 return -EINVAL; 3326 } 3327 3328 return 0; 3329 } 3330 3331 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3332 { 3333 int r; 3334 const struct gfx_firmware_header_v1_0 *me_hdr; 3335 const __le32 *fw_data; 3336 unsigned i, fw_size; 3337 3338 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3339 adev->gfx.me_fw->data; 3340 3341 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3342 3343 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3344 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3345 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3346 3347 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3348 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3349 &adev->gfx.me.me_fw_obj, 3350 &adev->gfx.me.me_fw_gpu_addr, 3351 (void **)&adev->gfx.me.me_fw_ptr); 3352 if (r) { 3353 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3354 gfx_v11_0_me_fini(adev); 3355 return r; 3356 } 3357 3358 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3359 3360 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3361 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3362 3363 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3364 3365 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3366 3367 for (i = 0; i < me_hdr->jt_size; i++) 3368 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3369 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3370 3371 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3372 3373 return 0; 3374 } 3375 3376 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3377 { 3378 int r; 3379 const struct gfx_firmware_header_v2_0 *me_hdr; 3380 const __le32 *fw_ucode, *fw_data; 3381 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3382 uint32_t tmp; 3383 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3384 3385 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3386 adev->gfx.me_fw->data; 3387 3388 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3389 3390 /* instruction */ 3391 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3392 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3393 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3394 /* data */ 3395 
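/* The v2_0 firmware header describes separate instruction and data images for the RS64 ME; each image is copied into its own 64KB-aligned buffer object below. */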
fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3396 le32_to_cpu(me_hdr->data_offset_bytes)); 3397 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 3398 3399 /* 64kb align*/ 3400 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3401 64 * 1024, 3402 AMDGPU_GEM_DOMAIN_VRAM | 3403 AMDGPU_GEM_DOMAIN_GTT, 3404 &adev->gfx.me.me_fw_obj, 3405 &adev->gfx.me.me_fw_gpu_addr, 3406 (void **)&adev->gfx.me.me_fw_ptr); 3407 if (r) { 3408 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 3409 gfx_v11_0_me_fini(adev); 3410 return r; 3411 } 3412 3413 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3414 64 * 1024, 3415 AMDGPU_GEM_DOMAIN_VRAM | 3416 AMDGPU_GEM_DOMAIN_GTT, 3417 &adev->gfx.me.me_fw_data_obj, 3418 &adev->gfx.me.me_fw_data_gpu_addr, 3419 (void **)&adev->gfx.me.me_fw_data_ptr); 3420 if (r) { 3421 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 3422 gfx_v11_0_pfp_fini(adev); 3423 return r; 3424 } 3425 3426 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 3427 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 3428 3429 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3430 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 3431 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3432 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3433 3434 if (amdgpu_emu_mode == 1) 3435 amdgpu_device_flush_hdp(adev, NULL); 3436 3437 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3438 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3439 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 3440 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3441 3442 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 3443 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 3444 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 3445 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 3446 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 3447 3448 /* 3449 * Programming any of the CP_ME_IC_BASE registers 3450 * forces invalidation of the ME L1 I$. Wait for the 3451 * invalidation complete 3452 */ 3453 for (i = 0; i < usec_timeout; i++) { 3454 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3455 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3456 INVALIDATE_CACHE_COMPLETE)) 3457 break; 3458 udelay(1); 3459 } 3460 3461 if (i >= usec_timeout) { 3462 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3463 return -EINVAL; 3464 } 3465 3466 /* Prime the instruction caches */ 3467 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3468 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 3469 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 3470 3471 /* Waiting for instruction cache primed*/ 3472 for (i = 0; i < usec_timeout; i++) { 3473 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3474 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3475 ICACHE_PRIMED)) 3476 break; 3477 udelay(1); 3478 } 3479 3480 if (i >= usec_timeout) { 3481 dev_err(adev->dev, "failed to prime instruction cache\n"); 3482 return -EINVAL; 3483 } 3484 3485 mutex_lock(&adev->srbm_mutex); 3486 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3487 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3488 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3489 (me_hdr->ucode_start_addr_hi << 30) | 3490 (me_hdr->ucode_start_addr_lo >> 2) ); 3491 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3492 me_hdr->ucode_start_addr_hi>>2); 3493 3494 /* 3495 * Program CP_ME_CNTL to reset given PIPE to take 3496 * effect of CP_PFP_PRGRM_CNTR_START. 
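* (For the ME pipes the start address is the CP_ME_PRGRM_CNTR_START/_HI pair written above; the PFP register name in this comment appears to be carried over from the PFP loading sequence.)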
3497 */ 3498 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3499 if (pipe_id == 0) 3500 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3501 ME_PIPE0_RESET, 1); 3502 else 3503 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3504 ME_PIPE1_RESET, 1); 3505 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3506 3507 /* Clear pfp pipe0 reset bit. */ 3508 if (pipe_id == 0) 3509 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3510 ME_PIPE0_RESET, 0); 3511 else 3512 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3513 ME_PIPE1_RESET, 0); 3514 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3515 3516 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3517 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3518 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3519 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3520 } 3521 soc21_grbm_select(adev, 0, 0, 0, 0); 3522 mutex_unlock(&adev->srbm_mutex); 3523 3524 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3525 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3526 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3527 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3528 3529 /* Invalidate the data caches */ 3530 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3531 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3532 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3533 3534 for (i = 0; i < usec_timeout; i++) { 3535 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3536 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3537 INVALIDATE_DCACHE_COMPLETE)) 3538 break; 3539 udelay(1); 3540 } 3541 3542 if (i >= usec_timeout) { 3543 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3544 return -EINVAL; 3545 } 3546 3547 return 0; 3548 } 3549 3550 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3551 { 3552 int r; 3553 3554 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3555 return -EINVAL; 3556 3557 gfx_v11_0_cp_gfx_enable(adev, false); 3558 3559 if (adev->gfx.rs64_enable) 3560 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3561 else 3562 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3563 if (r) { 3564 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3565 return r; 3566 } 3567 3568 if (adev->gfx.rs64_enable) 3569 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3570 else 3571 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3572 if (r) { 3573 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3574 return r; 3575 } 3576 3577 return 0; 3578 } 3579 3580 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3581 { 3582 struct amdgpu_ring *ring; 3583 const struct cs_section_def *sect = NULL; 3584 const struct cs_extent_def *ext = NULL; 3585 int r, i; 3586 int ctx_reg_offset; 3587 3588 /* init the CP */ 3589 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3590 adev->gfx.config.max_hw_contexts - 1); 3591 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3592 3593 if (!amdgpu_async_gfx_ring) 3594 gfx_v11_0_cp_gfx_enable(adev, true); 3595 3596 ring = &adev->gfx.gfx_ring[0]; 3597 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3598 if (r) { 3599 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3600 return r; 3601 } 3602 3603 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3604 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3605 3606 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3607 amdgpu_ring_write(ring, 0x80000000); 3608 amdgpu_ring_write(ring, 0x80000000); 3609 3610 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3611 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3612 if (sect->id == SECT_CONTEXT) { 3613 amdgpu_ring_write(ring, 3614 PACKET3(PACKET3_SET_CONTEXT_REG, 3615 ext->reg_count)); 3616 amdgpu_ring_write(ring, ext->reg_index - 3617 PACKET3_SET_CONTEXT_REG_START); 3618 for (i = 0; i < ext->reg_count; i++) 3619 amdgpu_ring_write(ring, ext->extent[i]); 3620 } 3621 } 3622 } 3623 3624 ctx_reg_offset = 3625 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3626 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3627 amdgpu_ring_write(ring, ctx_reg_offset); 3628 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3629 3630 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3631 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3632 3633 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3634 amdgpu_ring_write(ring, 0); 3635 3636 amdgpu_ring_commit(ring); 3637 3638 /* submit cs packet to copy state 0 to next available state */ 3639 if (adev->gfx.num_gfx_rings > 1) { 3640 /* maximum supported gfx ring is 2 */ 3641 ring = &adev->gfx.gfx_ring[1]; 3642 r = amdgpu_ring_alloc(ring, 2); 3643 if (r) { 3644 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3645 return r; 3646 } 3647 3648 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3649 amdgpu_ring_write(ring, 0); 3650 3651 amdgpu_ring_commit(ring); 3652 } 3653 return 0; 3654 } 3655 3656 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3657 CP_PIPE_ID pipe) 3658 { 3659 u32 tmp; 3660 3661 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3662 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3663 3664 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3665 } 3666 3667 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3668 struct amdgpu_ring *ring) 3669 { 3670 u32 tmp; 3671 3672 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3673 if (ring->use_doorbell) { 3674 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3675 DOORBELL_OFFSET, ring->doorbell_index); 3676 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3677 DOORBELL_EN, 1); 3678 } else { 3679 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3680 DOORBELL_EN, 0); 3681 } 3682 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3683 3684 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3685 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3686 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3687 3688 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3689 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3690 } 3691 3692 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3693 { 3694 struct amdgpu_ring *ring; 3695 u32 tmp; 3696 u32 rb_bufsz; 3697 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3698 3699 /* Set the write pointer delay */ 3700 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3701 3702 /* set the RB to use vmid 0 */ 3703 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3704 3705 /* Init gfx ring 0 for pipe 0 */ 3706 mutex_lock(&adev->srbm_mutex); 3707 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3708 3709 /* Set ring buffer size */ 3710 ring = &adev->gfx.gfx_ring[0]; 3711 rb_bufsz = order_base_2(ring->ring_size / 8); 3712 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3713 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3714 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3715 3716 /* Initialize the ring buffer's write pointers */ 3717 ring->wptr = 0; 3718 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3719 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3720 3721 /* set the wb address whether it's enabled or not */ 3722 rptr_addr = ring->rptr_gpu_addr; 3723 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3724 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3725 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3726 3727 wptr_gpu_addr = ring->wptr_gpu_addr; 3728 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3729 lower_32_bits(wptr_gpu_addr)); 3730 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3731 upper_32_bits(wptr_gpu_addr)); 3732 3733 mdelay(1); 3734 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3735 3736 rb_addr = ring->gpu_addr >> 8; 3737 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3738 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3739 3740 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3741 3742 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3743 mutex_unlock(&adev->srbm_mutex); 3744 3745 /* Init gfx ring 1 for pipe 1 */ 3746 if (adev->gfx.num_gfx_rings > 1) { 3747 mutex_lock(&adev->srbm_mutex); 3748 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3749 /* maximum supported gfx ring is 2 */ 3750 ring = &adev->gfx.gfx_ring[1]; 3751 rb_bufsz = order_base_2(ring->ring_size / 8); 3752 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3753 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3754 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3755 /* Initialize the ring buffer's write pointers */ 3756 ring->wptr = 0; 3757 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3758 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3759 /* Set the wb address whether it's enabled or not */ 3760 rptr_addr = ring->rptr_gpu_addr; 3761 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3762 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3763 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3764 wptr_gpu_addr = ring->wptr_gpu_addr; 3765 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3766 lower_32_bits(wptr_gpu_addr)); 3767 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3768 upper_32_bits(wptr_gpu_addr)); 3769 3770 mdelay(1); 3771 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3772 3773 rb_addr = ring->gpu_addr >> 8; 3774 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3775 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3776 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3777 3778 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3779 mutex_unlock(&adev->srbm_mutex); 3780 } 3781 /* Switch to pipe 0 */ 3782 mutex_lock(&adev->srbm_mutex); 3783 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3784 mutex_unlock(&adev->srbm_mutex); 3785 3786 /* start the ring */ 3787 gfx_v11_0_cp_gfx_start(adev); 3788 3789 return 0; 3790 } 3791 3792 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3793 { 3794 u32 data; 3795 3796 if (adev->gfx.rs64_enable) { 3797 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3798 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3799 enable ? 0 : 1); 3800 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3801 enable ? 0 : 1); 3802 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3803 enable ? 0 : 1); 3804 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3805 enable ? 0 : 1); 3806 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3807 enable ? 0 : 1); 3808 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3809 enable ? 
1 : 0); 3810 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3811 enable ? 1 : 0); 3812 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3813 enable ? 1 : 0); 3814 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3815 enable ? 1 : 0); 3816 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3817 enable ? 0 : 1); 3818 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3819 } else { 3820 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3821 3822 if (enable) { 3823 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3824 if (!adev->enable_mes_kiq) 3825 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3826 MEC_ME2_HALT, 0); 3827 } else { 3828 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3829 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3830 } 3831 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3832 } 3833 3834 udelay(50); 3835 } 3836 3837 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3838 { 3839 const struct gfx_firmware_header_v1_0 *mec_hdr; 3840 const __le32 *fw_data; 3841 unsigned i, fw_size; 3842 u32 *fw = NULL; 3843 int r; 3844 3845 if (!adev->gfx.mec_fw) 3846 return -EINVAL; 3847 3848 gfx_v11_0_cp_compute_enable(adev, false); 3849 3850 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3851 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3852 3853 fw_data = (const __le32 *) 3854 (adev->gfx.mec_fw->data + 3855 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3856 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3857 3858 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3859 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3860 &adev->gfx.mec.mec_fw_obj, 3861 &adev->gfx.mec.mec_fw_gpu_addr, 3862 (void **)&fw); 3863 if (r) { 3864 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3865 gfx_v11_0_mec_fini(adev); 3866 return r; 3867 } 3868 3869 memcpy(fw, fw_data, fw_size); 3870 3871 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3872 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3873 3874 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3875 3876 /* MEC1 */ 3877 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3878 3879 for (i = 0; i < mec_hdr->jt_size; i++) 3880 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3881 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3882 3883 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3884 3885 return 0; 3886 } 3887 3888 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3889 { 3890 const struct gfx_firmware_header_v2_0 *mec_hdr; 3891 const __le32 *fw_ucode, *fw_data; 3892 u32 tmp, fw_ucode_size, fw_data_size; 3893 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3894 u32 *fw_ucode_ptr, *fw_data_ptr; 3895 int r; 3896 3897 if (!adev->gfx.mec_fw) 3898 return -EINVAL; 3899 3900 gfx_v11_0_cp_compute_enable(adev, false); 3901 3902 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3903 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3904 3905 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3906 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3907 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3908 3909 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3910 le32_to_cpu(mec_hdr->data_offset_bytes)); 3911 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3912 3913 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3914 64 * 1024, 3915 AMDGPU_GEM_DOMAIN_VRAM | 3916 AMDGPU_GEM_DOMAIN_GTT, 3917 &adev->gfx.mec.mec_fw_obj, 3918 
&adev->gfx.mec.mec_fw_gpu_addr, 3919 (void **)&fw_ucode_ptr); 3920 if (r) { 3921 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3922 gfx_v11_0_mec_fini(adev); 3923 return r; 3924 } 3925 3926 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3927 64 * 1024, 3928 AMDGPU_GEM_DOMAIN_VRAM | 3929 AMDGPU_GEM_DOMAIN_GTT, 3930 &adev->gfx.mec.mec_fw_data_obj, 3931 &adev->gfx.mec.mec_fw_data_gpu_addr, 3932 (void **)&fw_data_ptr); 3933 if (r) { 3934 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3935 gfx_v11_0_mec_fini(adev); 3936 return r; 3937 } 3938 3939 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 3940 memcpy(fw_data_ptr, fw_data, fw_data_size); 3941 3942 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3943 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 3944 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3945 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 3946 3947 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 3948 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 3949 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 3950 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 3951 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 3952 3953 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 3954 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 3955 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 3956 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 3957 3958 mutex_lock(&adev->srbm_mutex); 3959 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 3960 soc21_grbm_select(adev, 1, i, 0, 0); 3961 3962 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr); 3963 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 3964 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr)); 3965 3966 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3967 mec_hdr->ucode_start_addr_lo >> 2 | 3968 mec_hdr->ucode_start_addr_hi << 30); 3969 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3970 mec_hdr->ucode_start_addr_hi >> 2); 3971 3972 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr); 3973 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 3974 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 3975 } 3976 mutex_unlock(&adev->srbm_mutex); 3977 soc21_grbm_select(adev, 0, 0, 0, 0); 3978 3979 /* Trigger an invalidation of the L1 instruction caches */ 3980 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3981 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3982 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 3983 3984 /* Wait for invalidation complete */ 3985 for (i = 0; i < usec_timeout; i++) { 3986 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 3987 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 3988 INVALIDATE_DCACHE_COMPLETE)) 3989 break; 3990 udelay(1); 3991 } 3992 3993 if (i >= usec_timeout) { 3994 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3995 return -EINVAL; 3996 } 3997 3998 /* Trigger an invalidation of the L1 instruction caches */ 3999 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4000 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 4001 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 4002 4003 /* Wait for invalidation complete */ 4004 for (i = 0; i < usec_timeout; i++) { 4005 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4006 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 4007 INVALIDATE_CACHE_COMPLETE)) 4008 break; 4009 udelay(1); 4010 } 4011 4012 if (i >= usec_timeout) { 4013 dev_err(adev->dev, "failed to invalidate 
instruction cache\n"); 4014 return -EINVAL; 4015 } 4016 4017 return 0; 4018 } 4019 4020 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4021 { 4022 uint32_t tmp; 4023 struct amdgpu_device *adev = ring->adev; 4024 4025 /* tell RLC which is KIQ queue */ 4026 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4027 tmp &= 0xffffff00; 4028 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4029 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4030 } 4031 4032 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4033 { 4034 /* set graphics engine doorbell range */ 4035 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4036 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4037 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4038 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4039 4040 /* set compute engine doorbell range */ 4041 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4042 (adev->doorbell_index.kiq * 2) << 2); 4043 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4044 (adev->doorbell_index.userqueue_end * 2) << 2); 4045 } 4046 4047 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4048 struct v11_gfx_mqd *mqd, 4049 struct amdgpu_mqd_prop *prop) 4050 { 4051 bool priority = 0; 4052 u32 tmp; 4053 4054 /* set up default queue priority level 4055 * 0x0 = low priority, 0x1 = high priority 4056 */ 4057 if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) 4058 priority = 1; 4059 4060 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4061 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4062 mqd->cp_gfx_hqd_queue_priority = tmp; 4063 } 4064 4065 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4066 struct amdgpu_mqd_prop *prop) 4067 { 4068 struct v11_gfx_mqd *mqd = m; 4069 uint64_t hqd_gpu_addr, wb_gpu_addr; 4070 uint32_t tmp; 4071 uint32_t rb_bufsz; 4072 4073 /* set up gfx hqd wptr */ 4074 mqd->cp_gfx_hqd_wptr = 0; 4075 mqd->cp_gfx_hqd_wptr_hi = 0; 4076 4077 /* set the pointer to the MQD */ 4078 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4079 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4080 4081 /* set up mqd control */ 4082 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4083 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4084 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4085 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4086 mqd->cp_gfx_mqd_control = tmp; 4087 4088 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4089 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4090 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4091 mqd->cp_gfx_hqd_vmid = 0; 4092 4093 /* set up gfx queue priority */ 4094 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4095 4096 /* set up time quantum */ 4097 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4098 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4099 mqd->cp_gfx_hqd_quantum = tmp; 4100 4101 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 4102 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4103 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4104 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4105 4106 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4107 wb_gpu_addr = prop->rptr_gpu_addr; 4108 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4109 mqd->cp_gfx_hqd_rptr_addr_hi = 4110 upper_32_bits(wb_gpu_addr) & 0xffff; 4111 4112 /* set up rb_wptr_poll addr */ 4113 wb_gpu_addr = prop->wptr_gpu_addr; 4114 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4115 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4116 4117 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4118 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4119 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4120 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4121 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4122 #ifdef __BIG_ENDIAN 4123 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4124 #endif 4125 if (prop->tmz_queue) 4126 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); 4127 mqd->cp_gfx_hqd_cntl = tmp; 4128 4129 /* set up cp_doorbell_control */ 4130 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4131 if (prop->use_doorbell) { 4132 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4133 DOORBELL_OFFSET, prop->doorbell_index); 4134 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4135 DOORBELL_EN, 1); 4136 } else 4137 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4138 DOORBELL_EN, 0); 4139 mqd->cp_rb_doorbell_control = tmp; 4140 4141 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4142 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4143 4144 /* active the queue */ 4145 mqd->cp_gfx_hqd_active = 1; 4146 4147 /* set gfx UQ items */ 4148 mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); 4149 mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); 4150 mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); 4151 mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); 4152 mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); 4153 mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); 4154 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4155 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4156 4157 return 0; 4158 } 4159 4160 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4161 { 4162 struct amdgpu_device *adev = ring->adev; 4163 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4164 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4165 4166 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4167 memset((void *)mqd, 0, sizeof(*mqd)); 4168 mutex_lock(&adev->srbm_mutex); 4169 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4170 amdgpu_ring_init_mqd(ring); 4171 soc21_grbm_select(adev, 0, 0, 0, 0); 4172 mutex_unlock(&adev->srbm_mutex); 4173 if (adev->gfx.me.mqd_backup[mqd_idx]) 4174 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4175 } else { 4176 /* restore mqd with the backup copy */ 4177 if (adev->gfx.me.mqd_backup[mqd_idx]) 4178 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4179 /* reset the ring */ 4180 ring->wptr = 0; 4181 *ring->wptr_cpu_addr = 0; 4182 amdgpu_ring_clear_ring(ring); 4183 } 4184 4185 return 0; 4186 } 4187 4188 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4189 { 4190 int r, i; 4191 4192 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4193 r = 
gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4194 if (r) 4195 return r; 4196 } 4197 4198 r = amdgpu_gfx_enable_kgq(adev, 0); 4199 if (r) 4200 return r; 4201 4202 return gfx_v11_0_cp_gfx_start(adev); 4203 } 4204 4205 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4206 struct amdgpu_mqd_prop *prop) 4207 { 4208 struct v11_compute_mqd *mqd = m; 4209 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4210 uint32_t tmp; 4211 4212 mqd->header = 0xC0310800; 4213 mqd->compute_pipelinestat_enable = 0x00000001; 4214 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4215 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4216 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4217 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4218 mqd->compute_misc_reserved = 0x00000007; 4219 4220 eop_base_addr = prop->eop_gpu_addr >> 8; 4221 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4222 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4223 4224 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4225 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4226 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4227 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4228 4229 mqd->cp_hqd_eop_control = tmp; 4230 4231 /* enable doorbell? */ 4232 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4233 4234 if (prop->use_doorbell) { 4235 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4236 DOORBELL_OFFSET, prop->doorbell_index); 4237 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4238 DOORBELL_EN, 1); 4239 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4240 DOORBELL_SOURCE, 0); 4241 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4242 DOORBELL_HIT, 0); 4243 } else { 4244 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4245 DOORBELL_EN, 0); 4246 } 4247 4248 mqd->cp_hqd_pq_doorbell_control = tmp; 4249 4250 /* disable the queue if it's active */ 4251 mqd->cp_hqd_dequeue_request = 0; 4252 mqd->cp_hqd_pq_rptr = 0; 4253 mqd->cp_hqd_pq_wptr_lo = 0; 4254 mqd->cp_hqd_pq_wptr_hi = 0; 4255 4256 /* set the pointer to the MQD */ 4257 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4258 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4259 4260 /* set MQD vmid to 0 */ 4261 tmp = regCP_MQD_CONTROL_DEFAULT; 4262 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4263 mqd->cp_mqd_control = tmp; 4264 4265 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4266 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4267 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4268 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4269 4270 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4271 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4272 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4273 (order_base_2(prop->queue_size / 4) - 1)); 4274 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4275 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4276 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4277 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4278 prop->allow_tunneling); 4279 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4280 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4281 if (prop->tmz_queue) 4282 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4283 mqd->cp_hqd_pq_control = tmp; 4284 4285 /* set the wb address whether it's enabled or not */ 4286 wb_gpu_addr = prop->rptr_gpu_addr; 4287 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4288 
mqd->cp_hqd_pq_rptr_report_addr_hi = 4289 upper_32_bits(wb_gpu_addr) & 0xffff; 4290 4291 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4292 wb_gpu_addr = prop->wptr_gpu_addr; 4293 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4294 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4295 4296 tmp = 0; 4297 /* enable the doorbell if requested */ 4298 if (prop->use_doorbell) { 4299 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4300 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4301 DOORBELL_OFFSET, prop->doorbell_index); 4302 4303 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4304 DOORBELL_EN, 1); 4305 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4306 DOORBELL_SOURCE, 0); 4307 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4308 DOORBELL_HIT, 0); 4309 } 4310 4311 mqd->cp_hqd_pq_doorbell_control = tmp; 4312 4313 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4314 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4315 4316 /* set the vmid for the queue */ 4317 mqd->cp_hqd_vmid = 0; 4318 4319 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4320 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4321 mqd->cp_hqd_persistent_state = tmp; 4322 4323 /* set MIN_IB_AVAIL_SIZE */ 4324 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4325 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4326 mqd->cp_hqd_ib_control = tmp; 4327 4328 /* set static priority for a compute queue/ring */ 4329 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4330 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4331 4332 mqd->cp_hqd_active = prop->hqd_active; 4333 4334 /* set UQ fenceaddress */ 4335 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4336 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4337 4338 return 0; 4339 } 4340 4341 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4342 { 4343 struct amdgpu_device *adev = ring->adev; 4344 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4345 int j; 4346 4347 /* inactivate the queue */ 4348 if (amdgpu_sriov_vf(adev)) 4349 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4350 4351 /* disable wptr polling */ 4352 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4353 4354 /* write the EOP addr */ 4355 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4356 mqd->cp_hqd_eop_base_addr_lo); 4357 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4358 mqd->cp_hqd_eop_base_addr_hi); 4359 4360 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4361 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4362 mqd->cp_hqd_eop_control); 4363 4364 /* enable doorbell? 
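This writes the doorbell control value computed during MQD init; the same register is programmed again further below once the queue registers are restored.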
*/ 4365 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4366 mqd->cp_hqd_pq_doorbell_control); 4367 4368 /* disable the queue if it's active */ 4369 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4370 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4371 for (j = 0; j < adev->usec_timeout; j++) { 4372 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4373 break; 4374 udelay(1); 4375 } 4376 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4377 mqd->cp_hqd_dequeue_request); 4378 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4379 mqd->cp_hqd_pq_rptr); 4380 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4381 mqd->cp_hqd_pq_wptr_lo); 4382 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4383 mqd->cp_hqd_pq_wptr_hi); 4384 } 4385 4386 /* set the pointer to the MQD */ 4387 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4388 mqd->cp_mqd_base_addr_lo); 4389 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4390 mqd->cp_mqd_base_addr_hi); 4391 4392 /* set MQD vmid to 0 */ 4393 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4394 mqd->cp_mqd_control); 4395 4396 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4397 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4398 mqd->cp_hqd_pq_base_lo); 4399 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4400 mqd->cp_hqd_pq_base_hi); 4401 4402 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4403 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4404 mqd->cp_hqd_pq_control); 4405 4406 /* set the wb address whether it's enabled or not */ 4407 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4408 mqd->cp_hqd_pq_rptr_report_addr_lo); 4409 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4410 mqd->cp_hqd_pq_rptr_report_addr_hi); 4411 4412 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4413 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4414 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4415 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4416 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4417 4418 /* enable the doorbell if requested */ 4419 if (ring->use_doorbell) { 4420 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4421 (adev->doorbell_index.kiq * 2) << 2); 4422 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4423 (adev->doorbell_index.userqueue_end * 2) << 2); 4424 } 4425 4426 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4427 mqd->cp_hqd_pq_doorbell_control); 4428 4429 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4430 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4431 mqd->cp_hqd_pq_wptr_lo); 4432 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4433 mqd->cp_hqd_pq_wptr_hi); 4434 4435 /* set the vmid for the queue */ 4436 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4437 4438 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4439 mqd->cp_hqd_persistent_state); 4440 4441 /* activate the queue */ 4442 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4443 mqd->cp_hqd_active); 4444 4445 if (ring->use_doorbell) 4446 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4447 4448 return 0; 4449 } 4450 4451 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4452 { 4453 struct amdgpu_device *adev = ring->adev; 4454 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4455 4456 gfx_v11_0_kiq_setting(ring); 4457 4458 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4459 /* reset MQD to a clean status */ 4460 if (adev->gfx.kiq[0].mqd_backup) 4461 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4462 4463 /* reset ring buffer */ 4464 ring->wptr = 0; 4465 amdgpu_ring_clear_ring(ring); 4466 4467 mutex_lock(&adev->srbm_mutex); 4468 
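/* select the KIQ queue and re-program its HQD registers from the restored MQD */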
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4469 gfx_v11_0_kiq_init_register(ring); 4470 soc21_grbm_select(adev, 0, 0, 0, 0); 4471 mutex_unlock(&adev->srbm_mutex); 4472 } else { 4473 memset((void *)mqd, 0, sizeof(*mqd)); 4474 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4475 amdgpu_ring_clear_ring(ring); 4476 mutex_lock(&adev->srbm_mutex); 4477 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4478 amdgpu_ring_init_mqd(ring); 4479 gfx_v11_0_kiq_init_register(ring); 4480 soc21_grbm_select(adev, 0, 0, 0, 0); 4481 mutex_unlock(&adev->srbm_mutex); 4482 4483 if (adev->gfx.kiq[0].mqd_backup) 4484 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4485 } 4486 4487 return 0; 4488 } 4489 4490 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4491 { 4492 struct amdgpu_device *adev = ring->adev; 4493 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4494 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4495 4496 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4497 memset((void *)mqd, 0, sizeof(*mqd)); 4498 mutex_lock(&adev->srbm_mutex); 4499 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4500 amdgpu_ring_init_mqd(ring); 4501 soc21_grbm_select(adev, 0, 0, 0, 0); 4502 mutex_unlock(&adev->srbm_mutex); 4503 4504 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4505 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4506 } else { 4507 /* restore MQD to a clean status */ 4508 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4509 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4510 /* reset ring buffer */ 4511 ring->wptr = 0; 4512 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4513 amdgpu_ring_clear_ring(ring); 4514 } 4515 4516 return 0; 4517 } 4518 4519 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4520 { 4521 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4522 return 0; 4523 } 4524 4525 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4526 { 4527 int i, r; 4528 4529 if (!amdgpu_async_gfx_ring) 4530 gfx_v11_0_cp_compute_enable(adev, true); 4531 4532 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4533 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4534 if (r) 4535 return r; 4536 } 4537 4538 return amdgpu_gfx_enable_kcq(adev, 0); 4539 } 4540 4541 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4542 { 4543 int r, i; 4544 struct amdgpu_ring *ring; 4545 4546 if (!(adev->flags & AMD_IS_APU)) 4547 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4548 4549 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4550 /* legacy firmware loading */ 4551 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4552 if (r) 4553 return r; 4554 4555 if (adev->gfx.rs64_enable) 4556 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4557 else 4558 r = gfx_v11_0_cp_compute_load_microcode(adev); 4559 if (r) 4560 return r; 4561 } 4562 4563 gfx_v11_0_cp_set_doorbell_range(adev); 4564 4565 if (amdgpu_async_gfx_ring) { 4566 gfx_v11_0_cp_compute_enable(adev, true); 4567 gfx_v11_0_cp_gfx_enable(adev, true); 4568 } 4569 4570 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4571 r = amdgpu_mes_kiq_hw_init(adev); 4572 else 4573 r = gfx_v11_0_kiq_resume(adev); 4574 if (r) 4575 return r; 4576 4577 r = gfx_v11_0_kcq_resume(adev); 4578 if (r) 4579 return r; 4580 4581 if (!amdgpu_async_gfx_ring) { 4582 r = gfx_v11_0_cp_gfx_resume(adev); 4583 if (r) 4584 return r; 4585 } else { 4586 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4587 if (r) 4588 return r; 4589 } 4590 4591 if 
(adev->gfx.disable_kq) { 4592 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4593 ring = &adev->gfx.gfx_ring[i]; 4594 /* we don't want to set ring->ready */ 4595 r = amdgpu_ring_test_ring(ring); 4596 if (r) 4597 return r; 4598 } 4599 if (amdgpu_async_gfx_ring) 4600 amdgpu_gfx_disable_kgq(adev, 0); 4601 } else { 4602 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4603 ring = &adev->gfx.gfx_ring[i]; 4604 r = amdgpu_ring_test_helper(ring); 4605 if (r) 4606 return r; 4607 } 4608 } 4609 4610 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4611 ring = &adev->gfx.compute_ring[i]; 4612 r = amdgpu_ring_test_helper(ring); 4613 if (r) 4614 return r; 4615 } 4616 4617 return 0; 4618 } 4619 4620 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4621 { 4622 gfx_v11_0_cp_gfx_enable(adev, enable); 4623 gfx_v11_0_cp_compute_enable(adev, enable); 4624 } 4625 4626 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4627 { 4628 int r; 4629 bool value; 4630 4631 r = adev->gfxhub.funcs->gart_enable(adev); 4632 if (r) 4633 return r; 4634 4635 amdgpu_device_flush_hdp(adev, NULL); 4636 4637 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4638 false : true; 4639 4640 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4641 /* TODO investigate why this and the hdp flush above is needed, 4642 * are we missing a flush somewhere else? */ 4643 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4644 4645 return 0; 4646 } 4647 4648 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4649 { 4650 u32 tmp; 4651 4652 /* select RS64 */ 4653 if (adev->gfx.rs64_enable) { 4654 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4655 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4656 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4657 4658 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4659 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4660 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4661 } 4662 4663 if (amdgpu_emu_mode == 1) 4664 msleep(100); 4665 } 4666 4667 static int get_gb_addr_config(struct amdgpu_device * adev) 4668 { 4669 u32 gb_addr_config; 4670 4671 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4672 if (gb_addr_config == 0) 4673 return -EINVAL; 4674 4675 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4676 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4677 4678 adev->gfx.config.gb_addr_config = gb_addr_config; 4679 4680 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4681 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4682 GB_ADDR_CONFIG, NUM_PIPES); 4683 4684 adev->gfx.config.max_tile_pipes = 4685 adev->gfx.config.gb_addr_config_fields.num_pipes; 4686 4687 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4688 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4689 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4690 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4691 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4692 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4693 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4694 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4695 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4696 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4697 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4698 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4699 4700 return 0; 4701 } 4702 4703 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4704 { 4705 uint32_t data; 4706 4707 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4708 data |= 
CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4709 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4710 4711 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4712 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4713 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4714 } 4715 4716 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4717 { 4718 int r; 4719 struct amdgpu_device *adev = ip_block->adev; 4720 4721 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4722 adev->gfx.cleaner_shader_ptr); 4723 4724 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4725 if (adev->gfx.imu.funcs) { 4726 /* RLC autoload sequence 1: Program rlc ram */ 4727 if (adev->gfx.imu.funcs->program_rlc_ram) 4728 adev->gfx.imu.funcs->program_rlc_ram(adev); 4729 /* rlc autoload firmware */ 4730 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4731 if (r) 4732 return r; 4733 } 4734 } else { 4735 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4736 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4737 if (adev->gfx.imu.funcs->load_microcode) 4738 adev->gfx.imu.funcs->load_microcode(adev); 4739 if (adev->gfx.imu.funcs->setup_imu) 4740 adev->gfx.imu.funcs->setup_imu(adev); 4741 if (adev->gfx.imu.funcs->start_imu) 4742 adev->gfx.imu.funcs->start_imu(adev); 4743 } 4744 4745 /* disable gpa mode in backdoor loading */ 4746 gfx_v11_0_disable_gpa_mode(adev); 4747 } 4748 } 4749 4750 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4751 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4752 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4753 if (r) { 4754 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 4755 return r; 4756 } 4757 } 4758 4759 adev->gfx.is_poweron = true; 4760 4761 if(get_gb_addr_config(adev)) 4762 DRM_WARN("Invalid gb_addr_config !\n"); 4763 4764 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4765 adev->gfx.rs64_enable) 4766 gfx_v11_0_config_gfx_rs64(adev); 4767 4768 r = gfx_v11_0_gfxhub_enable(adev); 4769 if (r) 4770 return r; 4771 4772 if (!amdgpu_emu_mode) 4773 gfx_v11_0_init_golden_registers(adev); 4774 4775 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4776 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4777 /** 4778 * For gfx 11, rlc firmware loading relies on smu firmware is 4779 * loaded firstly, so in direct type, it has to load smc ucode 4780 * here before rlc. 
4781 */ 4782 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4783 if (r) 4784 return r; 4785 } 4786 4787 gfx_v11_0_constants_init(adev); 4788 4789 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4790 gfx_v11_0_select_cp_fw_arch(adev); 4791 4792 if (adev->nbio.funcs->gc_doorbell_init) 4793 adev->nbio.funcs->gc_doorbell_init(adev); 4794 4795 r = gfx_v11_0_rlc_resume(adev); 4796 if (r) 4797 return r; 4798 4799 /* 4800 * init golden registers and rlc resume may override some registers, 4801 * reconfig them here 4802 */ 4803 gfx_v11_0_tcp_harvest(adev); 4804 4805 r = gfx_v11_0_cp_resume(adev); 4806 if (r) 4807 return r; 4808 4809 /* get IMU version from HW if it's not set */ 4810 if (!adev->gfx.imu_fw_version) 4811 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4812 4813 return r; 4814 } 4815 4816 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, 4817 bool enable) 4818 { 4819 unsigned int irq_type; 4820 int m, p, r; 4821 4822 if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { 4823 for (m = 0; m < adev->gfx.me.num_me; m++) { 4824 for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { 4825 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4826 if (enable) 4827 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4828 irq_type); 4829 else 4830 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4831 irq_type); 4832 if (r) 4833 return r; 4834 } 4835 } 4836 } 4837 4838 if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { 4839 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 4840 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 4841 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4842 + (m * adev->gfx.mec.num_pipe_per_mec) 4843 + p; 4844 if (enable) 4845 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4846 irq_type); 4847 else 4848 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4849 irq_type); 4850 if (r) 4851 return r; 4852 } 4853 } 4854 } 4855 4856 return 0; 4857 } 4858 4859 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4860 { 4861 struct amdgpu_device *adev = ip_block->adev; 4862 4863 cancel_delayed_work_sync(&adev->gfx.idle_work); 4864 4865 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4866 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4867 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4868 gfx_v11_0_set_userq_eop_interrupts(adev, false); 4869 4870 if (!adev->no_hw_access) { 4871 if (amdgpu_async_gfx_ring && 4872 !adev->gfx.disable_kq) { 4873 if (amdgpu_gfx_disable_kgq(adev, 0)) 4874 DRM_ERROR("KGQ disable failed\n"); 4875 } 4876 4877 if (amdgpu_gfx_disable_kcq(adev, 0)) 4878 DRM_ERROR("KCQ disable failed\n"); 4879 4880 amdgpu_mes_kiq_hw_fini(adev); 4881 } 4882 4883 if (amdgpu_sriov_vf(adev)) 4884 /* Remove the steps disabling CPG and clearing KIQ position, 4885 * so that CP could perform IDLE-SAVE during switch. Those 4886 * steps are necessary to avoid a DMAR error in gfx9 but it is 4887 * not reproduced on gfx11. 
4888 */ 4889 return 0; 4890 4891 gfx_v11_0_cp_enable(adev, false); 4892 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4893 4894 adev->gfxhub.funcs->gart_disable(adev); 4895 4896 adev->gfx.is_poweron = false; 4897 4898 return 0; 4899 } 4900 4901 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4902 { 4903 return gfx_v11_0_hw_fini(ip_block); 4904 } 4905 4906 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4907 { 4908 return gfx_v11_0_hw_init(ip_block); 4909 } 4910 4911 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 4912 { 4913 struct amdgpu_device *adev = ip_block->adev; 4914 4915 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4916 GRBM_STATUS, GUI_ACTIVE)) 4917 return false; 4918 else 4919 return true; 4920 } 4921 4922 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4923 { 4924 unsigned i; 4925 u32 tmp; 4926 struct amdgpu_device *adev = ip_block->adev; 4927 4928 for (i = 0; i < adev->usec_timeout; i++) { 4929 /* read MC_STATUS */ 4930 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4931 GRBM_STATUS__GUI_ACTIVE_MASK; 4932 4933 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4934 return 0; 4935 udelay(1); 4936 } 4937 return -ETIMEDOUT; 4938 } 4939 4940 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4941 bool req) 4942 { 4943 u32 i, tmp, val; 4944 4945 for (i = 0; i < adev->usec_timeout; i++) { 4946 /* Request with MeId=2, PipeId=0 */ 4947 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4948 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4949 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4950 4951 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4952 if (req) { 4953 if (val == tmp) 4954 break; 4955 } else { 4956 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4957 REQUEST, 1); 4958 4959 /* unlocked or locked by firmware */ 4960 if (val != tmp) 4961 break; 4962 } 4963 udelay(1); 4964 } 4965 4966 if (i >= adev->usec_timeout) 4967 return -EINVAL; 4968 4969 return 0; 4970 } 4971 4972 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4973 { 4974 u32 grbm_soft_reset = 0; 4975 u32 tmp; 4976 int r, i, j, k; 4977 struct amdgpu_device *adev = ip_block->adev; 4978 4979 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4980 4981 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4986 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4987 4988 mutex_lock(&adev->srbm_mutex); 4989 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4990 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4991 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4992 soc21_grbm_select(adev, i, k, j, 0); 4993 4994 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4995 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4996 } 4997 } 4998 } 4999 for (i = 0; i < adev->gfx.me.num_me; ++i) { 5000 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 5001 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 5002 soc21_grbm_select(adev, i, k, j, 0); 5003 5004 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5005 } 5006 } 5007 } 5008 soc21_grbm_select(adev, 0, 0, 0, 0); 5009 mutex_unlock(&adev->srbm_mutex); 5010 5011 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5012 mutex_lock(&adev->gfx.reset_sem_mutex); 5013 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true);
5014 if (r) {
5015 mutex_unlock(&adev->gfx.reset_sem_mutex);
5016 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
5017 return r;
5018 }
5019
5020 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
5021
5022 /* Read the CP_VMID_RESET register three times to give
5023  * GFX_HQD_ACTIVE sufficient time to reach 0. */
5024 RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5025 RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5026 RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5027
5028 /* release the gfx mutex */
5029 r = gfx_v11_0_request_gfx_index_mutex(adev, false);
5030 mutex_unlock(&adev->gfx.reset_sem_mutex);
5031 if (r) {
5032 DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
5033 return r;
5034 }
5035
5036 for (i = 0; i < adev->usec_timeout; i++) {
5037 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
5038 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
5039 break;
5040 udelay(1);
5041 }
5042 if (i >= adev->usec_timeout) {
5043 printk("Failed to wait for all pipes to become idle\n");
5044 return -EINVAL;
5045 }
5046
5047 /********** trigger soft reset ***********/
5048 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5049 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5050 SOFT_RESET_CP, 1);
5051 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5052 SOFT_RESET_GFX, 1);
5053 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5054 SOFT_RESET_CPF, 1);
5055 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5056 SOFT_RESET_CPC, 1);
5057 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5058 SOFT_RESET_CPG, 1);
5059 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5060 /********** exit soft reset ***********/
5061 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5062 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5063 SOFT_RESET_CP, 0);
5064 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5065 SOFT_RESET_GFX, 0);
5066 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5067 SOFT_RESET_CPF, 0);
5068 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5069 SOFT_RESET_CPC, 0);
5070 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5071 SOFT_RESET_CPG, 0);
5072 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5073
5074 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
5075 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
5076 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
5077
5078 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
5079 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
5080
5081 for (i = 0; i < adev->usec_timeout; i++) {
5082 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
5083 break;
5084 udelay(1);
5085 }
5086 if (i >= adev->usec_timeout) {
5087 printk("Failed to wait for CP_VMID_RESET to reach 0\n");
5088 return -EINVAL;
5089 }
5090
5091 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5092 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5093 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5094 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5095 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5096 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
5097
5098 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5099
5100 return gfx_v11_0_cp_resume(adev);
5101 }
5102
5103 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
5104 {
5105 int i, r;
5106 struct amdgpu_device *adev = ip_block->adev;
5107
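/* Run a short IB test on every GFX and compute ring below; if any of
 * them fails, the CP is considered hung and a soft reset is requested.
 */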
struct amdgpu_ring *ring; 5108 long tmo = msecs_to_jiffies(1000); 5109 5110 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5111 ring = &adev->gfx.gfx_ring[i]; 5112 r = amdgpu_ring_test_ib(ring, tmo); 5113 if (r) 5114 return true; 5115 } 5116 5117 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5118 ring = &adev->gfx.compute_ring[i]; 5119 r = amdgpu_ring_test_ib(ring, tmo); 5120 if (r) 5121 return true; 5122 } 5123 5124 return false; 5125 } 5126 5127 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5128 { 5129 struct amdgpu_device *adev = ip_block->adev; 5130 /** 5131 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5132 */ 5133 return amdgpu_mes_resume(adev); 5134 } 5135 5136 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5137 { 5138 uint64_t clock; 5139 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5140 5141 if (amdgpu_sriov_vf(adev)) { 5142 amdgpu_gfx_off_ctrl(adev, false); 5143 mutex_lock(&adev->gfx.gpu_clock_mutex); 5144 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5145 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5146 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5147 if (clock_counter_hi_pre != clock_counter_hi_after) 5148 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5149 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5150 amdgpu_gfx_off_ctrl(adev, true); 5151 } else { 5152 preempt_disable(); 5153 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5154 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5155 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5156 if (clock_counter_hi_pre != clock_counter_hi_after) 5157 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5158 preempt_enable(); 5159 } 5160 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5161 5162 return clock; 5163 } 5164 5165 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5166 uint32_t vmid, 5167 uint32_t gds_base, uint32_t gds_size, 5168 uint32_t gws_base, uint32_t gws_size, 5169 uint32_t oa_base, uint32_t oa_size) 5170 { 5171 struct amdgpu_device *adev = ring->adev; 5172 5173 /* GDS Base */ 5174 gfx_v11_0_write_data_to_reg(ring, 0, false, 5175 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5176 gds_base); 5177 5178 /* GDS Size */ 5179 gfx_v11_0_write_data_to_reg(ring, 0, false, 5180 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5181 gds_size); 5182 5183 /* GWS */ 5184 gfx_v11_0_write_data_to_reg(ring, 0, false, 5185 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5186 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5187 5188 /* OA */ 5189 gfx_v11_0_write_data_to_reg(ring, 0, false, 5190 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5191 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5192 } 5193 5194 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5195 { 5196 struct amdgpu_device *adev = ip_block->adev; 5197 5198 switch (amdgpu_user_queue) { 5199 case -1: 5200 case 0: 5201 default: 5202 adev->gfx.disable_kq = false; 5203 adev->gfx.disable_uq = true; 5204 break; 5205 case 1: 5206 adev->gfx.disable_kq = false; 5207 adev->gfx.disable_uq = false; 5208 break; 5209 case 2: 5210 adev->gfx.disable_kq = true; 5211 adev->gfx.disable_uq = false; 5212 break; 5213 } 5214 5215 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5216 
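/* Select how many kernel GFX/compute rings to expose based on the
 * user-queue mode chosen above.
 */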
5217 if (adev->gfx.disable_kq) { 5218 /* We need one GFX ring temporarily to set up 5219 * the clear state. 5220 */ 5221 adev->gfx.num_gfx_rings = 1; 5222 adev->gfx.num_compute_rings = 0; 5223 } else { 5224 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5225 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5226 AMDGPU_MAX_COMPUTE_RINGS); 5227 } 5228 5229 gfx_v11_0_set_kiq_pm4_funcs(adev); 5230 gfx_v11_0_set_ring_funcs(adev); 5231 gfx_v11_0_set_irq_funcs(adev); 5232 gfx_v11_0_set_gds_init(adev); 5233 gfx_v11_0_set_rlc_funcs(adev); 5234 gfx_v11_0_set_mqd_funcs(adev); 5235 gfx_v11_0_set_imu_funcs(adev); 5236 5237 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5238 5239 return gfx_v11_0_init_microcode(adev); 5240 } 5241 5242 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5243 { 5244 struct amdgpu_device *adev = ip_block->adev; 5245 int r; 5246 5247 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5248 if (r) 5249 return r; 5250 5251 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5252 if (r) 5253 return r; 5254 5255 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5256 if (r) 5257 return r; 5258 5259 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5260 if (r) 5261 return r; 5262 5263 return 0; 5264 } 5265 5266 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5267 { 5268 uint32_t rlc_cntl; 5269 5270 /* if RLC is not enabled, do nothing */ 5271 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5272 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5273 } 5274 5275 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5276 { 5277 uint32_t data; 5278 unsigned i; 5279 5280 data = RLC_SAFE_MODE__CMD_MASK; 5281 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5282 5283 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5284 5285 /* wait for RLC_SAFE_MODE */ 5286 for (i = 0; i < adev->usec_timeout; i++) { 5287 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5288 RLC_SAFE_MODE, CMD)) 5289 break; 5290 udelay(1); 5291 } 5292 } 5293 5294 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5295 { 5296 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5297 } 5298 5299 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5300 bool enable) 5301 { 5302 uint32_t def, data; 5303 5304 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5305 return; 5306 5307 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5308 5309 if (enable) 5310 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5311 else 5312 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5313 5314 if (def != data) 5315 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5316 } 5317 5318 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5319 bool enable) 5320 { 5321 uint32_t def, data; 5322 5323 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5324 return; 5325 5326 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5327 5328 if (enable) 5329 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5330 else 5331 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5332 5333 if (def != data) 5334 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5335 } 5336 5337 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5338 bool enable) 5339 { 5340 uint32_t def, data; 5341 5342 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5343 return; 5344 5345 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5346 
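/* Clearing the repeater FGCG override below lets the fine-grain clock
 * gating take effect; setting it keeps the gating overridden (disabled).
 */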
5347 if (enable) 5348 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5349 else 5350 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5351 5352 if (def != data) 5353 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5354 } 5355 5356 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5357 bool enable) 5358 { 5359 uint32_t data, def; 5360 5361 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5362 return; 5363 5364 /* It is disabled by HW by default */ 5365 if (enable) { 5366 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5367 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5368 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5369 5370 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5371 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5372 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5373 5374 if (def != data) 5375 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5376 } 5377 } else { 5378 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5379 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5380 5381 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5382 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5383 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5384 5385 if (def != data) 5386 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5387 } 5388 } 5389 } 5390 5391 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5392 bool enable) 5393 { 5394 uint32_t def, data; 5395 5396 if (!(adev->cg_flags & 5397 (AMD_CG_SUPPORT_GFX_CGCG | 5398 AMD_CG_SUPPORT_GFX_CGLS | 5399 AMD_CG_SUPPORT_GFX_3D_CGCG | 5400 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5401 return; 5402 5403 if (enable) { 5404 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5405 5406 /* unset CGCG override */ 5407 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5408 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5409 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5410 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5411 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5412 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5413 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5414 5415 /* update CGCG override bits */ 5416 if (def != data) 5417 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5418 5419 /* enable cgcg FSM(0x0000363F) */ 5420 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5421 5422 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5423 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5424 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5425 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5426 } 5427 5428 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5429 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5430 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5431 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5432 } 5433 5434 if (def != data) 5435 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5436 5437 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5438 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5439 5440 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5441 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5442 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5443 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5444 } 5445 5446 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5447 data &= 
~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5448 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5449 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5450 } 5451 5452 if (def != data) 5453 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5454 5455 /* set IDLE_POLL_COUNT(0x00900100) */ 5456 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5457 5458 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5459 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5460 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5461 5462 if (def != data) 5463 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5464 5465 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5466 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5467 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5468 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5469 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5470 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5471 5472 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5473 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5474 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5475 5476 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5477 if (adev->sdma.num_instances > 1) { 5478 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5479 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5480 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5481 } 5482 } else { 5483 /* Program RLC_CGCG_CGLS_CTRL */ 5484 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5485 5486 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5487 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5488 5489 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5490 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5491 5492 if (def != data) 5493 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5494 5495 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5496 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5497 5498 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5499 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5500 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5501 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5502 5503 if (def != data) 5504 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5505 5506 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5507 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5508 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5509 5510 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5511 if (adev->sdma.num_instances > 1) { 5512 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5513 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5514 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5515 } 5516 } 5517 } 5518 5519 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5520 bool enable) 5521 { 5522 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5523 5524 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5525 5526 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5527 5528 gfx_v11_0_update_repeater_fgcg(adev, enable); 5529 5530 gfx_v11_0_update_sram_fgcg(adev, enable); 5531 5532 gfx_v11_0_update_perf_clk(adev, enable); 5533 5534 if (adev->cg_flags & 5535 (AMD_CG_SUPPORT_GFX_MGCG | 5536 AMD_CG_SUPPORT_GFX_CGLS | 5537 AMD_CG_SUPPORT_GFX_CGCG | 5538 AMD_CG_SUPPORT_GFX_3D_CGCG | 5539 AMD_CG_SUPPORT_GFX_3D_CGLS)) 
5540 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5541 5542 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5543 5544 return 0; 5545 } 5546 5547 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5548 { 5549 u32 reg, pre_data, data; 5550 5551 amdgpu_gfx_off_ctrl(adev, false); 5552 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5553 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5554 pre_data = RREG32_NO_KIQ(reg); 5555 else 5556 pre_data = RREG32(reg); 5557 5558 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5559 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5560 5561 if (pre_data != data) { 5562 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5563 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5564 } else 5565 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5566 } 5567 amdgpu_gfx_off_ctrl(adev, true); 5568 5569 if (ring 5570 && amdgpu_sriov_is_pp_one_vf(adev) 5571 && (pre_data != data) 5572 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5573 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5574 amdgpu_ring_emit_wreg(ring, reg, data); 5575 } 5576 } 5577 5578 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5579 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5580 .set_safe_mode = gfx_v11_0_set_safe_mode, 5581 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5582 .init = gfx_v11_0_rlc_init, 5583 .get_csb_size = gfx_v11_0_get_csb_size, 5584 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5585 .resume = gfx_v11_0_rlc_resume, 5586 .stop = gfx_v11_0_rlc_stop, 5587 .reset = gfx_v11_0_rlc_reset, 5588 .start = gfx_v11_0_rlc_start, 5589 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5590 }; 5591 5592 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5593 { 5594 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5595 5596 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5597 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5598 else 5599 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5600 5601 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5602 5603 // Program RLC_PG_DELAY3 for CGPG hysteresis 5604 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5605 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5606 case IP_VERSION(11, 0, 1): 5607 case IP_VERSION(11, 0, 4): 5608 case IP_VERSION(11, 5, 0): 5609 case IP_VERSION(11, 5, 1): 5610 case IP_VERSION(11, 5, 2): 5611 case IP_VERSION(11, 5, 3): 5612 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5613 break; 5614 default: 5615 break; 5616 } 5617 } 5618 } 5619 5620 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5621 { 5622 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5623 5624 gfx_v11_cntl_power_gating(adev, enable); 5625 5626 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5627 } 5628 5629 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5630 enum amd_powergating_state state) 5631 { 5632 struct amdgpu_device *adev = ip_block->adev; 5633 bool enable = (state == AMD_PG_STATE_GATE); 5634 5635 if (amdgpu_sriov_vf(adev)) 5636 return 0; 5637 5638 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5639 case IP_VERSION(11, 0, 0): 5640 case IP_VERSION(11, 0, 2): 5641 case IP_VERSION(11, 0, 3): 5642 amdgpu_gfx_off_ctrl(adev, enable); 5643 break; 5644 case IP_VERSION(11, 0, 1): 5645 case IP_VERSION(11, 0, 4): 5646 case IP_VERSION(11, 5, 0): 5647 case IP_VERSION(11, 5, 1): 5648 case IP_VERSION(11, 5, 
2): 5649 case IP_VERSION(11, 5, 3): 5650 if (!enable) 5651 amdgpu_gfx_off_ctrl(adev, false); 5652 5653 gfx_v11_cntl_pg(adev, enable); 5654 5655 if (enable) 5656 amdgpu_gfx_off_ctrl(adev, true); 5657 5658 break; 5659 default: 5660 break; 5661 } 5662 5663 return 0; 5664 } 5665 5666 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5667 enum amd_clockgating_state state) 5668 { 5669 struct amdgpu_device *adev = ip_block->adev; 5670 5671 if (amdgpu_sriov_vf(adev)) 5672 return 0; 5673 5674 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5675 case IP_VERSION(11, 0, 0): 5676 case IP_VERSION(11, 0, 1): 5677 case IP_VERSION(11, 0, 2): 5678 case IP_VERSION(11, 0, 3): 5679 case IP_VERSION(11, 0, 4): 5680 case IP_VERSION(11, 5, 0): 5681 case IP_VERSION(11, 5, 1): 5682 case IP_VERSION(11, 5, 2): 5683 case IP_VERSION(11, 5, 3): 5684 gfx_v11_0_update_gfx_clock_gating(adev, 5685 state == AMD_CG_STATE_GATE); 5686 break; 5687 default: 5688 break; 5689 } 5690 5691 return 0; 5692 } 5693 5694 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5695 { 5696 struct amdgpu_device *adev = ip_block->adev; 5697 int data; 5698 5699 /* AMD_CG_SUPPORT_GFX_MGCG */ 5700 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5701 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5702 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5703 5704 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5705 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5706 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5707 5708 /* AMD_CG_SUPPORT_GFX_FGCG */ 5709 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5710 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5711 5712 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5713 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5714 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5715 5716 /* AMD_CG_SUPPORT_GFX_CGCG */ 5717 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5718 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5719 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5720 5721 /* AMD_CG_SUPPORT_GFX_CGLS */ 5722 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5723 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5724 5725 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5726 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5727 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5728 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5729 5730 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5731 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5732 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5733 } 5734 5735 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5736 { 5737 /* gfx11 is 32bit rptr*/ 5738 return *(uint32_t *)ring->rptr_cpu_addr; 5739 } 5740 5741 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5742 { 5743 struct amdgpu_device *adev = ring->adev; 5744 u64 wptr; 5745 5746 /* XXX check if swapping is necessary on BE */ 5747 if (ring->use_doorbell) { 5748 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5749 } else { 5750 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5751 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5752 } 5753 5754 return wptr; 5755 } 5756 5757 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5758 { 5759 struct amdgpu_device *adev = ring->adev; 5760 5761 if (ring->use_doorbell) { 5762 /* XXX check if swapping is necessary on BE */ 5763 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5764 ring->wptr); 5765 WDOORBELL64(ring->doorbell_index, ring->wptr); 5766 } else { 5767 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5768 
lower_32_bits(ring->wptr)); 5769 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5770 upper_32_bits(ring->wptr)); 5771 } 5772 } 5773 5774 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5775 { 5776 /* gfx11 hardware is 32bit rptr */ 5777 return *(uint32_t *)ring->rptr_cpu_addr; 5778 } 5779 5780 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5781 { 5782 u64 wptr; 5783 5784 /* XXX check if swapping is necessary on BE */ 5785 if (ring->use_doorbell) 5786 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5787 else 5788 BUG(); 5789 return wptr; 5790 } 5791 5792 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5793 { 5794 struct amdgpu_device *adev = ring->adev; 5795 5796 /* XXX check if swapping is necessary on BE */ 5797 if (ring->use_doorbell) { 5798 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5799 ring->wptr); 5800 WDOORBELL64(ring->doorbell_index, ring->wptr); 5801 } else { 5802 BUG(); /* only DOORBELL method supported on gfx11 now */ 5803 } 5804 } 5805 5806 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5807 { 5808 struct amdgpu_device *adev = ring->adev; 5809 u32 ref_and_mask, reg_mem_engine; 5810 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5811 5812 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5813 switch (ring->me) { 5814 case 1: 5815 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5816 break; 5817 case 2: 5818 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5819 break; 5820 default: 5821 return; 5822 } 5823 reg_mem_engine = 0; 5824 } else { 5825 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5826 reg_mem_engine = 1; /* pfp */ 5827 } 5828 5829 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5830 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5831 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5832 ref_and_mask, ref_and_mask, 0x20); 5833 } 5834 5835 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5836 struct amdgpu_job *job, 5837 struct amdgpu_ib *ib, 5838 uint32_t flags) 5839 { 5840 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5841 u32 header, control = 0; 5842 5843 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5844 5845 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5846 5847 control |= ib->length_dw | (vmid << 24); 5848 5849 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5850 control |= INDIRECT_BUFFER_PRE_ENB(1); 5851 5852 if (flags & AMDGPU_IB_PREEMPTED) 5853 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5854 5855 if (vmid) 5856 gfx_v11_0_ring_emit_de_meta(ring, 5857 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5858 } 5859 5860 amdgpu_ring_write(ring, header); 5861 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5862 amdgpu_ring_write(ring, 5863 #ifdef __BIG_ENDIAN 5864 (2 << 0) | 5865 #endif 5866 lower_32_bits(ib->gpu_addr)); 5867 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5868 amdgpu_ring_write(ring, control); 5869 } 5870 5871 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5872 struct amdgpu_job *job, 5873 struct amdgpu_ib *ib, 5874 uint32_t flags) 5875 { 5876 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5877 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5878 5879 /* Currently, there is a high possibility to get wave ID mismatch 5880 * between ME and GDS, leading to a hw deadlock, because ME generates 5881 * different wave IDs than the GDS expects. 
This situation happens 5882 * randomly when at least 5 compute pipes use GDS ordered append. 5883 * The wave IDs generated by ME are also wrong after suspend/resume. 5884 * Those are probably bugs somewhere else in the kernel driver. 5885 * 5886 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5887 * GDS to 0 for this ring (me/pipe). 5888 */ 5889 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5890 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5891 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5892 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5893 } 5894 5895 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5896 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5897 amdgpu_ring_write(ring, 5898 #ifdef __BIG_ENDIAN 5899 (2 << 0) | 5900 #endif 5901 lower_32_bits(ib->gpu_addr)); 5902 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5903 amdgpu_ring_write(ring, control); 5904 } 5905 5906 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5907 u64 seq, unsigned flags) 5908 { 5909 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5910 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5911 5912 /* RELEASE_MEM - flush caches, send int */ 5913 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5914 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5915 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5916 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5917 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5918 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5919 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5920 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5921 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5922 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 
2 : 0)));
5923
5924 /*
5925  * the address should be Qword aligned for a 64-bit write, and Dword
5926  * aligned when only the low 32 bits of data are sent (the high bits are discarded)
5927  */
5928 if (write64bit)
5929 BUG_ON(addr & 0x7);
5930 else
5931 BUG_ON(addr & 0x3);
5932 amdgpu_ring_write(ring, lower_32_bits(addr));
5933 amdgpu_ring_write(ring, upper_32_bits(addr));
5934 amdgpu_ring_write(ring, lower_32_bits(seq));
5935 amdgpu_ring_write(ring, upper_32_bits(seq));
5936 amdgpu_ring_write(ring, 0);
5937 }
5938
5939 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5940 {
5941 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5942 uint32_t seq = ring->fence_drv.sync_seq;
5943 uint64_t addr = ring->fence_drv.gpu_addr;
5944
5945 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5946 upper_32_bits(addr), seq, 0xffffffff, 4);
5947 }
5948
5949 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5950 uint16_t pasid, uint32_t flush_type,
5951 bool all_hub, uint8_t dst_sel)
5952 {
5953 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5954 amdgpu_ring_write(ring,
5955 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5956 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5957 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5958 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5959 }
5960
5961 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5962 unsigned vmid, uint64_t pd_addr)
5963 {
5964 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5965
5966 /* compute doesn't have PFP */
5967 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5968 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5969 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5970 amdgpu_ring_write(ring, 0x0);
5971 }
5972
5973 /* Make sure that we can't skip the SET_Q_MODE packets when the VM
5974  * has changed in any way.
5975  */
5976 ring->set_q_mode_offs = 0;
5977 ring->set_q_mode_ptr = NULL;
5978 }
5979
5980 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5981 u64 seq, unsigned int flags)
5982 {
5983 struct amdgpu_device *adev = ring->adev;
5984
5985 /* we only allocate 32 bits for each seq wb address */
5986 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5987
5988 /* write fence seq to the "addr" */
5989 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5990 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5991 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5992 amdgpu_ring_write(ring, lower_32_bits(addr));
5993 amdgpu_ring_write(ring, upper_32_bits(addr));
5994 amdgpu_ring_write(ring, lower_32_bits(seq));
5995
5996 if (flags & AMDGPU_FENCE_FLAG_INT) {
5997 /* set register to trigger INT */
5998 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5999 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6000 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6001 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
6002 amdgpu_ring_write(ring, 0);
6003 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6004 }
6005 }
6006
6007 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
6008 uint32_t flags)
6009 {
6010 uint32_t dw2 = 0;
6011
6012 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6013 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6014 /* set load_global_config & load_global_uconfig */
6015 dw2 |= 0x8001;
6016 /* set load_cs_sh_regs */
6017 dw2 |= 0x01000000;
6018 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6019 dw2 |= 0x10002;
6020 }
6021
6022 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6023 amdgpu_ring_write(ring, dw2);
6024 amdgpu_ring_write(ring, 0);
6025 }
6026
6027 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6028 uint64_t addr)
6029 {
6030 unsigned ret;
6031
6032 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6033 amdgpu_ring_write(ring, lower_32_bits(addr));
6034 amdgpu_ring_write(ring, upper_32_bits(addr));
6035 /* discard following DWs if *cond_exec_gpu_addr==0 */
6036 amdgpu_ring_write(ring, 0);
6037 ret = ring->wptr & ring->buf_mask;
6038 /* patch dummy value later */
6039 amdgpu_ring_write(ring, 0);
6040
6041 return ret;
6042 }
6043
6044 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
6045 u64 shadow_va, u64 csa_va,
6046 u64 gds_va, bool init_shadow,
6047 int vmid)
6048 {
6049 struct amdgpu_device *adev = ring->adev;
6050 unsigned int offs, end;
6051
6052 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
6053 return;
6054
6055 /*
6056  * The logic here isn't easy to understand because we need to keep state
6057  * across multiple executions of the function as well as between the
6058  * CPU and GPU. The general idea is that the newly written GPU command
6059  * has a condition on the previous one and is only executed if really
6060  * necessary.
6061  */
6062
6063 /*
6064  * The dw in the NOP controls whether the next SET_Q_MODE packet should
6065  * be executed or not. Reserve 64 bits just to be on the safe side.
6066  */
6067 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
6068 offs = ring->wptr & ring->buf_mask;
6069
6070 /*
6071  * We start by skipping the prefix SET_Q_MODE and always executing
6072  * the postfix SET_Q_MODE packet. This is changed below with a
6073  * WRITE_DATA command once the postfix has executed.
6074  */
6075 amdgpu_ring_write(ring, shadow_va ?
1 : 0); 6076 amdgpu_ring_write(ring, 0); 6077 6078 if (ring->set_q_mode_offs) { 6079 uint64_t addr; 6080 6081 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6082 addr += ring->set_q_mode_offs << 2; 6083 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6084 } 6085 6086 /* 6087 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6088 * next prefix SET_Q_MODE packet executes as well. 6089 */ 6090 if (!shadow_va) { 6091 uint64_t addr; 6092 6093 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6094 addr += offs << 2; 6095 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6096 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6097 amdgpu_ring_write(ring, lower_32_bits(addr)); 6098 amdgpu_ring_write(ring, upper_32_bits(addr)); 6099 amdgpu_ring_write(ring, 0x1); 6100 } 6101 6102 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6103 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6104 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6105 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6106 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6107 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6108 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6109 amdgpu_ring_write(ring, shadow_va ? 6110 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6111 amdgpu_ring_write(ring, init_shadow ? 6112 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6113 6114 if (ring->set_q_mode_offs) 6115 amdgpu_ring_patch_cond_exec(ring, end); 6116 6117 if (shadow_va) { 6118 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6119 6120 /* 6121 * If the tokens match try to skip the last postfix SET_Q_MODE 6122 * packet to avoid saving/restoring the state all the time. 6123 */ 6124 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6125 *ring->set_q_mode_ptr = 0; 6126 6127 ring->set_q_mode_token = token; 6128 } else { 6129 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6130 } 6131 6132 ring->set_q_mode_offs = offs; 6133 } 6134 6135 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 6136 { 6137 int i, r = 0; 6138 struct amdgpu_device *adev = ring->adev; 6139 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6140 struct amdgpu_ring *kiq_ring = &kiq->ring; 6141 unsigned long flags; 6142 6143 if (adev->enable_mes) 6144 return -EINVAL; 6145 6146 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6147 return -EINVAL; 6148 6149 spin_lock_irqsave(&kiq->ring_lock, flags); 6150 6151 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6152 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6153 return -ENOMEM; 6154 } 6155 6156 /* assert preemption condition */ 6157 amdgpu_ring_set_preempt_cond_exec(ring, false); 6158 6159 /* assert IB preemption, emit the trailing fence */ 6160 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6161 ring->trail_fence_gpu_addr, 6162 ++ring->trail_seq); 6163 amdgpu_ring_commit(kiq_ring); 6164 6165 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6166 6167 /* poll the trailing fence */ 6168 for (i = 0; i < adev->usec_timeout; i++) { 6169 if (ring->trail_seq == 6170 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6171 break; 6172 udelay(1); 6173 } 6174 6175 if (i >= adev->usec_timeout) { 6176 r = -EINVAL; 6177 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6178 } 6179 6180 /* deassert preemption condition */ 6181 amdgpu_ring_set_preempt_cond_exec(ring, true); 6182 return r; 6183 } 6184 6185 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6186 { 6187 struct 
amdgpu_device *adev = ring->adev; 6188 struct v10_de_ib_state de_payload = {0}; 6189 uint64_t offset, gds_addr, de_payload_gpu_addr; 6190 void *de_payload_cpu_addr; 6191 int cnt; 6192 6193 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6194 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6195 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6196 6197 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6198 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6199 PAGE_SIZE); 6200 6201 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6202 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6203 6204 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6205 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6206 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6207 WRITE_DATA_DST_SEL(8) | 6208 WR_CONFIRM) | 6209 WRITE_DATA_CACHE_POLICY(0)); 6210 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6211 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6212 6213 if (resume) 6214 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6215 sizeof(de_payload) >> 2); 6216 else 6217 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6218 sizeof(de_payload) >> 2); 6219 } 6220 6221 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6222 bool secure) 6223 { 6224 uint32_t v = secure ? FRAME_TMZ : 0; 6225 6226 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6227 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6228 } 6229 6230 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6231 uint32_t reg_val_offs) 6232 { 6233 struct amdgpu_device *adev = ring->adev; 6234 6235 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6236 amdgpu_ring_write(ring, 0 | /* src: register*/ 6237 (5 << 8) | /* dst: memory */ 6238 (1 << 20)); /* write confirm */ 6239 amdgpu_ring_write(ring, reg); 6240 amdgpu_ring_write(ring, 0); 6241 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6242 reg_val_offs * 4)); 6243 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6244 reg_val_offs * 4)); 6245 } 6246 6247 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6248 uint32_t val) 6249 { 6250 uint32_t cmd = 0; 6251 6252 switch (ring->funcs->type) { 6253 case AMDGPU_RING_TYPE_GFX: 6254 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6255 break; 6256 case AMDGPU_RING_TYPE_KIQ: 6257 cmd = (1 << 16); /* no inc addr */ 6258 break; 6259 default: 6260 cmd = WR_CONFIRM; 6261 break; 6262 } 6263 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6264 amdgpu_ring_write(ring, cmd); 6265 amdgpu_ring_write(ring, reg); 6266 amdgpu_ring_write(ring, 0); 6267 amdgpu_ring_write(ring, val); 6268 } 6269 6270 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6271 uint32_t val, uint32_t mask) 6272 { 6273 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6274 } 6275 6276 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6277 uint32_t reg0, uint32_t reg1, 6278 uint32_t ref, uint32_t mask) 6279 { 6280 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6281 6282 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6283 ref, mask, 0x20); 6284 } 6285 6286 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 6287 unsigned vmid) 6288 { 6289 struct amdgpu_device *adev = ring->adev; 6290 uint32_t value = 0; 6291 6292 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6293 value = REG_SET_FIELD(value, SQ_CMD, MODE, 
0x01); 6294 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6295 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6296 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6297 WREG32_SOC15(GC, 0, regSQ_CMD, value); 6298 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6299 } 6300 6301 static void 6302 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6303 uint32_t me, uint32_t pipe, 6304 enum amdgpu_interrupt_state state) 6305 { 6306 uint32_t cp_int_cntl, cp_int_cntl_reg; 6307 6308 if (!me) { 6309 switch (pipe) { 6310 case 0: 6311 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6312 break; 6313 case 1: 6314 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6315 break; 6316 default: 6317 DRM_DEBUG("invalid pipe %d\n", pipe); 6318 return; 6319 } 6320 } else { 6321 DRM_DEBUG("invalid me %d\n", me); 6322 return; 6323 } 6324 6325 switch (state) { 6326 case AMDGPU_IRQ_STATE_DISABLE: 6327 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6328 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6329 TIME_STAMP_INT_ENABLE, 0); 6330 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6331 GENERIC0_INT_ENABLE, 0); 6332 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6333 break; 6334 case AMDGPU_IRQ_STATE_ENABLE: 6335 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6336 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6337 TIME_STAMP_INT_ENABLE, 1); 6338 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6339 GENERIC0_INT_ENABLE, 1); 6340 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6341 break; 6342 default: 6343 break; 6344 } 6345 } 6346 6347 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6348 int me, int pipe, 6349 enum amdgpu_interrupt_state state) 6350 { 6351 u32 mec_int_cntl, mec_int_cntl_reg; 6352 6353 /* 6354 * amdgpu controls only the first MEC. That's why this function only 6355 * handles the setting of interrupts for this specific MEC. All other 6356 * pipes' interrupts are set by amdkfd. 
6357 */ 6358 6359 if (me == 1) { 6360 switch (pipe) { 6361 case 0: 6362 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6363 break; 6364 case 1: 6365 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6366 break; 6367 case 2: 6368 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6369 break; 6370 case 3: 6371 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6372 break; 6373 default: 6374 DRM_DEBUG("invalid pipe %d\n", pipe); 6375 return; 6376 } 6377 } else { 6378 DRM_DEBUG("invalid me %d\n", me); 6379 return; 6380 } 6381 6382 switch (state) { 6383 case AMDGPU_IRQ_STATE_DISABLE: 6384 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6385 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6386 TIME_STAMP_INT_ENABLE, 0); 6387 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6388 GENERIC0_INT_ENABLE, 0); 6389 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6390 break; 6391 case AMDGPU_IRQ_STATE_ENABLE: 6392 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6393 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6394 TIME_STAMP_INT_ENABLE, 1); 6395 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6396 GENERIC0_INT_ENABLE, 1); 6397 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6398 break; 6399 default: 6400 break; 6401 } 6402 } 6403 6404 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6405 struct amdgpu_irq_src *src, 6406 unsigned type, 6407 enum amdgpu_interrupt_state state) 6408 { 6409 switch (type) { 6410 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6411 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6412 break; 6413 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6414 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6415 break; 6416 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6417 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6418 break; 6419 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6420 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6421 break; 6422 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6423 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6424 break; 6425 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6426 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6427 break; 6428 default: 6429 break; 6430 } 6431 return 0; 6432 } 6433 6434 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6435 struct amdgpu_irq_src *source, 6436 struct amdgpu_iv_entry *entry) 6437 { 6438 u32 doorbell_offset = entry->src_data[0]; 6439 u8 me_id, pipe_id, queue_id; 6440 struct amdgpu_ring *ring; 6441 int i; 6442 6443 DRM_DEBUG("IH: CP EOP\n"); 6444 6445 if (adev->enable_mes && doorbell_offset) { 6446 struct amdgpu_userq_fence_driver *fence_drv = NULL; 6447 struct xarray *xa = &adev->userq_xa; 6448 unsigned long flags; 6449 6450 xa_lock_irqsave(xa, flags); 6451 fence_drv = xa_load(xa, doorbell_offset); 6452 if (fence_drv) 6453 amdgpu_userq_fence_driver_process(fence_drv); 6454 xa_unlock_irqrestore(xa, flags); 6455 } else { 6456 me_id = (entry->ring_id & 0x0c) >> 2; 6457 pipe_id = (entry->ring_id & 0x03) >> 0; 6458 queue_id = (entry->ring_id & 0x70) >> 4; 6459 6460 switch (me_id) { 6461 case 0: 6462 if (pipe_id == 0) 6463 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6464 else 6465 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6466 break; 6467 case 1: 6468 case 2: 6469 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6470 ring = &adev->gfx.compute_ring[i]; 6471 
/* Per-queue interrupt is supported for MEC starting from VI. 6472 * The interrupt can only be enabled/disabled per pipe instead 6473 * of per queue. 6474 */ 6475 if ((ring->me == me_id) && 6476 (ring->pipe == pipe_id) && 6477 (ring->queue == queue_id)) 6478 amdgpu_fence_process(ring); 6479 } 6480 break; 6481 } 6482 } 6483 6484 return 0; 6485 } 6486 6487 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6488 struct amdgpu_irq_src *source, 6489 unsigned int type, 6490 enum amdgpu_interrupt_state state) 6491 { 6492 u32 cp_int_cntl_reg, cp_int_cntl; 6493 int i, j; 6494 6495 switch (state) { 6496 case AMDGPU_IRQ_STATE_DISABLE: 6497 case AMDGPU_IRQ_STATE_ENABLE: 6498 for (i = 0; i < adev->gfx.me.num_me; i++) { 6499 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6500 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6501 6502 if (cp_int_cntl_reg) { 6503 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6504 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6505 PRIV_REG_INT_ENABLE, 6506 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6507 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6508 } 6509 } 6510 } 6511 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6512 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6513 /* MECs start at 1 */ 6514 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6515 6516 if (cp_int_cntl_reg) { 6517 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6518 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6519 PRIV_REG_INT_ENABLE, 6520 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6521 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6522 } 6523 } 6524 } 6525 break; 6526 default: 6527 break; 6528 } 6529 6530 return 0; 6531 } 6532 6533 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6534 struct amdgpu_irq_src *source, 6535 unsigned type, 6536 enum amdgpu_interrupt_state state) 6537 { 6538 u32 cp_int_cntl_reg, cp_int_cntl; 6539 int i, j; 6540 6541 switch (state) { 6542 case AMDGPU_IRQ_STATE_DISABLE: 6543 case AMDGPU_IRQ_STATE_ENABLE: 6544 for (i = 0; i < adev->gfx.me.num_me; i++) { 6545 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6546 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6547 6548 if (cp_int_cntl_reg) { 6549 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6550 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6551 OPCODE_ERROR_INT_ENABLE, 6552 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6553 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6554 } 6555 } 6556 } 6557 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6558 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6559 /* MECs start at 1 */ 6560 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6561 6562 if (cp_int_cntl_reg) { 6563 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6564 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6565 OPCODE_ERROR_INT_ENABLE, 6566 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6567 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6568 } 6569 } 6570 } 6571 break; 6572 default: 6573 break; 6574 } 6575 return 0; 6576 } 6577 6578 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6579 struct amdgpu_irq_src *source, 6580 unsigned int type, 6581 enum amdgpu_interrupt_state state) 6582 { 6583 u32 cp_int_cntl_reg, cp_int_cntl; 6584 int i, j; 6585 6586 switch (state) { 6587 case AMDGPU_IRQ_STATE_DISABLE: 6588 case AMDGPU_IRQ_STATE_ENABLE: 6589 for (i = 0; i < adev->gfx.me.num_me; i++) { 6590 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6591 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6592 6593 if (cp_int_cntl_reg) { 6594 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6595 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6596 PRIV_INSTR_INT_ENABLE, 6597 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6598 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6599 } 6600 } 6601 } 6602 break; 6603 default: 6604 break; 6605 } 6606 6607 return 0; 6608 } 6609 6610 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6611 struct amdgpu_iv_entry *entry) 6612 { 6613 u8 me_id, pipe_id, queue_id; 6614 struct amdgpu_ring *ring; 6615 int i; 6616 6617 me_id = (entry->ring_id & 0x0c) >> 2; 6618 pipe_id = (entry->ring_id & 0x03) >> 0; 6619 queue_id = (entry->ring_id & 0x70) >> 4; 6620 6621 if (!adev->gfx.disable_kq) { 6622 switch (me_id) { 6623 case 0: 6624 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6625 ring = &adev->gfx.gfx_ring[i]; 6626 if (ring->me == me_id && ring->pipe == pipe_id && 6627 ring->queue == queue_id) 6628 drm_sched_fault(&ring->sched); 6629 } 6630 break; 6631 case 1: 6632 case 2: 6633 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6634 ring = &adev->gfx.compute_ring[i]; 6635 if (ring->me == me_id && ring->pipe == pipe_id && 6636 ring->queue == queue_id) 6637 drm_sched_fault(&ring->sched); 6638 } 6639 break; 6640 default: 6641 BUG(); 6642 break; 6643 } 6644 } 6645 } 6646 6647 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6648 struct amdgpu_irq_src *source, 6649 struct amdgpu_iv_entry *entry) 6650 { 6651 DRM_ERROR("Illegal register access in command stream\n"); 6652 gfx_v11_0_handle_priv_fault(adev, entry); 6653 return 0; 6654 } 6655 6656 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6657 struct amdgpu_irq_src *source, 6658 struct amdgpu_iv_entry *entry) 6659 { 6660 DRM_ERROR("Illegal opcode in command stream \n"); 6661 gfx_v11_0_handle_priv_fault(adev, entry); 6662 return 0; 6663 } 6664 6665 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6666 struct amdgpu_irq_src *source, 6667 struct amdgpu_iv_entry *entry) 6668 { 6669 DRM_ERROR("Illegal instruction in command stream\n"); 6670 gfx_v11_0_handle_priv_fault(adev, entry); 6671 return 0; 6672 } 6673 6674 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6675 struct amdgpu_irq_src *source, 6676 struct amdgpu_iv_entry *entry) 6677 { 6678 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6679 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6680 6681 return 0; 6682 } 6683 6684 #if 0 6685 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6686 struct amdgpu_irq_src *src, 6687 unsigned int type, 6688 enum amdgpu_interrupt_state state) 6689 { 6690 uint32_t tmp, target; 6691 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6692 6693 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6694 target += ring->pipe; 6695 6696 switch (type) { 
6697 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6698 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6699 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6700 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6701 GENERIC2_INT_ENABLE, 0); 6702 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6703 6704 tmp = RREG32_SOC15_IP(GC, target); 6705 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6706 GENERIC2_INT_ENABLE, 0); 6707 WREG32_SOC15_IP(GC, target, tmp); 6708 } else { 6709 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6710 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6711 GENERIC2_INT_ENABLE, 1); 6712 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6713 6714 tmp = RREG32_SOC15_IP(GC, target); 6715 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6716 GENERIC2_INT_ENABLE, 1); 6717 WREG32_SOC15_IP(GC, target, tmp); 6718 } 6719 break; 6720 default: 6721 BUG(); /* kiq only support GENERIC2_INT now */ 6722 break; 6723 } 6724 return 0; 6725 } 6726 #endif 6727 6728 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6729 { 6730 const unsigned int gcr_cntl = 6731 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6732 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6733 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6734 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6735 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6736 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6737 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6738 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6739 6740 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6741 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6742 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6743 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6744 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6745 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6746 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6747 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6748 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6749 } 6750 6751 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6752 { 6753 /* Disable the pipe reset until the CPFW fully support it.*/ 6754 dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); 6755 return false; 6756 } 6757 6758 6759 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6760 { 6761 struct amdgpu_device *adev = ring->adev; 6762 uint32_t reset_pipe = 0, clean_pipe = 0; 6763 int r; 6764 6765 if (!gfx_v11_pipe_reset_support(adev)) 6766 return -EOPNOTSUPP; 6767 6768 gfx_v11_0_set_safe_mode(adev, 0); 6769 mutex_lock(&adev->srbm_mutex); 6770 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6771 6772 switch (ring->pipe) { 6773 case 0: 6774 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6775 PFP_PIPE0_RESET, 1); 6776 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6777 ME_PIPE0_RESET, 1); 6778 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6779 PFP_PIPE0_RESET, 0); 6780 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6781 ME_PIPE0_RESET, 0); 6782 break; 6783 case 1: 6784 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6785 PFP_PIPE1_RESET, 1); 6786 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6787 ME_PIPE1_RESET, 1); 6788 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6789 PFP_PIPE1_RESET, 0); 6790 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6791 ME_PIPE1_RESET, 0); 6792 break; 6793 default: 6794 break; 6795 } 6796 6797 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6798 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6799 6800 r = (RREG32(SOC15_REG_OFFSET(GC, 0, 
static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	switch (ring->pipe) {
	case 0:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 0);
		break;
	case 1:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 0);
		break;
	default:
		break;
	}

	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);

	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
	    RS64_FW_UC_START_ADDR_LO;
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC %s\n",
		 ring->name, r == 0 ? "succeeded" : "failed");
	/* FIXME: Sometimes the driver can't cache the ME firmware start PC correctly,
	 * so the pipe reset status relies on the later gfx ring test result.
	 */
	return 0;
}

static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
	if (r) {
		dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
		r = gfx_v11_reset_gfx_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kgq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kgq\n");
		return r;
	}

	r = amdgpu_mes_map_legacy_queue(adev, ring);
	if (r) {
		dev_err(adev->dev, "failed to remap kgq\n");
		return r;
	}

	return amdgpu_ring_test_ring(ring);
}
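
/*
 * Outline of the compute pipe reset sequence below: enter RLC safe mode,
 * select the ring's me/pipe/queue through GRBM, then pulse the per-pipe reset
 * bit (in CP_MEC_RS64_CNTL when the RS64 MEC firmware is in use, otherwise in
 * CP_MEC_CNTL for MEC1/MEC2), and finally sample the MEC instruction pointer
 * to see whether the firmware restarted from its start address.
 */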
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	clean_pipe = reset_pipe;

	if (adev->gfx.rs64_enable) {
		switch (ring->pipe) {
		case 0:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 0);
			break;
		case 1:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 0);
			break;
		case 2:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 0);
			break;
		case 3:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 0);
			break;
		default:
			break;
		}
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
		    RS64_FW_UC_START_ADDR_LO;
	} else {
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
		} else {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec2 fw pc: CP_MEC2_INSTR_PNTR */
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "The ring %s pipe reset to the MEC firmware start PC %s\n",
		 ring->name, r == 0 ? "succeeded" : "failed");
	/* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly,
	 * so the pipe reset status relies on the compute ring test result.
	 */
	return 0;
}
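
/*
 * KCQ reset mirrors the KGQ path above: ask MES to reset the legacy compute
 * queue first, fall back to a full compute pipe reset if that fails, then
 * re-initialize the MQD, remap the queue through MES and confirm recovery
 * with a ring test.
 */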
static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
	if (r) {
		dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r);
		r = gfx_v11_0_reset_compute_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kcq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kcq\n");
		return r;
	}
	r = amdgpu_mes_map_legacy_queue(adev, ring);
	if (r) {
		dev_err(adev->dev, "failed to remap kcq\n");
		return r;
	}

	return amdgpu_ring_test_ring(ring);
}

static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	for (i = 0; i < reg_count; i++)
		drm_printf(p, "%-50s \t 0x%08x\n",
			   gc_reg_list_11_0[i].reg_name,
			   adev->gfx.ip_dump_core[i]);

	/* print compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.mec.num_mec,
		   adev->gfx.mec.num_pipe_per_mec,
		   adev->gfx.mec.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
						drm_printf(p, "%-50s \t 0x%08x\n",
							   "regCP_MEC_ME2_HEADER_DUMP",
							   adev->gfx.ip_dump_compute_queues[index + reg]);
					else
						drm_printf(p, "%-50s \t 0x%08x\n",
							   gc_cp_reg_list_11[reg].reg_name,
							   adev->gfx.ip_dump_compute_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}

	/* print gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.me.num_me,
		   adev->gfx.me.num_pipe_per_me,
		   adev->gfx.me.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_gfx_queue_reg_list_11[reg].reg_name,
						   adev->gfx.ip_dump_gfx_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}
}
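
/*
 * gfx_v11_ip_dump() snapshots the GC registers that gfx_v11_ip_print() later
 * formats. Note on the flow below: GFXOFF is temporarily disabled around the
 * register reads, and each compute/gfx queue is selected through GRBM (with
 * srbm_mutex held) before its per-queue registers are read. For MEC instances
 * other than the first, the ME1 header-dump offset is replaced by
 * regCP_MEC_ME2_HEADER_DUMP.
 */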
static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < reg_count; i++)
		adev->gfx.ip_dump_core[i] =
			RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				/* ME0 is for GFX so start from 1 for CP */
				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
				for (reg = 0; reg < reg_count; reg++) {
					if (i &&
					    gc_cp_reg_list_11[reg].reg_offset ==
					    regCP_MEC_ME1_HEADER_DUMP)
						adev->gfx.ip_dump_compute_queues[index + reg] =
							RREG32(SOC15_REG_OFFSET(GC, 0,
								regCP_MEC_ME2_HEADER_DUMP));
					else
						adev->gfx.ip_dump_compute_queues[index + reg] =
							RREG32(SOC15_REG_ENTRY_OFFSET(
								gc_cp_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				soc21_grbm_select(adev, i, j, k, 0);

				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_gfx_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_gfx_queue_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);
}

static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
	/* Emit the cleaner shader */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
	amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
}

static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_begin_use(ring);

	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
}

static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_end_use(ring);

	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
}

static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
	.name = "gfx_v11_0",
	.early_init = gfx_v11_0_early_init,
	.late_init = gfx_v11_0_late_init,
	.sw_init = gfx_v11_0_sw_init,
	.sw_fini = gfx_v11_0_sw_fini,
	.hw_init = gfx_v11_0_hw_init,
	.hw_fini = gfx_v11_0_hw_fini,
	.suspend = gfx_v11_0_suspend,
	.resume = gfx_v11_0_resume,
	.is_idle = gfx_v11_0_is_idle,
	.wait_for_idle = gfx_v11_0_wait_for_idle,
	.soft_reset = gfx_v11_0_soft_reset,
	.check_soft_reset = gfx_v11_0_check_soft_reset,
	.post_soft_reset = gfx_v11_0_post_soft_reset,
	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
	.set_powergating_state = gfx_v11_0_set_powergating_state,
	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
	.dump_ip_state = gfx_v11_ip_dump,
	.print_ip_state = gfx_v11_ip_print,
};
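
/*
 * Ring callback tables. One table per ring type (gfx, compute, KIQ) wires the
 * packet-emission helpers above into the common amdgpu_ring layer;
 * emit_frame_size totals the worst-case number of dwords a single frame
 * submission may emit through those callbacks.
 */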
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 247 maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							   u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}
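
/*
 * Each WGP holds two CUs, so the per-SH WGP bitmap below is expanded by
 * turning every set WGP bit into a pair of adjacent CU bits. For example,
 * an active-WGP bitmap of 0b0101 (WGPs 0 and 2) expands to a CU bitmap of
 * 0b00110011 (CUs 0, 1, 4 and 5).
 */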
static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in the cu_info struct is 4x4 and the ioctl interface
			 * struct drm_amdgpu_info_device must stay stable.
			 * So we use the last two columns of the bitmap to store
			 * the cu mask for SEs 4 to 7; the layout of the bitmap
			 * is as below:
			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};