/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS	1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1		0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1		0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX	1

#define regCP_GFX_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT			0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT		0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT			0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT			0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT			0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT			0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT			0x00308509
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_HQD_PQ_RPTR_DEFAULT			0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT		0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT			0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 103 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 104 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 105 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 106 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 107 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 108 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 109 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 110 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 111 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 112 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 113 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 114 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 115 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 116 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 117 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 118 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 119 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 120 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 121 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 122 123 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 124 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 125 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 126 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 127 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 135 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 136 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 138 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 139 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 140 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 141 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 142 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 143 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 144 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 145 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 146 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 147 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 148 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 149 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 157 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 158 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 159 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 160 SOC15_REG_ENTRY_STR(GC, 0, 
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
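
/*
 * Note: the KIQ helpers below hand-build PM4 packets. gfx11_kiq_map_queues()
 * points the KIQ at a queue's MQD and wptr buffers and selects the target
 * me/pipe/queue; as the switch statement in its body shows, compute rings use
 * me=1/eng_sel=0, gfx rings use me=0/eng_sel=4 and MES rings use me=2/eng_sel=5.
 */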
static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));

}

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
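
/*
 * Note: gfx_v11_0_wait_reg_mem() below wraps the WAIT_REG_MEM packet.
 * mem_space selects polling a memory address (1) or a register (0), and the
 * function field is hard-coded to 3 ("equal"), so the CP waits until
 * (value & mask) == ref, with 'inv' programmed as the poll interval.
 */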
static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nops - 1 NOP packet's */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw hasn't indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
&f); 637 if (r) 638 goto err2; 639 640 r = dma_fence_wait_timeout(f, false, timeout); 641 if (r == 0) { 642 r = -ETIMEDOUT; 643 goto err2; 644 } else if (r < 0) { 645 goto err2; 646 } 647 648 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 649 r = 0; 650 else 651 r = -EINVAL; 652 err2: 653 amdgpu_ib_free(&ib, NULL); 654 dma_fence_put(f); 655 err1: 656 amdgpu_device_wb_free(adev, index); 657 return r; 658 } 659 660 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 661 { 662 amdgpu_ucode_release(&adev->gfx.pfp_fw); 663 amdgpu_ucode_release(&adev->gfx.me_fw); 664 amdgpu_ucode_release(&adev->gfx.rlc_fw); 665 amdgpu_ucode_release(&adev->gfx.mec_fw); 666 667 kfree(adev->gfx.rlc.register_list_format); 668 } 669 670 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 671 { 672 const struct psp_firmware_header_v1_0 *toc_hdr; 673 int err = 0; 674 675 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 676 AMDGPU_UCODE_REQUIRED, 677 "amdgpu/%s_toc.bin", ucode_prefix); 678 if (err) 679 goto out; 680 681 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 682 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 683 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 684 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 685 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 686 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 687 return 0; 688 out: 689 amdgpu_ucode_release(&adev->psp.toc_fw); 690 return err; 691 } 692 693 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 694 { 695 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 696 case IP_VERSION(11, 0, 0): 697 case IP_VERSION(11, 0, 2): 698 case IP_VERSION(11, 0, 3): 699 if ((adev->gfx.me_fw_version >= 1505) && 700 (adev->gfx.pfp_fw_version >= 1600) && 701 (adev->gfx.mec_fw_version >= 512)) { 702 if (amdgpu_sriov_vf(adev)) 703 adev->gfx.cp_gfx_shadow = true; 704 else 705 adev->gfx.cp_gfx_shadow = false; 706 } 707 break; 708 default: 709 adev->gfx.cp_gfx_shadow = false; 710 break; 711 } 712 } 713 714 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 715 { 716 char ucode_prefix[25]; 717 int err; 718 const struct rlc_firmware_header_v2_0 *rlc_hdr; 719 uint16_t version_major; 720 uint16_t version_minor; 721 722 DRM_DEBUG("\n"); 723 724 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 725 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 726 AMDGPU_UCODE_REQUIRED, 727 "amdgpu/%s_pfp.bin", ucode_prefix); 728 if (err) 729 goto out; 730 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 731 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 732 (union amdgpu_firmware_header *) 733 adev->gfx.pfp_fw->data, 2, 0); 734 if (adev->gfx.rs64_enable) { 735 dev_info(adev->dev, "CP RS64 enable\n"); 736 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 737 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 738 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 739 } else { 740 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 741 } 742 743 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 744 AMDGPU_UCODE_REQUIRED, 745 "amdgpu/%s_me.bin", ucode_prefix); 746 if (err) 747 goto out; 748 if (adev->gfx.rs64_enable) { 749 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 750 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 751 
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
				     volatile u32 *buffer)
{
	u32 count = 0;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
						  struct amdgpu_gfx_shadow_info *shadow_info)
{
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
}

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (adev->gfx.cp_gfx_shadow || skip_check) {
		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (adev->gfx.disable_kq) {
		ring->no_scheduler = true;
		ring->no_user_submission = true;
	}

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}
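
/*
 * Note: the helper below stages the CP front-end firmware into the RLC
 * autoload buffer. When the RS64 microengine is in use, the v2_0 firmware
 * headers carry separate instruction and data (stack) images that are copied
 * per pipe; otherwise the legacy v1_0 headers are used and only the
 * instruction image (minus the MEC jump table) is copied.
 */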
static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
						fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
						fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
						fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
						fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
						fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
						fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
					fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
					fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
					fw_data, fw_size, fw_autoload_mask);
		}
	}
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}
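
/*
 * Note: MES firmware is staged per pipe; pipe 0 and pipe 1 each get their
 * ucode image and ucode-data (stack) image copied into the autoload buffer
 * under the corresponding SOC21 firmware IDs.
 */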
static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4 init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5 disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}

static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
{
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
	uint32_t *ptr;
	uint32_t inst;

	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
	if (!ptr) {
		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
		adev->gfx.ip_dump_core = NULL;
	} else {
		adev->gfx.ip_dump_core = ptr;
	}

	/* Allocate memory for compute queue registers for all the instances */
	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
		adev->gfx.mec.num_queue_per_pipe;

	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
GFP_KERNEL); 1548 if (!ptr) { 1549 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1550 adev->gfx.ip_dump_compute_queues = NULL; 1551 } else { 1552 adev->gfx.ip_dump_compute_queues = ptr; 1553 } 1554 1555 /* Allocate memory for gfx queue registers for all the instances */ 1556 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1557 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1558 adev->gfx.me.num_queue_per_pipe; 1559 1560 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1561 if (!ptr) { 1562 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1563 adev->gfx.ip_dump_gfx_queues = NULL; 1564 } else { 1565 adev->gfx.ip_dump_gfx_queues = ptr; 1566 } 1567 } 1568 1569 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1570 { 1571 int i, j, k, r, ring_id; 1572 int xcc_id = 0; 1573 struct amdgpu_device *adev = ip_block->adev; 1574 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1575 1576 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1577 1578 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1579 case IP_VERSION(11, 0, 0): 1580 case IP_VERSION(11, 0, 1): 1581 case IP_VERSION(11, 0, 2): 1582 case IP_VERSION(11, 0, 3): 1583 case IP_VERSION(11, 0, 4): 1584 case IP_VERSION(11, 5, 0): 1585 case IP_VERSION(11, 5, 1): 1586 case IP_VERSION(11, 5, 2): 1587 case IP_VERSION(11, 5, 3): 1588 adev->gfx.me.num_me = 1; 1589 adev->gfx.me.num_pipe_per_me = 1; 1590 adev->gfx.me.num_queue_per_pipe = 2; 1591 adev->gfx.mec.num_mec = 1; 1592 adev->gfx.mec.num_pipe_per_mec = 4; 1593 adev->gfx.mec.num_queue_per_pipe = 4; 1594 break; 1595 default: 1596 adev->gfx.me.num_me = 1; 1597 adev->gfx.me.num_pipe_per_me = 1; 1598 adev->gfx.me.num_queue_per_pipe = 1; 1599 adev->gfx.mec.num_mec = 1; 1600 adev->gfx.mec.num_pipe_per_mec = 4; 1601 adev->gfx.mec.num_queue_per_pipe = 8; 1602 break; 1603 } 1604 1605 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1606 case IP_VERSION(11, 0, 0): 1607 case IP_VERSION(11, 0, 2): 1608 case IP_VERSION(11, 0, 3): 1609 if (!adev->gfx.disable_uq && 1610 adev->gfx.me_fw_version >= 2390 && 1611 adev->gfx.pfp_fw_version >= 2530 && 1612 adev->gfx.mec_fw_version >= 2600 && 1613 adev->mes.fw_version[0] >= 120) { 1614 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1615 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1616 } 1617 break; 1618 case IP_VERSION(11, 0, 1): 1619 case IP_VERSION(11, 0, 4): 1620 case IP_VERSION(11, 5, 0): 1621 case IP_VERSION(11, 5, 1): 1622 case IP_VERSION(11, 5, 2): 1623 case IP_VERSION(11, 5, 3): 1624 /* add firmware version checks here */ 1625 if (0 && !adev->gfx.disable_uq) { 1626 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1627 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1628 } 1629 break; 1630 default: 1631 break; 1632 } 1633 1634 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1635 case IP_VERSION(11, 0, 0): 1636 case IP_VERSION(11, 0, 2): 1637 case IP_VERSION(11, 0, 3): 1638 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1639 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1640 if (adev->gfx.me_fw_version >= 2280 && 1641 adev->gfx.pfp_fw_version >= 2370 && 1642 adev->gfx.mec_fw_version >= 2450 && 1643 adev->mes.fw_version[0] >= 99) { 1644 adev->gfx.enable_cleaner_shader = true; 1645 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1646 if (r) { 1647 adev->gfx.enable_cleaner_shader = false; 1648 dev_err(adev->dev, "Failed to initialize cleaner 
shader\n"); 1649 } 1650 } 1651 break; 1652 case IP_VERSION(11, 5, 0): 1653 case IP_VERSION(11, 5, 1): 1654 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1655 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1656 if (adev->gfx.mec_fw_version >= 26 && 1657 adev->mes.fw_version[0] >= 114) { 1658 adev->gfx.enable_cleaner_shader = true; 1659 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1660 if (r) { 1661 adev->gfx.enable_cleaner_shader = false; 1662 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1663 } 1664 } 1665 break; 1666 case IP_VERSION(11, 5, 2): 1667 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1668 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1669 if (adev->gfx.me_fw_version >= 12 && 1670 adev->gfx.pfp_fw_version >= 15 && 1671 adev->gfx.mec_fw_version >= 15) { 1672 adev->gfx.enable_cleaner_shader = true; 1673 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1674 if (r) { 1675 adev->gfx.enable_cleaner_shader = false; 1676 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1677 } 1678 } 1679 break; 1680 case IP_VERSION(11, 5, 3): 1681 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1682 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1683 if (adev->gfx.me_fw_version >= 7 && 1684 adev->gfx.pfp_fw_version >= 8 && 1685 adev->gfx.mec_fw_version >= 8) { 1686 adev->gfx.enable_cleaner_shader = true; 1687 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1688 if (r) { 1689 adev->gfx.enable_cleaner_shader = false; 1690 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1691 } 1692 } 1693 break; 1694 default: 1695 adev->gfx.enable_cleaner_shader = false; 1696 break; 1697 } 1698 1699 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1700 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1701 amdgpu_sriov_is_pp_one_vf(adev)) 1702 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1703 1704 /* EOP Event */ 1705 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1706 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1707 &adev->gfx.eop_irq); 1708 if (r) 1709 return r; 1710 1711 /* Bad opcode Event */ 1712 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1713 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1714 &adev->gfx.bad_op_irq); 1715 if (r) 1716 return r; 1717 1718 /* Privileged reg */ 1719 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1720 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1721 &adev->gfx.priv_reg_irq); 1722 if (r) 1723 return r; 1724 1725 /* Privileged inst */ 1726 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1727 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1728 &adev->gfx.priv_inst_irq); 1729 if (r) 1730 return r; 1731 1732 /* FED error */ 1733 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1734 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1735 &adev->gfx.rlc_gc_fed_irq); 1736 if (r) 1737 return r; 1738 1739 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1740 1741 gfx_v11_0_me_init(adev); 1742 1743 r = gfx_v11_0_rlc_init(adev); 1744 if (r) { 1745 DRM_ERROR("Failed to init rlc BOs!\n"); 1746 return r; 1747 } 1748 1749 r = gfx_v11_0_mec_init(adev); 1750 if (r) { 1751 DRM_ERROR("Failed to init MEC BOs!\n"); 1752 return r; 1753 } 1754 1755 if (adev->gfx.num_gfx_rings) { 1756 ring_id = 0; 1757 /* set up the gfx ring */ 1758 for (i = 0; i < adev->gfx.me.num_me; i++) { 1759 for (j = 0; j < num_queue_per_pipe; j++) { 1760 
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1761 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1762 continue; 1763 1764 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1765 i, k, j); 1766 if (r) 1767 return r; 1768 ring_id++; 1769 } 1770 } 1771 } 1772 } 1773 1774 if (adev->gfx.num_compute_rings) { 1775 ring_id = 0; 1776 /* set up the compute queues - allocate horizontally across pipes */ 1777 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1778 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1779 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1780 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1781 k, j)) 1782 continue; 1783 1784 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1785 i, k, j); 1786 if (r) 1787 return r; 1788 1789 ring_id++; 1790 } 1791 } 1792 } 1793 } 1794 1795 adev->gfx.gfx_supported_reset = 1796 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1797 adev->gfx.compute_supported_reset = 1798 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1799 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1800 case IP_VERSION(11, 0, 0): 1801 case IP_VERSION(11, 0, 2): 1802 case IP_VERSION(11, 0, 3): 1803 if ((adev->gfx.me_fw_version >= 2280) && 1804 (adev->gfx.mec_fw_version >= 2410)) { 1805 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1806 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1807 } 1808 break; 1809 default: 1810 break; 1811 } 1812 1813 if (!adev->enable_mes_kiq) { 1814 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1815 if (r) { 1816 DRM_ERROR("Failed to init KIQ BOs!\n"); 1817 return r; 1818 } 1819 1820 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1821 if (r) 1822 return r; 1823 } 1824 1825 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1826 if (r) 1827 return r; 1828 1829 /* allocate visible FB for rlc auto-loading fw */ 1830 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1831 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1832 if (r) 1833 return r; 1834 } 1835 1836 r = gfx_v11_0_gpu_early_init(adev); 1837 if (r) 1838 return r; 1839 1840 if (amdgpu_gfx_ras_sw_init(adev)) { 1841 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1842 return -EINVAL; 1843 } 1844 1845 gfx_v11_0_alloc_ip_dump(adev); 1846 1847 r = amdgpu_gfx_sysfs_init(adev); 1848 if (r) 1849 return r; 1850 1851 return 0; 1852 } 1853 1854 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1855 { 1856 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1857 &adev->gfx.pfp.pfp_fw_gpu_addr, 1858 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1859 1860 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1861 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1862 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1863 } 1864 1865 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1866 { 1867 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1868 &adev->gfx.me.me_fw_gpu_addr, 1869 (void **)&adev->gfx.me.me_fw_ptr); 1870 1871 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1872 &adev->gfx.me.me_fw_data_gpu_addr, 1873 (void **)&adev->gfx.me.me_fw_data_ptr); 1874 } 1875 1876 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1877 { 1878 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1879 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1880 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1881 } 1882 1883 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1884 { 1885 int i; 1886 struct amdgpu_device *adev = ip_block->adev; 1887 1888 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 
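	/* Tear down the kernel gfx rings first, then the compute rings. */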
1889 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1890 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1891 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1892 1893 amdgpu_gfx_mqd_sw_fini(adev, 0); 1894 1895 if (!adev->enable_mes_kiq) { 1896 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1897 amdgpu_gfx_kiq_fini(adev, 0); 1898 } 1899 1900 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1901 1902 gfx_v11_0_pfp_fini(adev); 1903 gfx_v11_0_me_fini(adev); 1904 gfx_v11_0_rlc_fini(adev); 1905 gfx_v11_0_mec_fini(adev); 1906 1907 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1908 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1909 1910 gfx_v11_0_free_microcode(adev); 1911 1912 amdgpu_gfx_sysfs_fini(adev); 1913 1914 kfree(adev->gfx.ip_dump_core); 1915 kfree(adev->gfx.ip_dump_compute_queues); 1916 kfree(adev->gfx.ip_dump_gfx_queues); 1917 1918 return 0; 1919 } 1920 1921 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1922 u32 sh_num, u32 instance, int xcc_id) 1923 { 1924 u32 data; 1925 1926 if (instance == 0xffffffff) 1927 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1928 INSTANCE_BROADCAST_WRITES, 1); 1929 else 1930 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1931 instance); 1932 1933 if (se_num == 0xffffffff) 1934 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1935 1); 1936 else 1937 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1938 1939 if (sh_num == 0xffffffff) 1940 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1941 1); 1942 else 1943 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1944 1945 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1946 } 1947 1948 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1949 { 1950 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1951 1952 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1953 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1954 CC_GC_SA_UNIT_DISABLE, 1955 SA_DISABLE); 1956 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1957 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1958 GC_USER_SA_UNIT_DISABLE, 1959 SA_DISABLE); 1960 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1961 adev->gfx.config.max_shader_engines); 1962 1963 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1964 } 1965 1966 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1967 { 1968 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1969 u32 rb_mask; 1970 1971 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1972 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1973 CC_RB_BACKEND_DISABLE, 1974 BACKEND_DISABLE); 1975 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 1976 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1977 GC_USER_RB_BACKEND_DISABLE, 1978 BACKEND_DISABLE); 1979 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1980 adev->gfx.config.max_shader_engines); 1981 1982 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1983 } 1984 1985 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 1986 { 1987 u32 rb_bitmap_per_sa; 1988 u32 rb_bitmap_width_per_sa; 1989 u32 max_sa; 1990 u32 active_sa_bitmap; 1991 u32 global_active_rb_bitmap; 1992 u32 active_rb_bitmap = 0; 1993 u32 i; 1994 1995 /* query sa bitmap from SA_UNIT_DISABLE registers */ 1996 active_sa_bitmap = 
gfx_v11_0_get_sa_active_bitmap(adev); 1997 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 1998 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 1999 2000 /* generate active rb bitmap according to active sa bitmap */ 2001 max_sa = adev->gfx.config.max_shader_engines * 2002 adev->gfx.config.max_sh_per_se; 2003 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2004 adev->gfx.config.max_sh_per_se; 2005 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2006 2007 for (i = 0; i < max_sa; i++) { 2008 if (active_sa_bitmap & (1 << i)) 2009 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2010 } 2011 2012 active_rb_bitmap &= global_active_rb_bitmap; 2013 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2014 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2015 } 2016 2017 #define DEFAULT_SH_MEM_BASES (0x6000) 2018 #define LDS_APP_BASE 0x1 2019 #define SCRATCH_APP_BASE 0x2 2020 2021 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2022 { 2023 int i; 2024 uint32_t sh_mem_bases; 2025 uint32_t data; 2026 2027 /* 2028 * Configure apertures: 2029 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2030 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2031 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2032 */ 2033 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2034 SCRATCH_APP_BASE; 2035 2036 mutex_lock(&adev->srbm_mutex); 2037 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2038 soc21_grbm_select(adev, 0, 0, 0, i); 2039 /* CP and shaders */ 2040 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2041 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2042 2043 /* Enable trap for each kfd vmid. */ 2044 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2045 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2046 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2047 } 2048 soc21_grbm_select(adev, 0, 0, 0, 0); 2049 mutex_unlock(&adev->srbm_mutex); 2050 2051 /* 2052 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2053 * access. These should be enabled by FW for target VMIDs. 2054 */ 2055 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2056 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2057 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2058 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2059 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2060 } 2061 } 2062 2063 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2064 { 2065 int vmid; 2066 2067 /* 2068 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2069 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2070 * the driver can enable them for graphics. VMID0 should maintain 2071 * access so that HWS firmware can save/restore entries. 2072 */ 2073 for (vmid = 1; vmid < 16; vmid++) { 2074 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2075 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2076 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2077 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2078 } 2079 } 2080 2081 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2082 { 2083 /* TODO: harvest feature to be added later. */ 2084 } 2085 2086 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2087 { 2088 /* TCCs are global (not instanced). 
*/ 2089 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2090 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2091 2092 adev->gfx.config.tcc_disabled_mask = 2093 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2094 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2095 } 2096 2097 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2098 { 2099 u32 tmp; 2100 int i; 2101 2102 if (!amdgpu_sriov_vf(adev)) 2103 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2104 2105 gfx_v11_0_setup_rb(adev); 2106 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2107 gfx_v11_0_get_tcc_info(adev); 2108 adev->gfx.config.pa_sc_tile_steering_override = 0; 2109 2110 /* Set whether texture coordinate truncation is conformant. */ 2111 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2112 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2113 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2114 2115 /* XXX SH_MEM regs */ 2116 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2117 mutex_lock(&adev->srbm_mutex); 2118 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2119 soc21_grbm_select(adev, 0, 0, 0, i); 2120 /* CP and shaders */ 2121 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2122 if (i != 0) { 2123 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2124 (adev->gmc.private_aperture_start >> 48)); 2125 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2126 (adev->gmc.shared_aperture_start >> 48)); 2127 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2128 } 2129 } 2130 soc21_grbm_select(adev, 0, 0, 0, 0); 2131 2132 mutex_unlock(&adev->srbm_mutex); 2133 2134 gfx_v11_0_init_compute_vmid(adev); 2135 gfx_v11_0_init_gds_vmid(adev); 2136 } 2137 2138 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2139 int me, int pipe) 2140 { 2141 if (me != 0) 2142 return 0; 2143 2144 switch (pipe) { 2145 case 0: 2146 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2147 case 1: 2148 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2149 default: 2150 return 0; 2151 } 2152 } 2153 2154 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2155 int me, int pipe) 2156 { 2157 /* 2158 * amdgpu controls only the first MEC. That's why this function only 2159 * handles the setting of interrupts for this specific MEC. All other 2160 * pipes' interrupts are set by amdkfd. 2161 */ 2162 if (me != 1) 2163 return 0; 2164 2165 switch (pipe) { 2166 case 0: 2167 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2168 case 1: 2169 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2170 case 2: 2171 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2172 case 3: 2173 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2174 default: 2175 return 0; 2176 } 2177 } 2178 2179 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2180 bool enable) 2181 { 2182 u32 tmp, cp_int_cntl_reg; 2183 int i, j; 2184 2185 if (amdgpu_sriov_vf(adev)) 2186 return; 2187 2188 for (i = 0; i < adev->gfx.me.num_me; i++) { 2189 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2190 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2191 2192 if (cp_int_cntl_reg) { 2193 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2194 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2195 enable ? 1 : 0); 2196 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2197 enable ? 1 : 0); 2198 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2199 enable ? 
1 : 0); 2200 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2201 enable ? 1 : 0); 2202 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2203 } 2204 } 2205 } 2206 } 2207 2208 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2209 { 2210 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2211 2212 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2213 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2214 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2215 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2216 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2217 2218 return 0; 2219 } 2220 2221 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2222 { 2223 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2224 2225 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2226 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2227 } 2228 2229 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2230 { 2231 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2232 udelay(50); 2233 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2234 udelay(50); 2235 } 2236 2237 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2238 bool enable) 2239 { 2240 uint32_t rlc_pg_cntl; 2241 2242 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2243 2244 if (!enable) { 2245 /* RLC_PG_CNTL[23] = 0 (default) 2246 * RLC will wait for handshake acks with SMU 2247 * GFXOFF will be enabled 2248 * RLC_PG_CNTL[23] = 1 2249 * RLC will not issue any message to SMU 2250 * hence no handshake between SMU & RLC 2251 * GFXOFF will be disabled 2252 */ 2253 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2254 } else 2255 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2256 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2257 } 2258 2259 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2260 { 2261 /* TODO: enable rlc & smu handshake until smu 2262 * and gfxoff feature works as expected */ 2263 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2264 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2265 2266 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2267 udelay(50); 2268 } 2269 2270 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2271 { 2272 uint32_t tmp; 2273 2274 /* enable Save Restore Machine */ 2275 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2276 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2277 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2278 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2279 } 2280 2281 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2282 { 2283 const struct rlc_firmware_header_v2_0 *hdr; 2284 const __le32 *fw_data; 2285 unsigned i, fw_size; 2286 2287 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2288 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2289 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2290 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2291 2292 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2293 RLCG_UCODE_LOADING_START_ADDRESS); 2294 2295 for (i = 0; i < fw_size; i++) 2296 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2297 le32_to_cpup(fw_data++)); 2298 2299 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2300 } 2301 2302 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2303 { 2304 const struct rlc_firmware_header_v2_2 *hdr; 2305 const __le32 *fw_data; 2306 unsigned i, fw_size; 2307 u32 tmp; 2308 2309 hdr = (const struct 
rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2310 2311 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2312 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2313 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2314 2315 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2316 2317 for (i = 0; i < fw_size; i++) { 2318 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2319 msleep(1); 2320 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2321 le32_to_cpup(fw_data++)); 2322 } 2323 2324 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2325 2326 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2327 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2328 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2329 2330 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2331 for (i = 0; i < fw_size; i++) { 2332 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2333 msleep(1); 2334 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2335 le32_to_cpup(fw_data++)); 2336 } 2337 2338 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2339 2340 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2341 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2342 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2343 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2344 } 2345 2346 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2347 { 2348 const struct rlc_firmware_header_v2_3 *hdr; 2349 const __le32 *fw_data; 2350 unsigned i, fw_size; 2351 u32 tmp; 2352 2353 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2354 2355 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2356 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2357 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2358 2359 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2360 2361 for (i = 0; i < fw_size; i++) { 2362 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2363 msleep(1); 2364 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2365 le32_to_cpup(fw_data++)); 2366 } 2367 2368 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2369 2370 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2371 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2372 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2373 2374 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2375 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2376 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2377 2378 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2379 2380 for (i = 0; i < fw_size; i++) { 2381 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2382 msleep(1); 2383 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2384 le32_to_cpup(fw_data++)); 2385 } 2386 2387 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2388 2389 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2390 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2391 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2392 } 2393 2394 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2395 { 2396 const struct rlc_firmware_header_v2_0 *hdr; 2397 uint16_t version_major; 2398 uint16_t version_minor; 2399 2400 if (!adev->gfx.rlc_fw) 2401 return -EINVAL; 2402 2403 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2404 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2405 2406 version_major = le16_to_cpu(hdr->header.header_version_major); 2407 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2408 2409 if (version_major == 2) { 2410 
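		/*
		 * For v2.x RLC headers the core RLCG ucode is always loaded;
		 * the LX6 IRAM/DRAM images (header v2.2+) and the RLCP/RLCV
		 * images (header v2.3) are only loaded when DPM is enabled
		 * (amdgpu_dpm == 1).
		 */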
gfx_v11_0_load_rlcg_microcode(adev); 2411 if (amdgpu_dpm == 1) { 2412 if (version_minor >= 2) 2413 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2414 if (version_minor == 3) 2415 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2416 } 2417 2418 return 0; 2419 } 2420 2421 return -EINVAL; 2422 } 2423 2424 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2425 { 2426 int r; 2427 2428 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2429 gfx_v11_0_init_csb(adev); 2430 2431 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2432 gfx_v11_0_rlc_enable_srm(adev); 2433 } else { 2434 if (amdgpu_sriov_vf(adev)) { 2435 gfx_v11_0_init_csb(adev); 2436 return 0; 2437 } 2438 2439 adev->gfx.rlc.funcs->stop(adev); 2440 2441 /* disable CG */ 2442 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2443 2444 /* disable PG */ 2445 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2446 2447 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2448 /* legacy rlc firmware loading */ 2449 r = gfx_v11_0_rlc_load_microcode(adev); 2450 if (r) 2451 return r; 2452 } 2453 2454 gfx_v11_0_init_csb(adev); 2455 2456 adev->gfx.rlc.funcs->start(adev); 2457 } 2458 return 0; 2459 } 2460 2461 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2462 { 2463 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2464 uint32_t tmp; 2465 int i; 2466 2467 /* Trigger an invalidation of the L1 instruction caches */ 2468 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2469 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2470 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2471 2472 /* Wait for invalidation complete */ 2473 for (i = 0; i < usec_timeout; i++) { 2474 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2475 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2476 INVALIDATE_CACHE_COMPLETE)) 2477 break; 2478 udelay(1); 2479 } 2480 2481 if (i >= usec_timeout) { 2482 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2483 return -EINVAL; 2484 } 2485 2486 if (amdgpu_emu_mode == 1) 2487 amdgpu_device_flush_hdp(adev, NULL); 2488 2489 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2490 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2491 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2492 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2493 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2494 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2495 2496 /* Program me ucode address into intruction cache address register */ 2497 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2498 lower_32_bits(addr) & 0xFFFFF000); 2499 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2500 upper_32_bits(addr)); 2501 2502 return 0; 2503 } 2504 2505 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2506 { 2507 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2508 uint32_t tmp; 2509 int i; 2510 2511 /* Trigger an invalidation of the L1 instruction caches */ 2512 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2513 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2514 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2515 2516 /* Wait for invalidation complete */ 2517 for (i = 0; i < usec_timeout; i++) { 2518 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2519 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2520 INVALIDATE_CACHE_COMPLETE)) 2521 break; 2522 udelay(1); 2523 } 2524 2525 if (i >= usec_timeout) { 2526 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2527 return -EINVAL; 2528 } 2529 2530 if 
(amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/* Program pfp ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
		lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
		upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
{
	uint32_t usec_timeout = 50000; /* wait for 50ms */
	uint32_t tmp;
	int i;

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);

	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	/* Program mec1 ucode address into instruction cache address register */
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
		lower_32_bits(addr) & 0xFFFFF000);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
		upper_32_bits(addr));

	return 0;
}

static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
	uint32_t usec_timeout = 50000; /* wait for 50ms */
	uint32_t tmp;
	unsigned i, pipe_id;
	const struct gfx_firmware_header_v2_0 *pfp_hdr;

	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.pfp_fw->data;

	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
		lower_32_bits(addr));
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
		upper_32_bits(addr));

	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces invalidation of the ME L1 I$.
Wait for the 2617 * invalidation complete 2618 */ 2619 for (i = 0; i < usec_timeout; i++) { 2620 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2621 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2622 INVALIDATE_CACHE_COMPLETE)) 2623 break; 2624 udelay(1); 2625 } 2626 2627 if (i >= usec_timeout) { 2628 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2629 return -EINVAL; 2630 } 2631 2632 /* Prime the L1 instruction caches */ 2633 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2634 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2635 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2636 /* Waiting for cache primed*/ 2637 for (i = 0; i < usec_timeout; i++) { 2638 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2639 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2640 ICACHE_PRIMED)) 2641 break; 2642 udelay(1); 2643 } 2644 2645 if (i >= usec_timeout) { 2646 dev_err(adev->dev, "failed to prime instruction cache\n"); 2647 return -EINVAL; 2648 } 2649 2650 mutex_lock(&adev->srbm_mutex); 2651 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2652 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2653 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2654 (pfp_hdr->ucode_start_addr_hi << 30) | 2655 (pfp_hdr->ucode_start_addr_lo >> 2)); 2656 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2657 pfp_hdr->ucode_start_addr_hi >> 2); 2658 2659 /* 2660 * Program CP_ME_CNTL to reset given PIPE to take 2661 * effect of CP_PFP_PRGRM_CNTR_START. 2662 */ 2663 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2664 if (pipe_id == 0) 2665 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2666 PFP_PIPE0_RESET, 1); 2667 else 2668 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2669 PFP_PIPE1_RESET, 1); 2670 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2671 2672 /* Clear pfp pipe0 reset bit. 
*/ 2673 if (pipe_id == 0) 2674 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2675 PFP_PIPE0_RESET, 0); 2676 else 2677 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2678 PFP_PIPE1_RESET, 0); 2679 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2680 2681 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2682 lower_32_bits(addr2)); 2683 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2684 upper_32_bits(addr2)); 2685 } 2686 soc21_grbm_select(adev, 0, 0, 0, 0); 2687 mutex_unlock(&adev->srbm_mutex); 2688 2689 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2690 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2691 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2692 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2693 2694 /* Invalidate the data caches */ 2695 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2696 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2697 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2698 2699 for (i = 0; i < usec_timeout; i++) { 2700 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2701 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2702 INVALIDATE_DCACHE_COMPLETE)) 2703 break; 2704 udelay(1); 2705 } 2706 2707 if (i >= usec_timeout) { 2708 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2709 return -EINVAL; 2710 } 2711 2712 return 0; 2713 } 2714 2715 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2716 { 2717 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2718 uint32_t tmp; 2719 unsigned i, pipe_id; 2720 const struct gfx_firmware_header_v2_0 *me_hdr; 2721 2722 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2723 adev->gfx.me_fw->data; 2724 2725 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2726 lower_32_bits(addr)); 2727 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2728 upper_32_bits(addr)); 2729 2730 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2731 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2732 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2733 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2734 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2735 2736 /* 2737 * Programming any of the CP_ME_IC_BASE registers 2738 * forces invalidation of the ME L1 I$. 
Wait for the 2739 * invalidation complete 2740 */ 2741 for (i = 0; i < usec_timeout; i++) { 2742 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2743 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2744 INVALIDATE_CACHE_COMPLETE)) 2745 break; 2746 udelay(1); 2747 } 2748 2749 if (i >= usec_timeout) { 2750 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2751 return -EINVAL; 2752 } 2753 2754 /* Prime the instruction caches */ 2755 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2756 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2757 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2758 2759 /* Waiting for instruction cache primed*/ 2760 for (i = 0; i < usec_timeout; i++) { 2761 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2762 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2763 ICACHE_PRIMED)) 2764 break; 2765 udelay(1); 2766 } 2767 2768 if (i >= usec_timeout) { 2769 dev_err(adev->dev, "failed to prime instruction cache\n"); 2770 return -EINVAL; 2771 } 2772 2773 mutex_lock(&adev->srbm_mutex); 2774 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2775 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2776 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2777 (me_hdr->ucode_start_addr_hi << 30) | 2778 (me_hdr->ucode_start_addr_lo >> 2) ); 2779 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2780 me_hdr->ucode_start_addr_hi>>2); 2781 2782 /* 2783 * Program CP_ME_CNTL to reset given PIPE to take 2784 * effect of CP_PFP_PRGRM_CNTR_START. 2785 */ 2786 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2787 if (pipe_id == 0) 2788 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2789 ME_PIPE0_RESET, 1); 2790 else 2791 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2792 ME_PIPE1_RESET, 1); 2793 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2794 2795 /* Clear pfp pipe0 reset bit. 
*/ 2796 if (pipe_id == 0) 2797 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2798 ME_PIPE0_RESET, 0); 2799 else 2800 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2801 ME_PIPE1_RESET, 0); 2802 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2803 2804 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2805 lower_32_bits(addr2)); 2806 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2807 upper_32_bits(addr2)); 2808 } 2809 soc21_grbm_select(adev, 0, 0, 0, 0); 2810 mutex_unlock(&adev->srbm_mutex); 2811 2812 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2813 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2814 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2815 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2816 2817 /* Invalidate the data caches */ 2818 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2819 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2820 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2821 2822 for (i = 0; i < usec_timeout; i++) { 2823 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2824 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2825 INVALIDATE_DCACHE_COMPLETE)) 2826 break; 2827 udelay(1); 2828 } 2829 2830 if (i >= usec_timeout) { 2831 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2832 return -EINVAL; 2833 } 2834 2835 return 0; 2836 } 2837 2838 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2839 { 2840 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2841 uint32_t tmp; 2842 unsigned i; 2843 const struct gfx_firmware_header_v2_0 *mec_hdr; 2844 2845 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2846 adev->gfx.mec_fw->data; 2847 2848 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2849 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2850 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2851 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2852 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2853 2854 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2855 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2856 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2857 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2858 2859 mutex_lock(&adev->srbm_mutex); 2860 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2861 soc21_grbm_select(adev, 1, i, 0, 0); 2862 2863 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2864 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2865 upper_32_bits(addr2)); 2866 2867 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2868 mec_hdr->ucode_start_addr_lo >> 2 | 2869 mec_hdr->ucode_start_addr_hi << 30); 2870 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2871 mec_hdr->ucode_start_addr_hi >> 2); 2872 2873 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2874 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2875 upper_32_bits(addr)); 2876 } 2877 mutex_unlock(&adev->srbm_mutex); 2878 soc21_grbm_select(adev, 0, 0, 0, 0); 2879 2880 /* Trigger an invalidation of the L1 instruction caches */ 2881 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2882 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2883 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2884 2885 /* Wait for invalidation complete */ 2886 for (i = 0; i < usec_timeout; i++) { 2887 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2888 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2889 INVALIDATE_DCACHE_COMPLETE)) 2890 break; 2891 udelay(1); 2892 } 2893 2894 if (i >= 
usec_timeout) { 2895 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2896 return -EINVAL; 2897 } 2898 2899 /* Trigger an invalidation of the L1 instruction caches */ 2900 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2901 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2902 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2903 2904 /* Wait for invalidation complete */ 2905 for (i = 0; i < usec_timeout; i++) { 2906 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2907 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2908 INVALIDATE_CACHE_COMPLETE)) 2909 break; 2910 udelay(1); 2911 } 2912 2913 if (i >= usec_timeout) { 2914 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2915 return -EINVAL; 2916 } 2917 2918 return 0; 2919 } 2920 2921 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2922 { 2923 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2924 const struct gfx_firmware_header_v2_0 *me_hdr; 2925 const struct gfx_firmware_header_v2_0 *mec_hdr; 2926 uint32_t pipe_id, tmp; 2927 2928 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2929 adev->gfx.mec_fw->data; 2930 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2931 adev->gfx.me_fw->data; 2932 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2933 adev->gfx.pfp_fw->data; 2934 2935 /* config pfp program start addr */ 2936 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2937 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2938 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2939 (pfp_hdr->ucode_start_addr_hi << 30) | 2940 (pfp_hdr->ucode_start_addr_lo >> 2)); 2941 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2942 pfp_hdr->ucode_start_addr_hi >> 2); 2943 } 2944 soc21_grbm_select(adev, 0, 0, 0, 0); 2945 2946 /* reset pfp pipe */ 2947 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2948 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 2949 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 2950 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2951 2952 /* clear pfp pipe reset */ 2953 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 2954 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 2955 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2956 2957 /* config me program start addr */ 2958 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2959 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2960 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2961 (me_hdr->ucode_start_addr_hi << 30) | 2962 (me_hdr->ucode_start_addr_lo >> 2) ); 2963 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2964 me_hdr->ucode_start_addr_hi>>2); 2965 } 2966 soc21_grbm_select(adev, 0, 0, 0, 0); 2967 2968 /* reset me pipe */ 2969 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2970 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 2971 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 2972 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2973 2974 /* clear me pipe reset */ 2975 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 2976 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 2977 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2978 2979 /* config mec program start addr */ 2980 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 2981 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 2982 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2983 mec_hdr->ucode_start_addr_lo >> 2 | 2984 mec_hdr->ucode_start_addr_hi << 30); 2985 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2986 mec_hdr->ucode_start_addr_hi >> 2); 2987 } 2988 soc21_grbm_select(adev, 0, 0, 0, 0); 2989 2990 /* reset mec pipe */ 2991 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 2992 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 2993 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 2994 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 2995 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 2996 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 2997 2998 /* clear mec pipe reset */ 2999 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3000 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3001 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3002 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3003 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3004 } 3005 3006 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3007 { 3008 uint32_t cp_status; 3009 uint32_t bootload_status; 3010 int i, r; 3011 uint64_t addr, addr2; 3012 3013 for (i = 0; i < adev->usec_timeout; i++) { 3014 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3015 3016 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3017 IP_VERSION(11, 0, 1) || 3018 amdgpu_ip_version(adev, GC_HWIP, 0) == 3019 IP_VERSION(11, 0, 4) || 3020 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3021 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3022 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3023 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) 3024 bootload_status = RREG32_SOC15(GC, 0, 3025 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3026 else 3027 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3028 3029 if ((cp_status == 0) && 3030 (REG_GET_FIELD(bootload_status, 3031 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3032 break; 3033 } 3034 udelay(1); 3035 } 3036 3037 if (i >= adev->usec_timeout) { 3038 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3039 return -ETIMEDOUT; 3040 } 3041 3042 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3043 if (adev->gfx.rs64_enable) { 3044 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3045 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3046 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3047 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3048 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3049 if (r) 3050 return r; 3051 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3052 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3053 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3054 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3055 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3056 if (r) 3057 return r; 3058 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3059 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3060 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3061 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3062 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3063 if (r) 3064 return r; 3065 } else { 3066 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3067 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3068 r = gfx_v11_0_config_me_cache(adev, addr); 3069 if (r) 3070 return r; 3071 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3072 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3073 r = gfx_v11_0_config_pfp_cache(adev, addr); 3074 if (r) 3075 return r; 3076 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3077 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3078 r = gfx_v11_0_config_mec_cache(adev, addr); 3079 if (r) 
3080 return r; 3081 } 3082 } 3083 3084 return 0; 3085 } 3086 3087 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3088 { 3089 int i; 3090 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3091 3092 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3093 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3094 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3095 3096 for (i = 0; i < adev->usec_timeout; i++) { 3097 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3098 break; 3099 udelay(1); 3100 } 3101 3102 if (i >= adev->usec_timeout) 3103 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3104 3105 return 0; 3106 } 3107 3108 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3109 { 3110 int r; 3111 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3112 const __le32 *fw_data; 3113 unsigned i, fw_size; 3114 3115 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3116 adev->gfx.pfp_fw->data; 3117 3118 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3119 3120 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3121 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3122 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3123 3124 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3125 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3126 &adev->gfx.pfp.pfp_fw_obj, 3127 &adev->gfx.pfp.pfp_fw_gpu_addr, 3128 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3129 if (r) { 3130 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3131 gfx_v11_0_pfp_fini(adev); 3132 return r; 3133 } 3134 3135 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3136 3137 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3138 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3139 3140 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3141 3142 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3143 3144 for (i = 0; i < pfp_hdr->jt_size; i++) 3145 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3146 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3147 3148 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3149 3150 return 0; 3151 } 3152 3153 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3154 { 3155 int r; 3156 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3157 const __le32 *fw_ucode, *fw_data; 3158 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3159 uint32_t tmp; 3160 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3161 3162 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3163 adev->gfx.pfp_fw->data; 3164 3165 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3166 3167 /* instruction */ 3168 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3169 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3170 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3171 /* data */ 3172 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3173 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3174 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3175 3176 /* 64kb align */ 3177 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3178 64 * 1024, 3179 AMDGPU_GEM_DOMAIN_VRAM | 3180 AMDGPU_GEM_DOMAIN_GTT, 3181 &adev->gfx.pfp.pfp_fw_obj, 3182 &adev->gfx.pfp.pfp_fw_gpu_addr, 3183 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3184 if (r) { 3185 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3186 gfx_v11_0_pfp_fini(adev); 3187 return r; 3188 } 3189 3190 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3191 64 * 1024, 3192 AMDGPU_GEM_DOMAIN_VRAM | 3193 AMDGPU_GEM_DOMAIN_GTT, 3194 
&adev->gfx.pfp.pfp_fw_data_obj, 3195 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3196 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3197 if (r) { 3198 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3199 gfx_v11_0_pfp_fini(adev); 3200 return r; 3201 } 3202 3203 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3204 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3205 3206 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3207 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3208 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3209 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3210 3211 if (amdgpu_emu_mode == 1) 3212 amdgpu_device_flush_hdp(adev, NULL); 3213 3214 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3215 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3216 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3217 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3218 3219 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3220 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3221 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3222 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3223 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3224 3225 /* 3226 * Programming any of the CP_PFP_IC_BASE registers 3227 * forces invalidation of the ME L1 I$. Wait for the 3228 * invalidation complete 3229 */ 3230 for (i = 0; i < usec_timeout; i++) { 3231 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3232 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3233 INVALIDATE_CACHE_COMPLETE)) 3234 break; 3235 udelay(1); 3236 } 3237 3238 if (i >= usec_timeout) { 3239 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3240 return -EINVAL; 3241 } 3242 3243 /* Prime the L1 instruction caches */ 3244 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3245 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3246 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3247 /* Waiting for cache primed*/ 3248 for (i = 0; i < usec_timeout; i++) { 3249 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3250 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3251 ICACHE_PRIMED)) 3252 break; 3253 udelay(1); 3254 } 3255 3256 if (i >= usec_timeout) { 3257 dev_err(adev->dev, "failed to prime instruction cache\n"); 3258 return -EINVAL; 3259 } 3260 3261 mutex_lock(&adev->srbm_mutex); 3262 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3263 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3264 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3265 (pfp_hdr->ucode_start_addr_hi << 30) | 3266 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3267 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3268 pfp_hdr->ucode_start_addr_hi>>2); 3269 3270 /* 3271 * Program CP_ME_CNTL to reset given PIPE to take 3272 * effect of CP_PFP_PRGRM_CNTR_START. 3273 */ 3274 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3275 if (pipe_id == 0) 3276 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3277 PFP_PIPE0_RESET, 1); 3278 else 3279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3280 PFP_PIPE1_RESET, 1); 3281 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3282 3283 /* Clear pfp pipe0 reset bit. 
*/ 3284 if (pipe_id == 0) 3285 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3286 PFP_PIPE0_RESET, 0); 3287 else 3288 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3289 PFP_PIPE1_RESET, 0); 3290 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3291 3292 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3293 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3294 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3295 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3296 } 3297 soc21_grbm_select(adev, 0, 0, 0, 0); 3298 mutex_unlock(&adev->srbm_mutex); 3299 3300 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3301 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3302 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3303 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3304 3305 /* Invalidate the data caches */ 3306 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3307 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3308 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3309 3310 for (i = 0; i < usec_timeout; i++) { 3311 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3312 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3313 INVALIDATE_DCACHE_COMPLETE)) 3314 break; 3315 udelay(1); 3316 } 3317 3318 if (i >= usec_timeout) { 3319 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3320 return -EINVAL; 3321 } 3322 3323 return 0; 3324 } 3325 3326 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3327 { 3328 int r; 3329 const struct gfx_firmware_header_v1_0 *me_hdr; 3330 const __le32 *fw_data; 3331 unsigned i, fw_size; 3332 3333 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3334 adev->gfx.me_fw->data; 3335 3336 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3337 3338 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3339 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3340 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3341 3342 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3343 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3344 &adev->gfx.me.me_fw_obj, 3345 &adev->gfx.me.me_fw_gpu_addr, 3346 (void **)&adev->gfx.me.me_fw_ptr); 3347 if (r) { 3348 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3349 gfx_v11_0_me_fini(adev); 3350 return r; 3351 } 3352 3353 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3354 3355 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3356 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3357 3358 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3359 3360 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3361 3362 for (i = 0; i < me_hdr->jt_size; i++) 3363 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3364 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3365 3366 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3367 3368 return 0; 3369 } 3370 3371 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3372 { 3373 int r; 3374 const struct gfx_firmware_header_v2_0 *me_hdr; 3375 const __le32 *fw_ucode, *fw_data; 3376 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3377 uint32_t tmp; 3378 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3379 3380 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3381 adev->gfx.me_fw->data; 3382 3383 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3384 3385 /* instruction */ 3386 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3387 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3388 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3389 /* data */ 3390 
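	/*
	 * The v2_0 header carries separate instruction and data segments for
	 * the RS64 ME; each one is copied into its own 64KB-aligned BO below.
	 */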
fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3391 le32_to_cpu(me_hdr->data_offset_bytes)); 3392 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 3393 3394 /* 64kb align*/ 3395 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3396 64 * 1024, 3397 AMDGPU_GEM_DOMAIN_VRAM | 3398 AMDGPU_GEM_DOMAIN_GTT, 3399 &adev->gfx.me.me_fw_obj, 3400 &adev->gfx.me.me_fw_gpu_addr, 3401 (void **)&adev->gfx.me.me_fw_ptr); 3402 if (r) { 3403 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 3404 gfx_v11_0_me_fini(adev); 3405 return r; 3406 } 3407 3408 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3409 64 * 1024, 3410 AMDGPU_GEM_DOMAIN_VRAM | 3411 AMDGPU_GEM_DOMAIN_GTT, 3412 &adev->gfx.me.me_fw_data_obj, 3413 &adev->gfx.me.me_fw_data_gpu_addr, 3414 (void **)&adev->gfx.me.me_fw_data_ptr); 3415 if (r) { 3416 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 3417 gfx_v11_0_pfp_fini(adev); 3418 return r; 3419 } 3420 3421 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 3422 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 3423 3424 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3425 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 3426 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3427 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3428 3429 if (amdgpu_emu_mode == 1) 3430 amdgpu_device_flush_hdp(adev, NULL); 3431 3432 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3433 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3434 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 3435 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3436 3437 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 3438 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 3439 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 3440 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 3441 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 3442 3443 /* 3444 * Programming any of the CP_ME_IC_BASE registers 3445 * forces invalidation of the ME L1 I$. Wait for the 3446 * invalidation complete 3447 */ 3448 for (i = 0; i < usec_timeout; i++) { 3449 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3450 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3451 INVALIDATE_CACHE_COMPLETE)) 3452 break; 3453 udelay(1); 3454 } 3455 3456 if (i >= usec_timeout) { 3457 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3458 return -EINVAL; 3459 } 3460 3461 /* Prime the instruction caches */ 3462 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3463 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 3464 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 3465 3466 /* Waiting for instruction cache primed*/ 3467 for (i = 0; i < usec_timeout; i++) { 3468 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3469 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3470 ICACHE_PRIMED)) 3471 break; 3472 udelay(1); 3473 } 3474 3475 if (i >= usec_timeout) { 3476 dev_err(adev->dev, "failed to prime instruction cache\n"); 3477 return -EINVAL; 3478 } 3479 3480 mutex_lock(&adev->srbm_mutex); 3481 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3482 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3483 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3484 (me_hdr->ucode_start_addr_hi << 30) | 3485 (me_hdr->ucode_start_addr_lo >> 2) ); 3486 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3487 me_hdr->ucode_start_addr_hi>>2); 3488 3489 /* 3490 * Program CP_ME_CNTL to reset given PIPE to take 3491 * effect of CP_PFP_PRGRM_CNTR_START. 
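	 * (for the ME pipes handled here, the start address was written to
	 * CP_ME_PRGRM_CNTR_START/_HI just above)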
3492 */ 3493 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3494 if (pipe_id == 0) 3495 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3496 ME_PIPE0_RESET, 1); 3497 else 3498 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3499 ME_PIPE1_RESET, 1); 3500 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3501 3502 /* Clear pfp pipe0 reset bit. */ 3503 if (pipe_id == 0) 3504 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3505 ME_PIPE0_RESET, 0); 3506 else 3507 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3508 ME_PIPE1_RESET, 0); 3509 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3510 3511 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3512 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3513 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3514 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3515 } 3516 soc21_grbm_select(adev, 0, 0, 0, 0); 3517 mutex_unlock(&adev->srbm_mutex); 3518 3519 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3520 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3521 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3522 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3523 3524 /* Invalidate the data caches */ 3525 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3526 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3527 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3528 3529 for (i = 0; i < usec_timeout; i++) { 3530 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3531 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3532 INVALIDATE_DCACHE_COMPLETE)) 3533 break; 3534 udelay(1); 3535 } 3536 3537 if (i >= usec_timeout) { 3538 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3539 return -EINVAL; 3540 } 3541 3542 return 0; 3543 } 3544 3545 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3546 { 3547 int r; 3548 3549 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3550 return -EINVAL; 3551 3552 gfx_v11_0_cp_gfx_enable(adev, false); 3553 3554 if (adev->gfx.rs64_enable) 3555 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3556 else 3557 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3558 if (r) { 3559 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3560 return r; 3561 } 3562 3563 if (adev->gfx.rs64_enable) 3564 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3565 else 3566 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3567 if (r) { 3568 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3569 return r; 3570 } 3571 3572 return 0; 3573 } 3574 3575 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3576 { 3577 struct amdgpu_ring *ring; 3578 const struct cs_section_def *sect = NULL; 3579 const struct cs_extent_def *ext = NULL; 3580 int r, i; 3581 int ctx_reg_offset; 3582 3583 /* init the CP */ 3584 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3585 adev->gfx.config.max_hw_contexts - 1); 3586 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3587 3588 if (!amdgpu_async_gfx_ring) 3589 gfx_v11_0_cp_gfx_enable(adev, true); 3590 3591 ring = &adev->gfx.gfx_ring[0]; 3592 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3593 if (r) { 3594 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3595 return r; 3596 } 3597 3598 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3599 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3600 3601 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3602 amdgpu_ring_write(ring, 0x80000000); 3603 amdgpu_ring_write(ring, 0x80000000); 3604 3605 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3606 for (ext = sect->section; 
ext->extent != NULL; ++ext) { 3607 if (sect->id == SECT_CONTEXT) { 3608 amdgpu_ring_write(ring, 3609 PACKET3(PACKET3_SET_CONTEXT_REG, 3610 ext->reg_count)); 3611 amdgpu_ring_write(ring, ext->reg_index - 3612 PACKET3_SET_CONTEXT_REG_START); 3613 for (i = 0; i < ext->reg_count; i++) 3614 amdgpu_ring_write(ring, ext->extent[i]); 3615 } 3616 } 3617 } 3618 3619 ctx_reg_offset = 3620 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3621 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3622 amdgpu_ring_write(ring, ctx_reg_offset); 3623 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3624 3625 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3626 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3627 3628 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3629 amdgpu_ring_write(ring, 0); 3630 3631 amdgpu_ring_commit(ring); 3632 3633 /* submit cs packet to copy state 0 to next available state */ 3634 if (adev->gfx.num_gfx_rings > 1) { 3635 /* maximum supported gfx ring is 2 */ 3636 ring = &adev->gfx.gfx_ring[1]; 3637 r = amdgpu_ring_alloc(ring, 2); 3638 if (r) { 3639 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3640 return r; 3641 } 3642 3643 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3644 amdgpu_ring_write(ring, 0); 3645 3646 amdgpu_ring_commit(ring); 3647 } 3648 return 0; 3649 } 3650 3651 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3652 CP_PIPE_ID pipe) 3653 { 3654 u32 tmp; 3655 3656 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3657 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3658 3659 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3660 } 3661 3662 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3663 struct amdgpu_ring *ring) 3664 { 3665 u32 tmp; 3666 3667 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3668 if (ring->use_doorbell) { 3669 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3670 DOORBELL_OFFSET, ring->doorbell_index); 3671 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3672 DOORBELL_EN, 1); 3673 } else { 3674 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3675 DOORBELL_EN, 0); 3676 } 3677 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3678 3679 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3680 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3681 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3682 3683 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3684 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3685 } 3686 3687 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3688 { 3689 struct amdgpu_ring *ring; 3690 u32 tmp; 3691 u32 rb_bufsz; 3692 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3693 3694 /* Set the write pointer delay */ 3695 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3696 3697 /* set the RB to use vmid 0 */ 3698 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3699 3700 /* Init gfx ring 0 for pipe 0 */ 3701 mutex_lock(&adev->srbm_mutex); 3702 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3703 3704 /* Set ring buffer size */ 3705 ring = &adev->gfx.gfx_ring[0]; 3706 rb_bufsz = order_base_2(ring->ring_size / 8); 3707 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3708 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3709 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3710 3711 /* Initialize the ring buffer's write pointers */ 3712 ring->wptr = 0; 3713 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3714 WREG32_SOC15(GC, 0, 
regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3715 3716 /* set the wb address whether it's enabled or not */ 3717 rptr_addr = ring->rptr_gpu_addr; 3718 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3719 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3720 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3721 3722 wptr_gpu_addr = ring->wptr_gpu_addr; 3723 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3724 lower_32_bits(wptr_gpu_addr)); 3725 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3726 upper_32_bits(wptr_gpu_addr)); 3727 3728 mdelay(1); 3729 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3730 3731 rb_addr = ring->gpu_addr >> 8; 3732 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3733 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3734 3735 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3736 3737 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3738 mutex_unlock(&adev->srbm_mutex); 3739 3740 /* Init gfx ring 1 for pipe 1 */ 3741 if (adev->gfx.num_gfx_rings > 1) { 3742 mutex_lock(&adev->srbm_mutex); 3743 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3744 /* maximum supported gfx ring is 2 */ 3745 ring = &adev->gfx.gfx_ring[1]; 3746 rb_bufsz = order_base_2(ring->ring_size / 8); 3747 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3748 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3749 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3750 /* Initialize the ring buffer's write pointers */ 3751 ring->wptr = 0; 3752 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3753 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3754 /* Set the wb address whether it's enabled or not */ 3755 rptr_addr = ring->rptr_gpu_addr; 3756 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3757 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3758 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3759 wptr_gpu_addr = ring->wptr_gpu_addr; 3760 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3761 lower_32_bits(wptr_gpu_addr)); 3762 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3763 upper_32_bits(wptr_gpu_addr)); 3764 3765 mdelay(1); 3766 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3767 3768 rb_addr = ring->gpu_addr >> 8; 3769 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3770 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3771 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3772 3773 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3774 mutex_unlock(&adev->srbm_mutex); 3775 } 3776 /* Switch to pipe 0 */ 3777 mutex_lock(&adev->srbm_mutex); 3778 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3779 mutex_unlock(&adev->srbm_mutex); 3780 3781 /* start the ring */ 3782 gfx_v11_0_cp_gfx_start(adev); 3783 3784 return 0; 3785 } 3786 3787 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3788 { 3789 u32 data; 3790 3791 if (adev->gfx.rs64_enable) { 3792 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3793 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3794 enable ? 0 : 1); 3795 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3796 enable ? 0 : 1); 3797 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3798 enable ? 0 : 1); 3799 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3800 enable ? 0 : 1); 3801 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3802 enable ? 0 : 1); 3803 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3804 enable ? 
1 : 0); 3805 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3806 enable ? 1 : 0); 3807 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3808 enable ? 1 : 0); 3809 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3810 enable ? 1 : 0); 3811 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3812 enable ? 0 : 1); 3813 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3814 } else { 3815 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3816 3817 if (enable) { 3818 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3819 if (!adev->enable_mes_kiq) 3820 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3821 MEC_ME2_HALT, 0); 3822 } else { 3823 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3824 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3825 } 3826 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3827 } 3828 3829 udelay(50); 3830 } 3831 3832 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3833 { 3834 const struct gfx_firmware_header_v1_0 *mec_hdr; 3835 const __le32 *fw_data; 3836 unsigned i, fw_size; 3837 u32 *fw = NULL; 3838 int r; 3839 3840 if (!adev->gfx.mec_fw) 3841 return -EINVAL; 3842 3843 gfx_v11_0_cp_compute_enable(adev, false); 3844 3845 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3846 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3847 3848 fw_data = (const __le32 *) 3849 (adev->gfx.mec_fw->data + 3850 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3851 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3852 3853 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3854 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3855 &adev->gfx.mec.mec_fw_obj, 3856 &adev->gfx.mec.mec_fw_gpu_addr, 3857 (void **)&fw); 3858 if (r) { 3859 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3860 gfx_v11_0_mec_fini(adev); 3861 return r; 3862 } 3863 3864 memcpy(fw, fw_data, fw_size); 3865 3866 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3867 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3868 3869 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3870 3871 /* MEC1 */ 3872 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3873 3874 for (i = 0; i < mec_hdr->jt_size; i++) 3875 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3876 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3877 3878 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3879 3880 return 0; 3881 } 3882 3883 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3884 { 3885 const struct gfx_firmware_header_v2_0 *mec_hdr; 3886 const __le32 *fw_ucode, *fw_data; 3887 u32 tmp, fw_ucode_size, fw_data_size; 3888 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3889 u32 *fw_ucode_ptr, *fw_data_ptr; 3890 int r; 3891 3892 if (!adev->gfx.mec_fw) 3893 return -EINVAL; 3894 3895 gfx_v11_0_cp_compute_enable(adev, false); 3896 3897 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3898 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3899 3900 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3901 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3902 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3903 3904 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3905 le32_to_cpu(mec_hdr->data_offset_bytes)); 3906 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3907 3908 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3909 64 * 1024, 3910 AMDGPU_GEM_DOMAIN_VRAM | 3911 AMDGPU_GEM_DOMAIN_GTT, 3912 &adev->gfx.mec.mec_fw_obj, 3913 
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	mutex_unlock(&adev->srbm_mutex);
	soc21_grbm_select(adev, 0, 0, 0, 0);

	/* Trigger an invalidation of the L1 data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate
instruction cache\n"); 4009 return -EINVAL; 4010 } 4011 4012 return 0; 4013 } 4014 4015 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4016 { 4017 uint32_t tmp; 4018 struct amdgpu_device *adev = ring->adev; 4019 4020 /* tell RLC which is KIQ queue */ 4021 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4022 tmp &= 0xffffff00; 4023 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4024 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4025 } 4026 4027 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4028 { 4029 /* set graphics engine doorbell range */ 4030 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4031 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4032 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4033 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4034 4035 /* set compute engine doorbell range */ 4036 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4037 (adev->doorbell_index.kiq * 2) << 2); 4038 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4039 (adev->doorbell_index.userqueue_end * 2) << 2); 4040 } 4041 4042 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4043 struct v11_gfx_mqd *mqd, 4044 struct amdgpu_mqd_prop *prop) 4045 { 4046 bool priority = 0; 4047 u32 tmp; 4048 4049 /* set up default queue priority level 4050 * 0x0 = low priority, 0x1 = high priority 4051 */ 4052 if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) 4053 priority = 1; 4054 4055 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4056 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4057 mqd->cp_gfx_hqd_queue_priority = tmp; 4058 } 4059 4060 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4061 struct amdgpu_mqd_prop *prop) 4062 { 4063 struct v11_gfx_mqd *mqd = m; 4064 uint64_t hqd_gpu_addr, wb_gpu_addr; 4065 uint32_t tmp; 4066 uint32_t rb_bufsz; 4067 4068 /* set up gfx hqd wptr */ 4069 mqd->cp_gfx_hqd_wptr = 0; 4070 mqd->cp_gfx_hqd_wptr_hi = 0; 4071 4072 /* set the pointer to the MQD */ 4073 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4074 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4075 4076 /* set up mqd control */ 4077 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4078 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4079 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4080 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4081 mqd->cp_gfx_mqd_control = tmp; 4082 4083 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4084 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4085 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4086 mqd->cp_gfx_hqd_vmid = 0; 4087 4088 /* set up gfx queue priority */ 4089 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4090 4091 /* set up time quantum */ 4092 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4093 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4094 mqd->cp_gfx_hqd_quantum = tmp; 4095 4096 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 4097 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4098 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4099 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4100 4101 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4102 wb_gpu_addr = prop->rptr_gpu_addr; 4103 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4104 mqd->cp_gfx_hqd_rptr_addr_hi = 4105 upper_32_bits(wb_gpu_addr) & 0xffff; 4106 4107 /* set up rb_wptr_poll addr */ 4108 wb_gpu_addr = prop->wptr_gpu_addr; 4109 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4110 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4111 4112 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4113 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4114 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4115 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4116 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4117 #ifdef __BIG_ENDIAN 4118 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4119 #endif 4120 if (prop->tmz_queue) 4121 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); 4122 mqd->cp_gfx_hqd_cntl = tmp; 4123 4124 /* set up cp_doorbell_control */ 4125 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4126 if (prop->use_doorbell) { 4127 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4128 DOORBELL_OFFSET, prop->doorbell_index); 4129 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4130 DOORBELL_EN, 1); 4131 } else 4132 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4133 DOORBELL_EN, 0); 4134 mqd->cp_rb_doorbell_control = tmp; 4135 4136 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4137 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4138 4139 /* active the queue */ 4140 mqd->cp_gfx_hqd_active = 1; 4141 4142 /* set gfx UQ items */ 4143 mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); 4144 mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); 4145 mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); 4146 mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); 4147 mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); 4148 mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); 4149 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4150 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4151 4152 return 0; 4153 } 4154 4155 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4156 { 4157 struct amdgpu_device *adev = ring->adev; 4158 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4159 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4160 4161 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4162 memset((void *)mqd, 0, sizeof(*mqd)); 4163 mutex_lock(&adev->srbm_mutex); 4164 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4165 amdgpu_ring_init_mqd(ring); 4166 soc21_grbm_select(adev, 0, 0, 0, 0); 4167 mutex_unlock(&adev->srbm_mutex); 4168 if (adev->gfx.me.mqd_backup[mqd_idx]) 4169 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4170 } else { 4171 /* restore mqd with the backup copy */ 4172 if (adev->gfx.me.mqd_backup[mqd_idx]) 4173 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4174 /* reset the ring */ 4175 ring->wptr = 0; 4176 *ring->wptr_cpu_addr = 0; 4177 amdgpu_ring_clear_ring(ring); 4178 } 4179 4180 return 0; 4181 } 4182 4183 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4184 { 4185 int r, i; 4186 4187 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4188 r = 
gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4189 if (r) 4190 return r; 4191 } 4192 4193 r = amdgpu_gfx_enable_kgq(adev, 0); 4194 if (r) 4195 return r; 4196 4197 return gfx_v11_0_cp_gfx_start(adev); 4198 } 4199 4200 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4201 struct amdgpu_mqd_prop *prop) 4202 { 4203 struct v11_compute_mqd *mqd = m; 4204 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4205 uint32_t tmp; 4206 4207 mqd->header = 0xC0310800; 4208 mqd->compute_pipelinestat_enable = 0x00000001; 4209 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4210 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4211 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4212 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4213 mqd->compute_misc_reserved = 0x00000007; 4214 4215 eop_base_addr = prop->eop_gpu_addr >> 8; 4216 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4217 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4218 4219 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4220 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4221 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4222 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4223 4224 mqd->cp_hqd_eop_control = tmp; 4225 4226 /* enable doorbell? */ 4227 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4228 4229 if (prop->use_doorbell) { 4230 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4231 DOORBELL_OFFSET, prop->doorbell_index); 4232 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4233 DOORBELL_EN, 1); 4234 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4235 DOORBELL_SOURCE, 0); 4236 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4237 DOORBELL_HIT, 0); 4238 } else { 4239 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4240 DOORBELL_EN, 0); 4241 } 4242 4243 mqd->cp_hqd_pq_doorbell_control = tmp; 4244 4245 /* disable the queue if it's active */ 4246 mqd->cp_hqd_dequeue_request = 0; 4247 mqd->cp_hqd_pq_rptr = 0; 4248 mqd->cp_hqd_pq_wptr_lo = 0; 4249 mqd->cp_hqd_pq_wptr_hi = 0; 4250 4251 /* set the pointer to the MQD */ 4252 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4253 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4254 4255 /* set MQD vmid to 0 */ 4256 tmp = regCP_MQD_CONTROL_DEFAULT; 4257 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4258 mqd->cp_mqd_control = tmp; 4259 4260 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4261 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4262 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4263 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4264 4265 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4266 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4267 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4268 (order_base_2(prop->queue_size / 4) - 1)); 4269 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4270 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4271 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4272 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4273 prop->allow_tunneling); 4274 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4275 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4276 if (prop->tmz_queue) 4277 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4278 mqd->cp_hqd_pq_control = tmp; 4279 4280 /* set the wb address whether it's enabled or not */ 4281 wb_gpu_addr = prop->rptr_gpu_addr; 4282 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4283 
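	/* the high half is truncated to 16 bits below, as is also done for
	 * the wptr poll address a few lines further down
	 */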
mqd->cp_hqd_pq_rptr_report_addr_hi = 4284 upper_32_bits(wb_gpu_addr) & 0xffff; 4285 4286 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4287 wb_gpu_addr = prop->wptr_gpu_addr; 4288 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4289 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4290 4291 tmp = 0; 4292 /* enable the doorbell if requested */ 4293 if (prop->use_doorbell) { 4294 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4295 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4296 DOORBELL_OFFSET, prop->doorbell_index); 4297 4298 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4299 DOORBELL_EN, 1); 4300 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4301 DOORBELL_SOURCE, 0); 4302 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4303 DOORBELL_HIT, 0); 4304 } 4305 4306 mqd->cp_hqd_pq_doorbell_control = tmp; 4307 4308 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4309 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4310 4311 /* set the vmid for the queue */ 4312 mqd->cp_hqd_vmid = 0; 4313 4314 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4315 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4316 mqd->cp_hqd_persistent_state = tmp; 4317 4318 /* set MIN_IB_AVAIL_SIZE */ 4319 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4320 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4321 mqd->cp_hqd_ib_control = tmp; 4322 4323 /* set static priority for a compute queue/ring */ 4324 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4325 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4326 4327 mqd->cp_hqd_active = prop->hqd_active; 4328 4329 /* set UQ fenceaddress */ 4330 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4331 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4332 4333 return 0; 4334 } 4335 4336 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4337 { 4338 struct amdgpu_device *adev = ring->adev; 4339 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4340 int j; 4341 4342 /* inactivate the queue */ 4343 if (amdgpu_sriov_vf(adev)) 4344 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4345 4346 /* disable wptr polling */ 4347 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4348 4349 /* write the EOP addr */ 4350 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4351 mqd->cp_hqd_eop_base_addr_lo); 4352 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4353 mqd->cp_hqd_eop_base_addr_hi); 4354 4355 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4356 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4357 mqd->cp_hqd_eop_control); 4358 4359 /* enable doorbell? 
*/ 4360 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4361 mqd->cp_hqd_pq_doorbell_control); 4362 4363 /* disable the queue if it's active */ 4364 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4365 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4366 for (j = 0; j < adev->usec_timeout; j++) { 4367 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4368 break; 4369 udelay(1); 4370 } 4371 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4372 mqd->cp_hqd_dequeue_request); 4373 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4374 mqd->cp_hqd_pq_rptr); 4375 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4376 mqd->cp_hqd_pq_wptr_lo); 4377 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4378 mqd->cp_hqd_pq_wptr_hi); 4379 } 4380 4381 /* set the pointer to the MQD */ 4382 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4383 mqd->cp_mqd_base_addr_lo); 4384 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4385 mqd->cp_mqd_base_addr_hi); 4386 4387 /* set MQD vmid to 0 */ 4388 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4389 mqd->cp_mqd_control); 4390 4391 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4392 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4393 mqd->cp_hqd_pq_base_lo); 4394 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4395 mqd->cp_hqd_pq_base_hi); 4396 4397 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4398 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4399 mqd->cp_hqd_pq_control); 4400 4401 /* set the wb address whether it's enabled or not */ 4402 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4403 mqd->cp_hqd_pq_rptr_report_addr_lo); 4404 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4405 mqd->cp_hqd_pq_rptr_report_addr_hi); 4406 4407 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4408 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4409 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4410 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4411 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4412 4413 /* enable the doorbell if requested */ 4414 if (ring->use_doorbell) { 4415 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4416 (adev->doorbell_index.kiq * 2) << 2); 4417 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4418 (adev->doorbell_index.userqueue_end * 2) << 2); 4419 } 4420 4421 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4422 mqd->cp_hqd_pq_doorbell_control); 4423 4424 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4425 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4426 mqd->cp_hqd_pq_wptr_lo); 4427 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4428 mqd->cp_hqd_pq_wptr_hi); 4429 4430 /* set the vmid for the queue */ 4431 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4432 4433 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4434 mqd->cp_hqd_persistent_state); 4435 4436 /* activate the queue */ 4437 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4438 mqd->cp_hqd_active); 4439 4440 if (ring->use_doorbell) 4441 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4442 4443 return 0; 4444 } 4445 4446 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4447 { 4448 struct amdgpu_device *adev = ring->adev; 4449 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4450 4451 gfx_v11_0_kiq_setting(ring); 4452 4453 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4454 /* reset MQD to a clean status */ 4455 if (adev->gfx.kiq[0].mqd_backup) 4456 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4457 4458 /* reset ring buffer */ 4459 ring->wptr = 0; 4460 amdgpu_ring_clear_ring(ring); 4461 4462 mutex_lock(&adev->srbm_mutex); 4463 
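		/* reprogram the HQD registers with this KIQ queue selected
		 * (me/pipe/queue) while holding the SRBM mutex
		 */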
soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4464 gfx_v11_0_kiq_init_register(ring); 4465 soc21_grbm_select(adev, 0, 0, 0, 0); 4466 mutex_unlock(&adev->srbm_mutex); 4467 } else { 4468 memset((void *)mqd, 0, sizeof(*mqd)); 4469 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4470 amdgpu_ring_clear_ring(ring); 4471 mutex_lock(&adev->srbm_mutex); 4472 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4473 amdgpu_ring_init_mqd(ring); 4474 gfx_v11_0_kiq_init_register(ring); 4475 soc21_grbm_select(adev, 0, 0, 0, 0); 4476 mutex_unlock(&adev->srbm_mutex); 4477 4478 if (adev->gfx.kiq[0].mqd_backup) 4479 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4480 } 4481 4482 return 0; 4483 } 4484 4485 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4486 { 4487 struct amdgpu_device *adev = ring->adev; 4488 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4489 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4490 4491 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4492 memset((void *)mqd, 0, sizeof(*mqd)); 4493 mutex_lock(&adev->srbm_mutex); 4494 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4495 amdgpu_ring_init_mqd(ring); 4496 soc21_grbm_select(adev, 0, 0, 0, 0); 4497 mutex_unlock(&adev->srbm_mutex); 4498 4499 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4500 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4501 } else { 4502 /* restore MQD to a clean status */ 4503 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4504 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4505 /* reset ring buffer */ 4506 ring->wptr = 0; 4507 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4508 amdgpu_ring_clear_ring(ring); 4509 } 4510 4511 return 0; 4512 } 4513 4514 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4515 { 4516 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4517 return 0; 4518 } 4519 4520 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4521 { 4522 int i, r; 4523 4524 if (!amdgpu_async_gfx_ring) 4525 gfx_v11_0_cp_compute_enable(adev, true); 4526 4527 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4528 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4529 if (r) 4530 return r; 4531 } 4532 4533 return amdgpu_gfx_enable_kcq(adev, 0); 4534 } 4535 4536 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4537 { 4538 int r, i; 4539 struct amdgpu_ring *ring; 4540 4541 if (!(adev->flags & AMD_IS_APU)) 4542 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4543 4544 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4545 /* legacy firmware loading */ 4546 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4547 if (r) 4548 return r; 4549 4550 if (adev->gfx.rs64_enable) 4551 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4552 else 4553 r = gfx_v11_0_cp_compute_load_microcode(adev); 4554 if (r) 4555 return r; 4556 } 4557 4558 gfx_v11_0_cp_set_doorbell_range(adev); 4559 4560 if (amdgpu_async_gfx_ring) { 4561 gfx_v11_0_cp_compute_enable(adev, true); 4562 gfx_v11_0_cp_gfx_enable(adev, true); 4563 } 4564 4565 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4566 r = amdgpu_mes_kiq_hw_init(adev); 4567 else 4568 r = gfx_v11_0_kiq_resume(adev); 4569 if (r) 4570 return r; 4571 4572 r = gfx_v11_0_kcq_resume(adev); 4573 if (r) 4574 return r; 4575 4576 if (!amdgpu_async_gfx_ring) { 4577 r = gfx_v11_0_cp_gfx_resume(adev); 4578 if (r) 4579 return r; 4580 } else { 4581 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4582 if (r) 4583 return r; 4584 } 4585 4586 if 
(adev->gfx.disable_kq) { 4587 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4588 ring = &adev->gfx.gfx_ring[i]; 4589 /* we don't want to set ring->ready */ 4590 r = amdgpu_ring_test_ring(ring); 4591 if (r) 4592 return r; 4593 } 4594 if (amdgpu_async_gfx_ring) 4595 amdgpu_gfx_disable_kgq(adev, 0); 4596 } else { 4597 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4598 ring = &adev->gfx.gfx_ring[i]; 4599 r = amdgpu_ring_test_helper(ring); 4600 if (r) 4601 return r; 4602 } 4603 } 4604 4605 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4606 ring = &adev->gfx.compute_ring[i]; 4607 r = amdgpu_ring_test_helper(ring); 4608 if (r) 4609 return r; 4610 } 4611 4612 return 0; 4613 } 4614 4615 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4616 { 4617 gfx_v11_0_cp_gfx_enable(adev, enable); 4618 gfx_v11_0_cp_compute_enable(adev, enable); 4619 } 4620 4621 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4622 { 4623 int r; 4624 bool value; 4625 4626 r = adev->gfxhub.funcs->gart_enable(adev); 4627 if (r) 4628 return r; 4629 4630 amdgpu_device_flush_hdp(adev, NULL); 4631 4632 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 4633 false : true; 4634 4635 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4636 /* TODO investigate why this and the hdp flush above is needed, 4637 * are we missing a flush somewhere else? */ 4638 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4639 4640 return 0; 4641 } 4642 4643 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4644 { 4645 u32 tmp; 4646 4647 /* select RS64 */ 4648 if (adev->gfx.rs64_enable) { 4649 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4650 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4651 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4652 4653 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4654 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4655 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4656 } 4657 4658 if (amdgpu_emu_mode == 1) 4659 msleep(100); 4660 } 4661 4662 static int get_gb_addr_config(struct amdgpu_device * adev) 4663 { 4664 u32 gb_addr_config; 4665 4666 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4667 if (gb_addr_config == 0) 4668 return -EINVAL; 4669 4670 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4671 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4672 4673 adev->gfx.config.gb_addr_config = gb_addr_config; 4674 4675 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4676 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4677 GB_ADDR_CONFIG, NUM_PIPES); 4678 4679 adev->gfx.config.max_tile_pipes = 4680 adev->gfx.config.gb_addr_config_fields.num_pipes; 4681 4682 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4683 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4684 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4685 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4686 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4687 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4688 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4689 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4690 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4691 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4692 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4693 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4694 4695 return 0; 4696 } 4697 4698 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4699 { 4700 uint32_t data; 4701 4702 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4703 data |= 
	CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);

	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}

static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
				       adev->gfx.cleaner_shader_ptr);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.imu.funcs) {
			/* RLC autoload sequence 1: Program rlc ram */
			if (adev->gfx.imu.funcs->program_rlc_ram)
				adev->gfx.imu.funcs->program_rlc_ram(adev);
			/* rlc autoload firmware */
			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}
	} else {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
				if (adev->gfx.imu.funcs->load_microcode)
					adev->gfx.imu.funcs->load_microcode(adev);
				if (adev->gfx.imu.funcs->setup_imu)
					adev->gfx.imu.funcs->setup_imu(adev);
				if (adev->gfx.imu.funcs->start_imu)
					adev->gfx.imu.funcs->start_imu(adev);
			}

			/* disable gpa mode in backdoor loading */
			gfx_v11_0_disable_gpa_mode(adev);
		}
	}

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so with direct loading the SMC ucode has
		 * to be loaded here before the RLC.
4776 */ 4777 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4778 if (r) 4779 return r; 4780 } 4781 4782 gfx_v11_0_constants_init(adev); 4783 4784 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4785 gfx_v11_0_select_cp_fw_arch(adev); 4786 4787 if (adev->nbio.funcs->gc_doorbell_init) 4788 adev->nbio.funcs->gc_doorbell_init(adev); 4789 4790 r = gfx_v11_0_rlc_resume(adev); 4791 if (r) 4792 return r; 4793 4794 /* 4795 * init golden registers and rlc resume may override some registers, 4796 * reconfig them here 4797 */ 4798 gfx_v11_0_tcp_harvest(adev); 4799 4800 r = gfx_v11_0_cp_resume(adev); 4801 if (r) 4802 return r; 4803 4804 /* get IMU version from HW if it's not set */ 4805 if (!adev->gfx.imu_fw_version) 4806 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4807 4808 return r; 4809 } 4810 4811 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, 4812 bool enable) 4813 { 4814 unsigned int irq_type; 4815 int m, p, r; 4816 4817 if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { 4818 for (m = 0; m < adev->gfx.me.num_me; m++) { 4819 for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { 4820 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4821 if (enable) 4822 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4823 irq_type); 4824 else 4825 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4826 irq_type); 4827 if (r) 4828 return r; 4829 } 4830 } 4831 } 4832 4833 if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { 4834 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 4835 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 4836 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4837 + (m * adev->gfx.mec.num_pipe_per_mec) 4838 + p; 4839 if (enable) 4840 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4841 irq_type); 4842 else 4843 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4844 irq_type); 4845 if (r) 4846 return r; 4847 } 4848 } 4849 } 4850 4851 return 0; 4852 } 4853 4854 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4855 { 4856 struct amdgpu_device *adev = ip_block->adev; 4857 4858 cancel_delayed_work_sync(&adev->gfx.idle_work); 4859 4860 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4861 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4862 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4863 gfx_v11_0_set_userq_eop_interrupts(adev, false); 4864 4865 if (!adev->no_hw_access) { 4866 if (amdgpu_async_gfx_ring && 4867 !adev->gfx.disable_kq) { 4868 if (amdgpu_gfx_disable_kgq(adev, 0)) 4869 DRM_ERROR("KGQ disable failed\n"); 4870 } 4871 4872 if (amdgpu_gfx_disable_kcq(adev, 0)) 4873 DRM_ERROR("KCQ disable failed\n"); 4874 4875 amdgpu_mes_kiq_hw_fini(adev); 4876 } 4877 4878 if (amdgpu_sriov_vf(adev)) 4879 /* Remove the steps disabling CPG and clearing KIQ position, 4880 * so that CP could perform IDLE-SAVE during switch. Those 4881 * steps are necessary to avoid a DMAR error in gfx9 but it is 4882 * not reproduced on gfx11. 
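	 *
	 * With SR-IOV enabled the function therefore returns here, leaving
	 * the CP enabled and the gfxhub untouched.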
4883 */ 4884 return 0; 4885 4886 gfx_v11_0_cp_enable(adev, false); 4887 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4888 4889 adev->gfxhub.funcs->gart_disable(adev); 4890 4891 adev->gfx.is_poweron = false; 4892 4893 return 0; 4894 } 4895 4896 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4897 { 4898 return gfx_v11_0_hw_fini(ip_block); 4899 } 4900 4901 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4902 { 4903 return gfx_v11_0_hw_init(ip_block); 4904 } 4905 4906 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 4907 { 4908 struct amdgpu_device *adev = ip_block->adev; 4909 4910 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4911 GRBM_STATUS, GUI_ACTIVE)) 4912 return false; 4913 else 4914 return true; 4915 } 4916 4917 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4918 { 4919 unsigned i; 4920 u32 tmp; 4921 struct amdgpu_device *adev = ip_block->adev; 4922 4923 for (i = 0; i < adev->usec_timeout; i++) { 4924 /* read MC_STATUS */ 4925 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4926 GRBM_STATUS__GUI_ACTIVE_MASK; 4927 4928 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4929 return 0; 4930 udelay(1); 4931 } 4932 return -ETIMEDOUT; 4933 } 4934 4935 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4936 bool req) 4937 { 4938 u32 i, tmp, val; 4939 4940 for (i = 0; i < adev->usec_timeout; i++) { 4941 /* Request with MeId=2, PipeId=0 */ 4942 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4943 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4944 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4945 4946 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4947 if (req) { 4948 if (val == tmp) 4949 break; 4950 } else { 4951 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4952 REQUEST, 1); 4953 4954 /* unlocked or locked by firmware */ 4955 if (val != tmp) 4956 break; 4957 } 4958 udelay(1); 4959 } 4960 4961 if (i >= adev->usec_timeout) 4962 return -EINVAL; 4963 4964 return 0; 4965 } 4966 4967 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4968 { 4969 u32 grbm_soft_reset = 0; 4970 u32 tmp; 4971 int r, i, j, k; 4972 struct amdgpu_device *adev = ip_block->adev; 4973 4974 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4975 4976 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 4977 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 4978 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 4979 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 4980 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 4981 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 4982 4983 mutex_lock(&adev->srbm_mutex); 4984 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 4985 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 4986 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 4987 soc21_grbm_select(adev, i, k, j, 0); 4988 4989 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 4990 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 4991 } 4992 } 4993 } 4994 for (i = 0; i < adev->gfx.me.num_me; ++i) { 4995 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 4996 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 4997 soc21_grbm_select(adev, i, k, j, 0); 4998 4999 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5000 } 5001 } 5002 } 5003 soc21_grbm_select(adev, 0, 0, 0, 0); 5004 mutex_unlock(&adev->srbm_mutex); 5005 5006 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5007 mutex_lock(&adev->gfx.reset_sem_mutex); 5008 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 5009 if (r) { 5010 mutex_unlock(&adev->gfx.reset_sem_mutex); 5011 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 5012 return r; 5013 } 5014 5015 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 5016 5017 /* Read the CP_VMID_RESET register three times 5018 * to give GFX_HQD_ACTIVE enough time to reach 0. */ 5019 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5020 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5021 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5022 5023 /* release the gfx mutex */ 5024 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 5025 mutex_unlock(&adev->gfx.reset_sem_mutex); 5026 if (r) { 5027 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 5028 return r; 5029 } 5030 5031 for (i = 0; i < adev->usec_timeout; i++) { 5032 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 5033 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 5034 break; 5035 udelay(1); 5036 } 5037 if (i >= adev->usec_timeout) { 5038 DRM_ERROR("Failed to wait for all pipes to become idle\n"); 5039 return -EINVAL; 5040 } 5041 5042 /********** trigger soft reset ***********/ 5043 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5044 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5045 SOFT_RESET_CP, 1); 5046 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5047 SOFT_RESET_GFX, 1); 5048 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5049 SOFT_RESET_CPF, 1); 5050 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5051 SOFT_RESET_CPC, 1); 5052 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5053 SOFT_RESET_CPG, 1); 5054 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5055 /********** exit soft reset ***********/ 5056 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5057 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5058 SOFT_RESET_CP, 0); 5059 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5060 SOFT_RESET_GFX, 0); 5061 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5062 SOFT_RESET_CPF, 0); 5063 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5064 SOFT_RESET_CPC, 0); 5065 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5066 SOFT_RESET_CPG, 0); 5067 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5068 5069 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 5070 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 5071 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 5072 5073 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 5074 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 5075 5076 for (i = 0; i < adev->usec_timeout; i++) { 5077 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 5078 break; 5079 udelay(1); 5080 } 5081 if (i >= adev->usec_timeout) { 5082 DRM_ERROR("Failed to wait for CP_VMID_RESET to return to 0\n"); 5083 return -EINVAL; 5084 } 5085 5086 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5087 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5088 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5089 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5090 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5091 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5092 5093 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5094 5095 return gfx_v11_0_cp_resume(adev); 5096 } 5097 5098 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 5099 { 5100 int i, r; 5101 struct amdgpu_device *adev = ip_block->adev; 5102
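/* The IB tests below use a 1 second timeout per ring; any failure reports that a soft reset is needed. */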
struct amdgpu_ring *ring; 5103 long tmo = msecs_to_jiffies(1000); 5104 5105 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5106 ring = &adev->gfx.gfx_ring[i]; 5107 r = amdgpu_ring_test_ib(ring, tmo); 5108 if (r) 5109 return true; 5110 } 5111 5112 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5113 ring = &adev->gfx.compute_ring[i]; 5114 r = amdgpu_ring_test_ib(ring, tmo); 5115 if (r) 5116 return true; 5117 } 5118 5119 return false; 5120 } 5121 5122 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5123 { 5124 struct amdgpu_device *adev = ip_block->adev; 5125 /** 5126 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5127 */ 5128 return amdgpu_mes_resume(adev); 5129 } 5130 5131 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5132 { 5133 uint64_t clock; 5134 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5135 5136 if (amdgpu_sriov_vf(adev)) { 5137 amdgpu_gfx_off_ctrl(adev, false); 5138 mutex_lock(&adev->gfx.gpu_clock_mutex); 5139 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5140 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5141 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5142 if (clock_counter_hi_pre != clock_counter_hi_after) 5143 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5144 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5145 amdgpu_gfx_off_ctrl(adev, true); 5146 } else { 5147 preempt_disable(); 5148 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5149 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5150 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5151 if (clock_counter_hi_pre != clock_counter_hi_after) 5152 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5153 preempt_enable(); 5154 } 5155 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5156 5157 return clock; 5158 } 5159 5160 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5161 uint32_t vmid, 5162 uint32_t gds_base, uint32_t gds_size, 5163 uint32_t gws_base, uint32_t gws_size, 5164 uint32_t oa_base, uint32_t oa_size) 5165 { 5166 struct amdgpu_device *adev = ring->adev; 5167 5168 /* GDS Base */ 5169 gfx_v11_0_write_data_to_reg(ring, 0, false, 5170 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5171 gds_base); 5172 5173 /* GDS Size */ 5174 gfx_v11_0_write_data_to_reg(ring, 0, false, 5175 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5176 gds_size); 5177 5178 /* GWS */ 5179 gfx_v11_0_write_data_to_reg(ring, 0, false, 5180 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5181 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5182 5183 /* OA */ 5184 gfx_v11_0_write_data_to_reg(ring, 0, false, 5185 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5186 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5187 } 5188 5189 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5190 { 5191 struct amdgpu_device *adev = ip_block->adev; 5192 5193 switch (amdgpu_user_queue) { 5194 case -1: 5195 case 0: 5196 default: 5197 adev->gfx.disable_kq = false; 5198 adev->gfx.disable_uq = true; 5199 break; 5200 case 1: 5201 adev->gfx.disable_kq = false; 5202 adev->gfx.disable_uq = false; 5203 break; 5204 case 2: 5205 adev->gfx.disable_kq = true; 5206 adev->gfx.disable_uq = false; 5207 break; 5208 } 5209 5210 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5211 
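/* amdgpu_user_queue == 2 disables kernel queues entirely, so only a single gfx ring is kept for clear state setup. */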
5212 if (adev->gfx.disable_kq) { 5213 /* We need one GFX ring temporarily to set up 5214 * the clear state. 5215 */ 5216 adev->gfx.num_gfx_rings = 1; 5217 adev->gfx.num_compute_rings = 0; 5218 } else { 5219 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5220 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5221 AMDGPU_MAX_COMPUTE_RINGS); 5222 } 5223 5224 gfx_v11_0_set_kiq_pm4_funcs(adev); 5225 gfx_v11_0_set_ring_funcs(adev); 5226 gfx_v11_0_set_irq_funcs(adev); 5227 gfx_v11_0_set_gds_init(adev); 5228 gfx_v11_0_set_rlc_funcs(adev); 5229 gfx_v11_0_set_mqd_funcs(adev); 5230 gfx_v11_0_set_imu_funcs(adev); 5231 5232 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5233 5234 return gfx_v11_0_init_microcode(adev); 5235 } 5236 5237 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5238 { 5239 struct amdgpu_device *adev = ip_block->adev; 5240 int r; 5241 5242 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5243 if (r) 5244 return r; 5245 5246 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5247 if (r) 5248 return r; 5249 5250 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5251 if (r) 5252 return r; 5253 5254 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5255 if (r) 5256 return r; 5257 5258 return 0; 5259 } 5260 5261 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5262 { 5263 uint32_t rlc_cntl; 5264 5265 /* if RLC is not enabled, do nothing */ 5266 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5267 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5268 } 5269 5270 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5271 { 5272 uint32_t data; 5273 unsigned i; 5274 5275 data = RLC_SAFE_MODE__CMD_MASK; 5276 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5277 5278 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5279 5280 /* wait for RLC_SAFE_MODE */ 5281 for (i = 0; i < adev->usec_timeout; i++) { 5282 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5283 RLC_SAFE_MODE, CMD)) 5284 break; 5285 udelay(1); 5286 } 5287 } 5288 5289 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5290 { 5291 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5292 } 5293 5294 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5295 bool enable) 5296 { 5297 uint32_t def, data; 5298 5299 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5300 return; 5301 5302 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5303 5304 if (enable) 5305 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5306 else 5307 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5308 5309 if (def != data) 5310 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5311 } 5312 5313 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5314 bool enable) 5315 { 5316 uint32_t def, data; 5317 5318 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5319 return; 5320 5321 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5322 5323 if (enable) 5324 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5325 else 5326 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5327 5328 if (def != data) 5329 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5330 } 5331 5332 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5333 bool enable) 5334 { 5335 uint32_t def, data; 5336 5337 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5338 return; 5339 5340 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5341 
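/* Repeater FGCG is in effect while the override bit is clear; setting it keeps the repeater clocks ungated. */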
5342 if (enable) 5343 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5344 else 5345 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5346 5347 if (def != data) 5348 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5349 } 5350 5351 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5352 bool enable) 5353 { 5354 uint32_t data, def; 5355 5356 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5357 return; 5358 5359 /* It is disabled by HW by default */ 5360 if (enable) { 5361 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5362 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5363 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5364 5365 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5366 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5367 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5368 5369 if (def != data) 5370 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5371 } 5372 } else { 5373 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5374 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5375 5376 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5377 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5378 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5379 5380 if (def != data) 5381 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5382 } 5383 } 5384 } 5385 5386 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5387 bool enable) 5388 { 5389 uint32_t def, data; 5390 5391 if (!(adev->cg_flags & 5392 (AMD_CG_SUPPORT_GFX_CGCG | 5393 AMD_CG_SUPPORT_GFX_CGLS | 5394 AMD_CG_SUPPORT_GFX_3D_CGCG | 5395 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5396 return; 5397 5398 if (enable) { 5399 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5400 5401 /* unset CGCG override */ 5402 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5403 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5404 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5405 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5406 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5407 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5408 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5409 5410 /* update CGCG override bits */ 5411 if (def != data) 5412 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5413 5414 /* enable cgcg FSM(0x0000363F) */ 5415 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5416 5417 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5418 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5419 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5420 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5421 } 5422 5423 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5424 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5425 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5426 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5427 } 5428 5429 if (def != data) 5430 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5431 5432 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5433 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5434 5435 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5436 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5437 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5438 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5439 } 5440 5441 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5442 data &= 
~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5443 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5444 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5445 } 5446 5447 if (def != data) 5448 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5449 5450 /* set IDLE_POLL_COUNT(0x00900100) */ 5451 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5452 5453 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5454 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5455 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5456 5457 if (def != data) 5458 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5459 5460 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5461 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5462 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5463 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5464 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5465 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5466 5467 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5468 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5469 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5470 5471 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5472 if (adev->sdma.num_instances > 1) { 5473 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5474 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5475 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5476 } 5477 } else { 5478 /* Program RLC_CGCG_CGLS_CTRL */ 5479 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5480 5481 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5482 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5483 5484 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5485 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5486 5487 if (def != data) 5488 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5489 5490 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5491 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5492 5493 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5494 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5495 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5496 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5497 5498 if (def != data) 5499 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5500 5501 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5502 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5503 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5504 5505 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5506 if (adev->sdma.num_instances > 1) { 5507 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5508 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5509 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5510 } 5511 } 5512 } 5513 5514 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5515 bool enable) 5516 { 5517 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5518 5519 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5520 5521 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5522 5523 gfx_v11_0_update_repeater_fgcg(adev, enable); 5524 5525 gfx_v11_0_update_sram_fgcg(adev, enable); 5526 5527 gfx_v11_0_update_perf_clk(adev, enable); 5528 5529 if (adev->cg_flags & 5530 (AMD_CG_SUPPORT_GFX_MGCG | 5531 AMD_CG_SUPPORT_GFX_CGLS | 5532 AMD_CG_SUPPORT_GFX_CGCG | 5533 AMD_CG_SUPPORT_GFX_3D_CGCG | 5534 AMD_CG_SUPPORT_GFX_3D_CGLS)) 
5535 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5536 5537 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5538 5539 return 0; 5540 } 5541 5542 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5543 { 5544 u32 reg, pre_data, data; 5545 5546 amdgpu_gfx_off_ctrl(adev, false); 5547 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5548 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5549 pre_data = RREG32_NO_KIQ(reg); 5550 else 5551 pre_data = RREG32(reg); 5552 5553 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5554 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5555 5556 if (pre_data != data) { 5557 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5558 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5559 } else 5560 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5561 } 5562 amdgpu_gfx_off_ctrl(adev, true); 5563 5564 if (ring 5565 && amdgpu_sriov_is_pp_one_vf(adev) 5566 && (pre_data != data) 5567 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5568 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5569 amdgpu_ring_emit_wreg(ring, reg, data); 5570 } 5571 } 5572 5573 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5574 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5575 .set_safe_mode = gfx_v11_0_set_safe_mode, 5576 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5577 .init = gfx_v11_0_rlc_init, 5578 .get_csb_size = gfx_v11_0_get_csb_size, 5579 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5580 .resume = gfx_v11_0_rlc_resume, 5581 .stop = gfx_v11_0_rlc_stop, 5582 .reset = gfx_v11_0_rlc_reset, 5583 .start = gfx_v11_0_rlc_start, 5584 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5585 }; 5586 5587 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5588 { 5589 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5590 5591 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5592 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5593 else 5594 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5595 5596 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5597 5598 // Program RLC_PG_DELAY3 for CGPG hysteresis 5599 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5600 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5601 case IP_VERSION(11, 0, 1): 5602 case IP_VERSION(11, 0, 4): 5603 case IP_VERSION(11, 5, 0): 5604 case IP_VERSION(11, 5, 1): 5605 case IP_VERSION(11, 5, 2): 5606 case IP_VERSION(11, 5, 3): 5607 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5608 break; 5609 default: 5610 break; 5611 } 5612 } 5613 } 5614 5615 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5616 { 5617 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5618 5619 gfx_v11_cntl_power_gating(adev, enable); 5620 5621 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5622 } 5623 5624 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5625 enum amd_powergating_state state) 5626 { 5627 struct amdgpu_device *adev = ip_block->adev; 5628 bool enable = (state == AMD_PG_STATE_GATE); 5629 5630 if (amdgpu_sriov_vf(adev)) 5631 return 0; 5632 5633 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5634 case IP_VERSION(11, 0, 0): 5635 case IP_VERSION(11, 0, 2): 5636 case IP_VERSION(11, 0, 3): 5637 amdgpu_gfx_off_ctrl(adev, enable); 5638 break; 5639 case IP_VERSION(11, 0, 1): 5640 case IP_VERSION(11, 0, 4): 5641 case IP_VERSION(11, 5, 0): 5642 case IP_VERSION(11, 5, 1): 5643 case IP_VERSION(11, 5, 
2): 5644 case IP_VERSION(11, 5, 3): 5645 if (!enable) 5646 amdgpu_gfx_off_ctrl(adev, false); 5647 5648 gfx_v11_cntl_pg(adev, enable); 5649 5650 if (enable) 5651 amdgpu_gfx_off_ctrl(adev, true); 5652 5653 break; 5654 default: 5655 break; 5656 } 5657 5658 return 0; 5659 } 5660 5661 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5662 enum amd_clockgating_state state) 5663 { 5664 struct amdgpu_device *adev = ip_block->adev; 5665 5666 if (amdgpu_sriov_vf(adev)) 5667 return 0; 5668 5669 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5670 case IP_VERSION(11, 0, 0): 5671 case IP_VERSION(11, 0, 1): 5672 case IP_VERSION(11, 0, 2): 5673 case IP_VERSION(11, 0, 3): 5674 case IP_VERSION(11, 0, 4): 5675 case IP_VERSION(11, 5, 0): 5676 case IP_VERSION(11, 5, 1): 5677 case IP_VERSION(11, 5, 2): 5678 case IP_VERSION(11, 5, 3): 5679 gfx_v11_0_update_gfx_clock_gating(adev, 5680 state == AMD_CG_STATE_GATE); 5681 break; 5682 default: 5683 break; 5684 } 5685 5686 return 0; 5687 } 5688 5689 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5690 { 5691 struct amdgpu_device *adev = ip_block->adev; 5692 int data; 5693 5694 /* AMD_CG_SUPPORT_GFX_MGCG */ 5695 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5696 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5697 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5698 5699 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5700 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5701 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5702 5703 /* AMD_CG_SUPPORT_GFX_FGCG */ 5704 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5705 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5706 5707 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5708 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5709 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5710 5711 /* AMD_CG_SUPPORT_GFX_CGCG */ 5712 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5713 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5714 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5715 5716 /* AMD_CG_SUPPORT_GFX_CGLS */ 5717 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5718 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5719 5720 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5721 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5722 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5723 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5724 5725 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5726 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5727 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5728 } 5729 5730 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5731 { 5732 /* gfx11 is 32bit rptr*/ 5733 return *(uint32_t *)ring->rptr_cpu_addr; 5734 } 5735 5736 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5737 { 5738 struct amdgpu_device *adev = ring->adev; 5739 u64 wptr; 5740 5741 /* XXX check if swapping is necessary on BE */ 5742 if (ring->use_doorbell) { 5743 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5744 } else { 5745 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5746 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5747 } 5748 5749 return wptr; 5750 } 5751 5752 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5753 { 5754 struct amdgpu_device *adev = ring->adev; 5755 5756 if (ring->use_doorbell) { 5757 /* XXX check if swapping is necessary on BE */ 5758 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5759 ring->wptr); 5760 WDOORBELL64(ring->doorbell_index, ring->wptr); 5761 } else { 5762 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5763 
lower_32_bits(ring->wptr)); 5764 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5765 upper_32_bits(ring->wptr)); 5766 } 5767 } 5768 5769 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5770 { 5771 /* gfx11 hardware is 32bit rptr */ 5772 return *(uint32_t *)ring->rptr_cpu_addr; 5773 } 5774 5775 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5776 { 5777 u64 wptr; 5778 5779 /* XXX check if swapping is necessary on BE */ 5780 if (ring->use_doorbell) 5781 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5782 else 5783 BUG(); 5784 return wptr; 5785 } 5786 5787 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5788 { 5789 struct amdgpu_device *adev = ring->adev; 5790 5791 /* XXX check if swapping is necessary on BE */ 5792 if (ring->use_doorbell) { 5793 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5794 ring->wptr); 5795 WDOORBELL64(ring->doorbell_index, ring->wptr); 5796 } else { 5797 BUG(); /* only DOORBELL method supported on gfx11 now */ 5798 } 5799 } 5800 5801 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5802 { 5803 struct amdgpu_device *adev = ring->adev; 5804 u32 ref_and_mask, reg_mem_engine; 5805 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5806 5807 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5808 switch (ring->me) { 5809 case 1: 5810 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5811 break; 5812 case 2: 5813 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5814 break; 5815 default: 5816 return; 5817 } 5818 reg_mem_engine = 0; 5819 } else { 5820 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5821 reg_mem_engine = 1; /* pfp */ 5822 } 5823 5824 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5825 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5826 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5827 ref_and_mask, ref_and_mask, 0x20); 5828 } 5829 5830 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5831 struct amdgpu_job *job, 5832 struct amdgpu_ib *ib, 5833 uint32_t flags) 5834 { 5835 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5836 u32 header, control = 0; 5837 5838 BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE); 5839 5840 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5841 5842 control |= ib->length_dw | (vmid << 24); 5843 5844 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5845 control |= INDIRECT_BUFFER_PRE_ENB(1); 5846 5847 if (flags & AMDGPU_IB_PREEMPTED) 5848 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5849 5850 if (vmid) 5851 gfx_v11_0_ring_emit_de_meta(ring, 5852 (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); 5853 } 5854 5855 amdgpu_ring_write(ring, header); 5856 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5857 amdgpu_ring_write(ring, 5858 #ifdef __BIG_ENDIAN 5859 (2 << 0) | 5860 #endif 5861 lower_32_bits(ib->gpu_addr)); 5862 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5863 amdgpu_ring_write(ring, control); 5864 } 5865 5866 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5867 struct amdgpu_job *job, 5868 struct amdgpu_ib *ib, 5869 uint32_t flags) 5870 { 5871 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5872 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5873 5874 /* Currently, there is a high possibility to get wave ID mismatch 5875 * between ME and GDS, leading to a hw deadlock, because ME generates 5876 * different wave IDs than the GDS expects. 
This situation happens 5877 * randomly when at least 5 compute pipes use GDS ordered append. 5878 * The wave IDs generated by ME are also wrong after suspend/resume. 5879 * Those are probably bugs somewhere else in the kernel driver. 5880 * 5881 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5882 * GDS to 0 for this ring (me/pipe). 5883 */ 5884 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5885 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5886 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5887 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5888 } 5889 5890 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5891 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5892 amdgpu_ring_write(ring, 5893 #ifdef __BIG_ENDIAN 5894 (2 << 0) | 5895 #endif 5896 lower_32_bits(ib->gpu_addr)); 5897 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5898 amdgpu_ring_write(ring, control); 5899 } 5900 5901 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5902 u64 seq, unsigned flags) 5903 { 5904 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5905 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5906 5907 /* RELEASE_MEM - flush caches, send int */ 5908 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5909 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5910 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5911 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5912 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5913 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5914 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5915 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5916 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5917 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 
2 : 0))); 5918 5919 /* 5920 * the address should be Qword aligned for a 64bit write, and Dword 5921 * aligned if only the low 32 bits of data are sent (data high is discarded) 5922 */ 5923 if (write64bit) 5924 BUG_ON(addr & 0x7); 5925 else 5926 BUG_ON(addr & 0x3); 5927 amdgpu_ring_write(ring, lower_32_bits(addr)); 5928 amdgpu_ring_write(ring, upper_32_bits(addr)); 5929 amdgpu_ring_write(ring, lower_32_bits(seq)); 5930 amdgpu_ring_write(ring, upper_32_bits(seq)); 5931 amdgpu_ring_write(ring, 0); 5932 } 5933 5934 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5935 { 5936 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5937 uint32_t seq = ring->fence_drv.sync_seq; 5938 uint64_t addr = ring->fence_drv.gpu_addr; 5939 5940 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5941 upper_32_bits(addr), seq, 0xffffffff, 4); 5942 } 5943 5944 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5945 uint16_t pasid, uint32_t flush_type, 5946 bool all_hub, uint8_t dst_sel) 5947 { 5948 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5949 amdgpu_ring_write(ring, 5950 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5951 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5952 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5953 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5954 } 5955 5956 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5957 unsigned vmid, uint64_t pd_addr) 5958 { 5959 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5960 5961 /* compute doesn't have PFP */ 5962 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5963 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5964 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5965 amdgpu_ring_write(ring, 0x0); 5966 } 5967 5968 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5969 * has changed in any way.
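 * Clearing the cached offset and pointer below guarantees the next SET_Q_MODE emitted by gfx_v11_0_ring_emit_gfx_shadow() is not skipped.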
5970 */ 5971 ring->set_q_mode_offs = 0; 5972 ring->set_q_mode_ptr = NULL; 5973 } 5974 5975 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 5976 u64 seq, unsigned int flags) 5977 { 5978 struct amdgpu_device *adev = ring->adev; 5979 5980 /* we only allocate 32bit for each seq wb address */ 5981 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 5982 5983 /* write fence seq to the "addr" */ 5984 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5985 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5986 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 5987 amdgpu_ring_write(ring, lower_32_bits(addr)); 5988 amdgpu_ring_write(ring, upper_32_bits(addr)); 5989 amdgpu_ring_write(ring, lower_32_bits(seq)); 5990 5991 if (flags & AMDGPU_FENCE_FLAG_INT) { 5992 /* set register to trigger INT */ 5993 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5994 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5995 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 5996 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 5997 amdgpu_ring_write(ring, 0); 5998 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 5999 } 6000 } 6001 6002 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 6003 uint32_t flags) 6004 { 6005 uint32_t dw2 = 0; 6006 6007 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6008 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6009 /* set load_global_config & load_global_uconfig */ 6010 dw2 |= 0x8001; 6011 /* set load_cs_sh_regs */ 6012 dw2 |= 0x01000000; 6013 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6014 dw2 |= 0x10002; 6015 } 6016 6017 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6018 amdgpu_ring_write(ring, dw2); 6019 amdgpu_ring_write(ring, 0); 6020 } 6021 6022 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 6023 uint64_t addr) 6024 { 6025 unsigned ret; 6026 6027 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6028 amdgpu_ring_write(ring, lower_32_bits(addr)); 6029 amdgpu_ring_write(ring, upper_32_bits(addr)); 6030 /* discard following DWs if *cond_exec_gpu_addr==0 */ 6031 amdgpu_ring_write(ring, 0); 6032 ret = ring->wptr & ring->buf_mask; 6033 /* patch dummy value later */ 6034 amdgpu_ring_write(ring, 0); 6035 6036 return ret; 6037 } 6038 6039 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, 6040 u64 shadow_va, u64 csa_va, 6041 u64 gds_va, bool init_shadow, 6042 int vmid) 6043 { 6044 struct amdgpu_device *adev = ring->adev; 6045 unsigned int offs, end; 6046 6047 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) 6048 return; 6049 6050 /* 6051 * The logic here isn't easy to understand because we need to keep state 6052 * across multiple executions of the function as well as between the 6053 * CPU and GPU. The general idea is that the newly written GPU command 6054 * has a condition on the previous one and is only executed if really 6055 * necessary. 6056 */ 6057 6058 /* 6059 * The dw in the NOP controls whether the next SET_Q_MODE packet should be 6060 * executed or not. Reserve 64 bits just to be on the safe side. 6061 */ 6062 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); 6063 offs = ring->wptr & ring->buf_mask; 6064 6065 /* 6066 * We start with skipping the prefix SET_Q_MODE and always executing 6067 * the postfix SET_Q_MODE packet. This is changed below with a 6068 * WRITE_DATA command when the postfix is executed. 6069 */ 6070 amdgpu_ring_write(ring, shadow_va ?
1 : 0); 6071 amdgpu_ring_write(ring, 0); 6072 6073 if (ring->set_q_mode_offs) { 6074 uint64_t addr; 6075 6076 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6077 addr += ring->set_q_mode_offs << 2; 6078 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6079 } 6080 6081 /* 6082 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6083 * next prefix SET_Q_MODE packet executes as well. 6084 */ 6085 if (!shadow_va) { 6086 uint64_t addr; 6087 6088 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6089 addr += offs << 2; 6090 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6091 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6092 amdgpu_ring_write(ring, lower_32_bits(addr)); 6093 amdgpu_ring_write(ring, upper_32_bits(addr)); 6094 amdgpu_ring_write(ring, 0x1); 6095 } 6096 6097 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6098 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6099 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6100 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6101 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6102 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6103 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6104 amdgpu_ring_write(ring, shadow_va ? 6105 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6106 amdgpu_ring_write(ring, init_shadow ? 6107 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6108 6109 if (ring->set_q_mode_offs) 6110 amdgpu_ring_patch_cond_exec(ring, end); 6111 6112 if (shadow_va) { 6113 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6114 6115 /* 6116 * If the tokens match try to skip the last postfix SET_Q_MODE 6117 * packet to avoid saving/restoring the state all the time. 6118 */ 6119 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6120 *ring->set_q_mode_ptr = 0; 6121 6122 ring->set_q_mode_token = token; 6123 } else { 6124 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6125 } 6126 6127 ring->set_q_mode_offs = offs; 6128 } 6129 6130 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 6131 { 6132 int i, r = 0; 6133 struct amdgpu_device *adev = ring->adev; 6134 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6135 struct amdgpu_ring *kiq_ring = &kiq->ring; 6136 unsigned long flags; 6137 6138 if (adev->enable_mes) 6139 return -EINVAL; 6140 6141 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6142 return -EINVAL; 6143 6144 spin_lock_irqsave(&kiq->ring_lock, flags); 6145 6146 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6147 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6148 return -ENOMEM; 6149 } 6150 6151 /* assert preemption condition */ 6152 amdgpu_ring_set_preempt_cond_exec(ring, false); 6153 6154 /* assert IB preemption, emit the trailing fence */ 6155 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6156 ring->trail_fence_gpu_addr, 6157 ++ring->trail_seq); 6158 amdgpu_ring_commit(kiq_ring); 6159 6160 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6161 6162 /* poll the trailing fence */ 6163 for (i = 0; i < adev->usec_timeout; i++) { 6164 if (ring->trail_seq == 6165 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6166 break; 6167 udelay(1); 6168 } 6169 6170 if (i >= adev->usec_timeout) { 6171 r = -EINVAL; 6172 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6173 } 6174 6175 /* deassert preemption condition */ 6176 amdgpu_ring_set_preempt_cond_exec(ring, true); 6177 return r; 6178 } 6179 6180 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6181 { 6182 struct 
amdgpu_device *adev = ring->adev; 6183 struct v10_de_ib_state de_payload = {0}; 6184 uint64_t offset, gds_addr, de_payload_gpu_addr; 6185 void *de_payload_cpu_addr; 6186 int cnt; 6187 6188 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6189 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6190 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6191 6192 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6193 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6194 PAGE_SIZE); 6195 6196 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6197 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6198 6199 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6200 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6201 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6202 WRITE_DATA_DST_SEL(8) | 6203 WR_CONFIRM) | 6204 WRITE_DATA_CACHE_POLICY(0)); 6205 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6206 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6207 6208 if (resume) 6209 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6210 sizeof(de_payload) >> 2); 6211 else 6212 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6213 sizeof(de_payload) >> 2); 6214 } 6215 6216 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6217 bool secure) 6218 { 6219 uint32_t v = secure ? FRAME_TMZ : 0; 6220 6221 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6222 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6223 } 6224 6225 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6226 uint32_t reg_val_offs) 6227 { 6228 struct amdgpu_device *adev = ring->adev; 6229 6230 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6231 amdgpu_ring_write(ring, 0 | /* src: register*/ 6232 (5 << 8) | /* dst: memory */ 6233 (1 << 20)); /* write confirm */ 6234 amdgpu_ring_write(ring, reg); 6235 amdgpu_ring_write(ring, 0); 6236 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6237 reg_val_offs * 4)); 6238 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6239 reg_val_offs * 4)); 6240 } 6241 6242 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6243 uint32_t val) 6244 { 6245 uint32_t cmd = 0; 6246 6247 switch (ring->funcs->type) { 6248 case AMDGPU_RING_TYPE_GFX: 6249 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6250 break; 6251 case AMDGPU_RING_TYPE_KIQ: 6252 cmd = (1 << 16); /* no inc addr */ 6253 break; 6254 default: 6255 cmd = WR_CONFIRM; 6256 break; 6257 } 6258 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6259 amdgpu_ring_write(ring, cmd); 6260 amdgpu_ring_write(ring, reg); 6261 amdgpu_ring_write(ring, 0); 6262 amdgpu_ring_write(ring, val); 6263 } 6264 6265 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6266 uint32_t val, uint32_t mask) 6267 { 6268 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6269 } 6270 6271 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6272 uint32_t reg0, uint32_t reg1, 6273 uint32_t ref, uint32_t mask) 6274 { 6275 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6276 6277 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6278 ref, mask, 0x20); 6279 } 6280 6281 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, 6282 unsigned vmid) 6283 { 6284 struct amdgpu_device *adev = ring->adev; 6285 uint32_t value = 0; 6286 6287 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 6288 value = REG_SET_FIELD(value, SQ_CMD, MODE, 
0x01); 6289 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 6290 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 6291 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 6292 WREG32_SOC15(GC, 0, regSQ_CMD, value); 6293 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 6294 } 6295 6296 static void 6297 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6298 uint32_t me, uint32_t pipe, 6299 enum amdgpu_interrupt_state state) 6300 { 6301 uint32_t cp_int_cntl, cp_int_cntl_reg; 6302 6303 if (!me) { 6304 switch (pipe) { 6305 case 0: 6306 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6307 break; 6308 case 1: 6309 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6310 break; 6311 default: 6312 DRM_DEBUG("invalid pipe %d\n", pipe); 6313 return; 6314 } 6315 } else { 6316 DRM_DEBUG("invalid me %d\n", me); 6317 return; 6318 } 6319 6320 switch (state) { 6321 case AMDGPU_IRQ_STATE_DISABLE: 6322 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6323 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6324 TIME_STAMP_INT_ENABLE, 0); 6325 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6326 GENERIC0_INT_ENABLE, 0); 6327 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6328 break; 6329 case AMDGPU_IRQ_STATE_ENABLE: 6330 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6331 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6332 TIME_STAMP_INT_ENABLE, 1); 6333 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6334 GENERIC0_INT_ENABLE, 1); 6335 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6336 break; 6337 default: 6338 break; 6339 } 6340 } 6341 6342 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6343 int me, int pipe, 6344 enum amdgpu_interrupt_state state) 6345 { 6346 u32 mec_int_cntl, mec_int_cntl_reg; 6347 6348 /* 6349 * amdgpu controls only the first MEC. That's why this function only 6350 * handles the setting of interrupts for this specific MEC. All other 6351 * pipes' interrupts are set by amdkfd. 
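 * MEC1 pipes 0-3 map to the CP_ME1_PIPE0..3_INT_CNTL registers selected in the switch below.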
6352 */ 6353 6354 if (me == 1) { 6355 switch (pipe) { 6356 case 0: 6357 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6358 break; 6359 case 1: 6360 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6361 break; 6362 case 2: 6363 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6364 break; 6365 case 3: 6366 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6367 break; 6368 default: 6369 DRM_DEBUG("invalid pipe %d\n", pipe); 6370 return; 6371 } 6372 } else { 6373 DRM_DEBUG("invalid me %d\n", me); 6374 return; 6375 } 6376 6377 switch (state) { 6378 case AMDGPU_IRQ_STATE_DISABLE: 6379 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6380 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6381 TIME_STAMP_INT_ENABLE, 0); 6382 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6383 GENERIC0_INT_ENABLE, 0); 6384 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6385 break; 6386 case AMDGPU_IRQ_STATE_ENABLE: 6387 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6388 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6389 TIME_STAMP_INT_ENABLE, 1); 6390 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6391 GENERIC0_INT_ENABLE, 1); 6392 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6393 break; 6394 default: 6395 break; 6396 } 6397 } 6398 6399 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6400 struct amdgpu_irq_src *src, 6401 unsigned type, 6402 enum amdgpu_interrupt_state state) 6403 { 6404 switch (type) { 6405 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6406 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6407 break; 6408 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6409 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6410 break; 6411 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6412 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6413 break; 6414 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6415 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6416 break; 6417 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6418 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6419 break; 6420 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6421 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6422 break; 6423 default: 6424 break; 6425 } 6426 return 0; 6427 } 6428 6429 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6430 struct amdgpu_irq_src *source, 6431 struct amdgpu_iv_entry *entry) 6432 { 6433 u32 doorbell_offset = entry->src_data[0]; 6434 u8 me_id, pipe_id, queue_id; 6435 struct amdgpu_ring *ring; 6436 int i; 6437 6438 DRM_DEBUG("IH: CP EOP\n"); 6439 6440 if (adev->enable_mes && doorbell_offset) { 6441 struct amdgpu_userq_fence_driver *fence_drv = NULL; 6442 struct xarray *xa = &adev->userq_xa; 6443 unsigned long flags; 6444 6445 xa_lock_irqsave(xa, flags); 6446 fence_drv = xa_load(xa, doorbell_offset); 6447 if (fence_drv) 6448 amdgpu_userq_fence_driver_process(fence_drv); 6449 xa_unlock_irqrestore(xa, flags); 6450 } else { 6451 me_id = (entry->ring_id & 0x0c) >> 2; 6452 pipe_id = (entry->ring_id & 0x03) >> 0; 6453 queue_id = (entry->ring_id & 0x70) >> 4; 6454 6455 switch (me_id) { 6456 case 0: 6457 if (pipe_id == 0) 6458 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6459 else 6460 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6461 break; 6462 case 1: 6463 case 2: 6464 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6465 ring = &adev->gfx.compute_ring[i]; 6466 
/* Per-queue interrupt is supported for MEC starting from VI. 6467 * The interrupt can only be enabled/disabled per pipe instead 6468 * of per queue. 6469 */ 6470 if ((ring->me == me_id) && 6471 (ring->pipe == pipe_id) && 6472 (ring->queue == queue_id)) 6473 amdgpu_fence_process(ring); 6474 } 6475 break; 6476 } 6477 } 6478 6479 return 0; 6480 } 6481 6482 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6483 struct amdgpu_irq_src *source, 6484 unsigned int type, 6485 enum amdgpu_interrupt_state state) 6486 { 6487 u32 cp_int_cntl_reg, cp_int_cntl; 6488 int i, j; 6489 6490 switch (state) { 6491 case AMDGPU_IRQ_STATE_DISABLE: 6492 case AMDGPU_IRQ_STATE_ENABLE: 6493 for (i = 0; i < adev->gfx.me.num_me; i++) { 6494 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6495 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6496 6497 if (cp_int_cntl_reg) { 6498 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6499 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6500 PRIV_REG_INT_ENABLE, 6501 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6502 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6503 } 6504 } 6505 } 6506 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6507 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6508 /* MECs start at 1 */ 6509 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6510 6511 if (cp_int_cntl_reg) { 6512 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6513 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6514 PRIV_REG_INT_ENABLE, 6515 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6516 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6517 } 6518 } 6519 } 6520 break; 6521 default: 6522 break; 6523 } 6524 6525 return 0; 6526 } 6527 6528 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6529 struct amdgpu_irq_src *source, 6530 unsigned type, 6531 enum amdgpu_interrupt_state state) 6532 { 6533 u32 cp_int_cntl_reg, cp_int_cntl; 6534 int i, j; 6535 6536 switch (state) { 6537 case AMDGPU_IRQ_STATE_DISABLE: 6538 case AMDGPU_IRQ_STATE_ENABLE: 6539 for (i = 0; i < adev->gfx.me.num_me; i++) { 6540 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6541 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6542 6543 if (cp_int_cntl_reg) { 6544 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6545 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6546 OPCODE_ERROR_INT_ENABLE, 6547 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6548 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6549 } 6550 } 6551 } 6552 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6553 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6554 /* MECs start at 1 */ 6555 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6556 6557 if (cp_int_cntl_reg) { 6558 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6559 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6560 OPCODE_ERROR_INT_ENABLE, 6561 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6562 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6563 } 6564 } 6565 } 6566 break; 6567 default: 6568 break; 6569 } 6570 return 0; 6571 } 6572 6573 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6574 struct amdgpu_irq_src *source, 6575 unsigned int type, 6576 enum amdgpu_interrupt_state state) 6577 { 6578 u32 cp_int_cntl_reg, cp_int_cntl; 6579 int i, j; 6580 6581 switch (state) { 6582 case AMDGPU_IRQ_STATE_DISABLE: 6583 case AMDGPU_IRQ_STATE_ENABLE: 6584 for (i = 0; i < adev->gfx.me.num_me; i++) { 6585 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6586 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6587 6588 if (cp_int_cntl_reg) { 6589 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6590 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6591 PRIV_INSTR_INT_ENABLE, 6592 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6593 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6594 } 6595 } 6596 } 6597 break; 6598 default: 6599 break; 6600 } 6601 6602 return 0; 6603 } 6604 6605 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6606 struct amdgpu_iv_entry *entry) 6607 { 6608 u8 me_id, pipe_id, queue_id; 6609 struct amdgpu_ring *ring; 6610 int i; 6611 6612 me_id = (entry->ring_id & 0x0c) >> 2; 6613 pipe_id = (entry->ring_id & 0x03) >> 0; 6614 queue_id = (entry->ring_id & 0x70) >> 4; 6615 6616 if (!adev->gfx.disable_kq) { 6617 switch (me_id) { 6618 case 0: 6619 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6620 ring = &adev->gfx.gfx_ring[i]; 6621 if (ring->me == me_id && ring->pipe == pipe_id && 6622 ring->queue == queue_id) 6623 drm_sched_fault(&ring->sched); 6624 } 6625 break; 6626 case 1: 6627 case 2: 6628 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6629 ring = &adev->gfx.compute_ring[i]; 6630 if (ring->me == me_id && ring->pipe == pipe_id && 6631 ring->queue == queue_id) 6632 drm_sched_fault(&ring->sched); 6633 } 6634 break; 6635 default: 6636 BUG(); 6637 break; 6638 } 6639 } 6640 } 6641 6642 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6643 struct amdgpu_irq_src *source, 6644 struct amdgpu_iv_entry *entry) 6645 { 6646 DRM_ERROR("Illegal register access in command stream\n"); 6647 gfx_v11_0_handle_priv_fault(adev, entry); 6648 return 0; 6649 } 6650 6651 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6652 struct amdgpu_irq_src *source, 6653 struct amdgpu_iv_entry *entry) 6654 { 6655 DRM_ERROR("Illegal opcode in command stream \n"); 6656 gfx_v11_0_handle_priv_fault(adev, entry); 6657 return 0; 6658 } 6659 6660 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6661 struct amdgpu_irq_src *source, 6662 struct amdgpu_iv_entry *entry) 6663 { 6664 DRM_ERROR("Illegal instruction in command stream\n"); 6665 gfx_v11_0_handle_priv_fault(adev, entry); 6666 return 0; 6667 } 6668 6669 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6670 struct amdgpu_irq_src *source, 6671 struct amdgpu_iv_entry *entry) 6672 { 6673 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6674 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6675 6676 return 0; 6677 } 6678 6679 #if 0 6680 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6681 struct amdgpu_irq_src *src, 6682 unsigned int type, 6683 enum amdgpu_interrupt_state state) 6684 { 6685 uint32_t tmp, target; 6686 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6687 6688 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6689 target += ring->pipe; 6690 6691 switch (type) { 
6692 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6693 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6694 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6695 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6696 GENERIC2_INT_ENABLE, 0); 6697 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6698 6699 tmp = RREG32_SOC15_IP(GC, target); 6700 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6701 GENERIC2_INT_ENABLE, 0); 6702 WREG32_SOC15_IP(GC, target, tmp); 6703 } else { 6704 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6705 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6706 GENERIC2_INT_ENABLE, 1); 6707 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6708 6709 tmp = RREG32_SOC15_IP(GC, target); 6710 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6711 GENERIC2_INT_ENABLE, 1); 6712 WREG32_SOC15_IP(GC, target, tmp); 6713 } 6714 break; 6715 default: 6716 BUG(); /* kiq only supports GENERIC2_INT now */ 6717 break; 6718 } 6719 return 0; 6720 } 6721 #endif 6722 6723 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6724 { 6725 const unsigned int gcr_cntl = 6726 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6727 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6728 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6729 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6730 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6731 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6732 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6733 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6734 6735 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6736 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6737 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6738 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6739 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6740 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6741 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6742 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6743 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6744 } 6745 6746 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6747 { 6748 /* Disable the pipe reset until the CPFW fully supports it. */ 6749 dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n"); 6750 return false; 6751 } 6752 6753 6754 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6755 { 6756 struct amdgpu_device *adev = ring->adev; 6757 uint32_t reset_pipe = 0, clean_pipe = 0; 6758 int r; 6759 6760 if (!gfx_v11_pipe_reset_support(adev)) 6761 return -EOPNOTSUPP; 6762 6763 gfx_v11_0_set_safe_mode(adev, 0); 6764 mutex_lock(&adev->srbm_mutex); 6765 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6766 6767 switch (ring->pipe) { 6768 case 0: 6769 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6770 PFP_PIPE0_RESET, 1); 6771 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6772 ME_PIPE0_RESET, 1); 6773 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6774 PFP_PIPE0_RESET, 0); 6775 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6776 ME_PIPE0_RESET, 0); 6777 break; 6778 case 1: 6779 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6780 PFP_PIPE1_RESET, 1); 6781 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6782 ME_PIPE1_RESET, 1); 6783 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6784 PFP_PIPE1_RESET, 0); 6785 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6786 ME_PIPE1_RESET, 0); 6787 break; 6788 default: 6789 break; 6790 } 6791 6792 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6793 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6794 6795 r = (RREG32(SOC15_REG_OFFSET(GC, 0,
regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6796 RS64_FW_UC_START_ADDR_LO; 6797 soc21_grbm_select(adev, 0, 0, 0, 0); 6798 mutex_unlock(&adev->srbm_mutex); 6799 gfx_v11_0_unset_safe_mode(adev, 0); 6800 6801 dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6802 r == 0 ? "successfully" : "failed"); 6803 /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, 6804 * so the pipe reset status relies on the later gfx ring test result. 6805 */ 6806 return 0; 6807 } 6808 6809 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) 6810 { 6811 struct amdgpu_device *adev = ring->adev; 6812 int r; 6813 6814 if (amdgpu_sriov_vf(adev)) 6815 return -EINVAL; 6816 6817 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); 6818 if (r) { 6819 6820 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6821 r = gfx_v11_reset_gfx_pipe(ring); 6822 if (r) 6823 return r; 6824 } 6825 6826 r = gfx_v11_0_kgq_init_queue(ring, true); 6827 if (r) { 6828 dev_err(adev->dev, "failed to init kgq\n"); 6829 return r; 6830 } 6831 6832 r = amdgpu_mes_map_legacy_queue(adev, ring); 6833 if (r) { 6834 dev_err(adev->dev, "failed to remap kgq\n"); 6835 return r; 6836 } 6837 6838 return amdgpu_ring_test_ring(ring); 6839 } 6840 6841 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6842 { 6843 6844 struct amdgpu_device *adev = ring->adev; 6845 uint32_t reset_pipe = 0, clean_pipe = 0; 6846 int r; 6847 6848 if (!gfx_v11_pipe_reset_support(adev)) 6849 return -EOPNOTSUPP; 6850 6851 gfx_v11_0_set_safe_mode(adev, 0); 6852 mutex_lock(&adev->srbm_mutex); 6853 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6854 6855 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6856 clean_pipe = reset_pipe; 6857 6858 if (adev->gfx.rs64_enable) { 6859 6860 switch (ring->pipe) { 6861 case 0: 6862 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6863 MEC_PIPE0_RESET, 1); 6864 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6865 MEC_PIPE0_RESET, 0); 6866 break; 6867 case 1: 6868 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6869 MEC_PIPE1_RESET, 1); 6870 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6871 MEC_PIPE1_RESET, 0); 6872 break; 6873 case 2: 6874 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6875 MEC_PIPE2_RESET, 1); 6876 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6877 MEC_PIPE2_RESET, 0); 6878 break; 6879 case 3: 6880 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6881 MEC_PIPE3_RESET, 1); 6882 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6883 MEC_PIPE3_RESET, 0); 6884 break; 6885 default: 6886 break; 6887 } 6888 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6889 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6890 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6891 RS64_FW_UC_START_ADDR_LO; 6892 } else { 6893 if (ring->me == 1) { 6894 switch (ring->pipe) { 6895 case 0: 6896 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6897 MEC_ME1_PIPE0_RESET, 1); 6898 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6899 MEC_ME1_PIPE0_RESET, 0); 6900 break; 6901 case 1: 6902 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6903 MEC_ME1_PIPE1_RESET, 1); 6904 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6905 MEC_ME1_PIPE1_RESET, 0); 6906 break; 6907 case 2: 6908 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6909 MEC_ME1_PIPE2_RESET, 1); 6910 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 
6911 MEC_ME1_PIPE2_RESET, 0); 6912 break; 6913 case 3: 6914 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6915 MEC_ME1_PIPE3_RESET, 1); 6916 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6917 MEC_ME1_PIPE3_RESET, 0); 6918 break; 6919 default: 6920 break; 6921 } 6922 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 6923 } else { 6924 switch (ring->pipe) { 6925 case 0: 6926 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6927 MEC_ME2_PIPE0_RESET, 1); 6928 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6929 MEC_ME2_PIPE0_RESET, 0); 6930 break; 6931 case 1: 6932 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6933 MEC_ME2_PIPE1_RESET, 1); 6934 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6935 MEC_ME2_PIPE1_RESET, 0); 6936 break; 6937 case 2: 6938 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6939 MEC_ME2_PIPE2_RESET, 1); 6940 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6941 MEC_ME2_PIPE2_RESET, 0); 6942 break; 6943 case 3: 6944 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6945 MEC_ME2_PIPE3_RESET, 1); 6946 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6947 MEC_ME2_PIPE3_RESET, 0); 6948 break; 6949 default: 6950 break; 6951 } 6952 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ 6953 } 6954 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); 6955 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); 6956 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); 6957 } 6958 6959 soc21_grbm_select(adev, 0, 0, 0, 0); 6960 mutex_unlock(&adev->srbm_mutex); 6961 gfx_v11_0_unset_safe_mode(adev, 0); 6962 6963 dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, 6964 r == 0 ? "successfully" : "failed"); 6965 /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe 6966 * reset status relies on the compute ring test result. 
6967 */ 6968 return 0; 6969 } 6970 6971 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) 6972 { 6973 struct amdgpu_device *adev = ring->adev; 6974 int r = 0; 6975 6976 if (amdgpu_sriov_vf(adev)) 6977 return -EINVAL; 6978 6979 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); 6980 if (r) { 6981 dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); 6982 r = gfx_v11_0_reset_compute_pipe(ring); 6983 if (r) 6984 return r; 6985 } 6986 6987 r = gfx_v11_0_kcq_init_queue(ring, true); 6988 if (r) { 6989 dev_err(adev->dev, "fail to init kcq\n"); 6990 return r; 6991 } 6992 r = amdgpu_mes_map_legacy_queue(adev, ring); 6993 if (r) { 6994 dev_err(adev->dev, "failed to remap kcq\n"); 6995 return r; 6996 } 6997 6998 return amdgpu_ring_test_ring(ring); 6999 } 7000 7001 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7002 { 7003 struct amdgpu_device *adev = ip_block->adev; 7004 uint32_t i, j, k, reg, index = 0; 7005 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7006 7007 if (!adev->gfx.ip_dump_core) 7008 return; 7009 7010 for (i = 0; i < reg_count; i++) 7011 drm_printf(p, "%-50s \t 0x%08x\n", 7012 gc_reg_list_11_0[i].reg_name, 7013 adev->gfx.ip_dump_core[i]); 7014 7015 /* print compute queue registers for all instances */ 7016 if (!adev->gfx.ip_dump_compute_queues) 7017 return; 7018 7019 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7020 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7021 adev->gfx.mec.num_mec, 7022 adev->gfx.mec.num_pipe_per_mec, 7023 adev->gfx.mec.num_queue_per_pipe); 7024 7025 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7026 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7027 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7028 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7029 for (reg = 0; reg < reg_count; reg++) { 7030 if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) 7031 drm_printf(p, "%-50s \t 0x%08x\n", 7032 "regCP_MEC_ME2_HEADER_DUMP", 7033 adev->gfx.ip_dump_compute_queues[index + reg]); 7034 else 7035 drm_printf(p, "%-50s \t 0x%08x\n", 7036 gc_cp_reg_list_11[reg].reg_name, 7037 adev->gfx.ip_dump_compute_queues[index + reg]); 7038 } 7039 index += reg_count; 7040 } 7041 } 7042 } 7043 7044 /* print gfx queue registers for all instances */ 7045 if (!adev->gfx.ip_dump_gfx_queues) 7046 return; 7047 7048 index = 0; 7049 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7050 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7051 adev->gfx.me.num_me, 7052 adev->gfx.me.num_pipe_per_me, 7053 adev->gfx.me.num_queue_per_pipe); 7054 7055 for (i = 0; i < adev->gfx.me.num_me; i++) { 7056 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7057 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7058 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7059 for (reg = 0; reg < reg_count; reg++) { 7060 drm_printf(p, "%-50s \t 0x%08x\n", 7061 gc_gfx_queue_reg_list_11[reg].reg_name, 7062 adev->gfx.ip_dump_gfx_queues[index + reg]); 7063 } 7064 index += reg_count; 7065 } 7066 } 7067 } 7068 } 7069 7070 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7071 { 7072 struct amdgpu_device *adev = ip_block->adev; 7073 uint32_t i, j, k, reg, index = 0; 7074 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7075 7076 if (!adev->gfx.ip_dump_core) 7077 return; 7078 7079 amdgpu_gfx_off_ctrl(adev, false); 7080 for (i = 0; i < reg_count; i++) 7081 adev->gfx.ip_dump_core[i] = 
RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7082 amdgpu_gfx_off_ctrl(adev, true); 7083 7084 /* dump compute queue registers for all instances */ 7085 if (!adev->gfx.ip_dump_compute_queues) 7086 return; 7087 7088 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7089 amdgpu_gfx_off_ctrl(adev, false); 7090 mutex_lock(&adev->srbm_mutex); 7091 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7092 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7093 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7094 /* ME0 is for GFX so start from 1 for CP */ 7095 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7096 for (reg = 0; reg < reg_count; reg++) { 7097 if (i && 7098 gc_cp_reg_list_11[reg].reg_offset == 7099 regCP_MEC_ME1_HEADER_DUMP) 7100 adev->gfx.ip_dump_compute_queues[index + reg] = 7101 RREG32(SOC15_REG_OFFSET(GC, 0, 7102 regCP_MEC_ME2_HEADER_DUMP)); 7103 else 7104 adev->gfx.ip_dump_compute_queues[index + reg] = 7105 RREG32(SOC15_REG_ENTRY_OFFSET( 7106 gc_cp_reg_list_11[reg])); 7107 } 7108 index += reg_count; 7109 } 7110 } 7111 } 7112 soc21_grbm_select(adev, 0, 0, 0, 0); 7113 mutex_unlock(&adev->srbm_mutex); 7114 amdgpu_gfx_off_ctrl(adev, true); 7115 7116 /* dump gfx queue registers for all instances */ 7117 if (!adev->gfx.ip_dump_gfx_queues) 7118 return; 7119 7120 index = 0; 7121 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7122 amdgpu_gfx_off_ctrl(adev, false); 7123 mutex_lock(&adev->srbm_mutex); 7124 for (i = 0; i < adev->gfx.me.num_me; i++) { 7125 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7126 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7127 soc21_grbm_select(adev, i, j, k, 0); 7128 7129 for (reg = 0; reg < reg_count; reg++) { 7130 adev->gfx.ip_dump_gfx_queues[index + reg] = 7131 RREG32(SOC15_REG_ENTRY_OFFSET( 7132 gc_gfx_queue_reg_list_11[reg])); 7133 } 7134 index += reg_count; 7135 } 7136 } 7137 } 7138 soc21_grbm_select(adev, 0, 0, 0, 0); 7139 mutex_unlock(&adev->srbm_mutex); 7140 amdgpu_gfx_off_ctrl(adev, true); 7141 } 7142 7143 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7144 { 7145 /* Emit the cleaner shader */ 7146 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7147 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7148 } 7149 7150 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 7151 { 7152 amdgpu_gfx_profile_ring_begin_use(ring); 7153 7154 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7155 } 7156 7157 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 7158 { 7159 amdgpu_gfx_profile_ring_end_use(ring); 7160 7161 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7162 } 7163 7164 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 7165 .name = "gfx_v11_0", 7166 .early_init = gfx_v11_0_early_init, 7167 .late_init = gfx_v11_0_late_init, 7168 .sw_init = gfx_v11_0_sw_init, 7169 .sw_fini = gfx_v11_0_sw_fini, 7170 .hw_init = gfx_v11_0_hw_init, 7171 .hw_fini = gfx_v11_0_hw_fini, 7172 .suspend = gfx_v11_0_suspend, 7173 .resume = gfx_v11_0_resume, 7174 .is_idle = gfx_v11_0_is_idle, 7175 .wait_for_idle = gfx_v11_0_wait_for_idle, 7176 .soft_reset = gfx_v11_0_soft_reset, 7177 .check_soft_reset = gfx_v11_0_check_soft_reset, 7178 .post_soft_reset = gfx_v11_0_post_soft_reset, 7179 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 7180 .set_powergating_state = gfx_v11_0_set_powergating_state, 7181 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 7182 .dump_ip_state = gfx_v11_ip_dump, 7183 .print_ip_state = gfx_v11_ip_print, 7184 }; 7185 
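/* Ring function tables: these wire the GFX, compute and KIQ rings to the
 * packet emission helpers above. emit_frame_size is the worst-case number
 * of dwords a single submission may emit around its IBs, and emit_ib_size
 * the per-IB packet overhead, so enough ring space can be reserved up front.
 */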
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 247 maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v11_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/**
			 * GFX11 could support more than 4 SEs, while the bitmap
			 * in cu_info struct is 4x4 and ioctl interface struct
			 * drm_amdgpu_info_device should keep stable.
			 * So we use last two columns of bitmap to store cu mask for
			 * SEs 4 to 7, the layout of the bitmap is as below:
			 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};