/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048

#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388

#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
#define regPC_CONFIG_CNTL_1 0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX 1

#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 103 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 105 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 107 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 108 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 109 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 110 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 111 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 113 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 115 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 117 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 119 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 120 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 121 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 122 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 123 124 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 125 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 126 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 127 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 128 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 129 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 130 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 131 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 132 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 133 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 134 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 135 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 136 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 138 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 139 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 140 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 141 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 142 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 143 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 144 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 145 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 146 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 147 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 148 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 149 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 157 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 158 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 159 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 160 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 161 
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);
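
/*
 * Hand the CP scheduler its resources via a PACKET3_SET_RESOURCES packet on
 * the KIQ ring: the queue mask plus the cleaner shader address used to scrub
 * stale shader state between queue users.
 */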
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action,
					      gpu_addr, seq, 0);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
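
/*
 * Query a queue's status through the KIQ; the CP writes @seq back to @addr,
 * which callers use as a completion fence.
 */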
static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}
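
/*
 * Emit a WRITE_DATA packet that writes @val to register offset @reg,
 * optionally requesting write confirmation (@wc).
 */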
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
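
/*
 * IB test: submit a small indirect buffer that writes a magic value to a
 * writeback slot, then poll memory for it to verify the CP fetched and
 * executed the IB.
 */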
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't have indirect buffer support for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}
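
/*
 * Fetch PFP/ME/RLC/MEC (and, for backdoor autoload, TOC) images for the
 * detected GC IP version; the PFP header version determines whether the
 * RS64 CP cores are in use.
 */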
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", ucode_prefix);
	if (err)
		goto out;
	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enabled\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else if (amdgpu_is_kicker_fw(adev))
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}
static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}
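
/* Set up the RLC clear state buffer and the default SPM VMID. */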
static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
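
/*
 * Dump per-wave status registers; all reads go through the indexed
 * SQ_IND_INDEX/SQ_IND_DATA interface used by the helpers above.
 */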
static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here
	 */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
						  struct amdgpu_gfx_shadow_info *shadow_info)
{
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
}

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (adev->gfx.cp_gfx_shadow || skip_check) {
		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (adev->gfx.disable_kq) {
		ring->no_scheduler = true;
		ring->no_user_submission = true;
	}

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}
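
/* Set up a kernel compute ring on its MEC pipe/queue slot (mec0 is me1). */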
static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID id;
	unsigned int offset;
	unsigned int size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset +
			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}
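
/*
 * Copy the TOC itself into the autoload buffer; the last 64 bits of the
 * TOC image carry the bitmask of firmwares the RLC should autoload.
 */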
static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
							   fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
							   fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
							   fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
							   fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
								   fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
								   fw_data, fw_size, fw_autoload_mask);
		}
	}
}
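
/* Stage SDMA instance 0's context (TH0) and control (TH1) threads. */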
static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}
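
/* Stage the MES ucode and stack images for both scheduler pipes. */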
static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4: init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5: disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}
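
/*
 * Allocate the snapshot buffers used for the devcoredump register dump;
 * an allocation failure only disables the corresponding dump.
 */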
GFP_KERNEL); 1553 if (!ptr) { 1554 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1555 adev->gfx.ip_dump_compute_queues = NULL; 1556 } else { 1557 adev->gfx.ip_dump_compute_queues = ptr; 1558 } 1559 1560 /* Allocate memory for gfx queue registers for all the instances */ 1561 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1562 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1563 adev->gfx.me.num_queue_per_pipe; 1564 1565 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1566 if (!ptr) { 1567 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1568 adev->gfx.ip_dump_gfx_queues = NULL; 1569 } else { 1570 adev->gfx.ip_dump_gfx_queues = ptr; 1571 } 1572 } 1573 1574 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1575 { 1576 int i, j, k, r, ring_id; 1577 int xcc_id = 0; 1578 struct amdgpu_device *adev = ip_block->adev; 1579 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1580 1581 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1582 1583 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1584 case IP_VERSION(11, 0, 0): 1585 case IP_VERSION(11, 0, 1): 1586 case IP_VERSION(11, 0, 2): 1587 case IP_VERSION(11, 0, 3): 1588 case IP_VERSION(11, 0, 4): 1589 case IP_VERSION(11, 5, 0): 1590 case IP_VERSION(11, 5, 1): 1591 case IP_VERSION(11, 5, 2): 1592 case IP_VERSION(11, 5, 3): 1593 adev->gfx.me.num_me = 1; 1594 adev->gfx.me.num_pipe_per_me = 1; 1595 adev->gfx.me.num_queue_per_pipe = 2; 1596 adev->gfx.mec.num_mec = 1; 1597 adev->gfx.mec.num_pipe_per_mec = 4; 1598 adev->gfx.mec.num_queue_per_pipe = 4; 1599 break; 1600 default: 1601 adev->gfx.me.num_me = 1; 1602 adev->gfx.me.num_pipe_per_me = 1; 1603 adev->gfx.me.num_queue_per_pipe = 1; 1604 adev->gfx.mec.num_mec = 1; 1605 adev->gfx.mec.num_pipe_per_mec = 4; 1606 adev->gfx.mec.num_queue_per_pipe = 8; 1607 break; 1608 } 1609 1610 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1611 case IP_VERSION(11, 0, 0): 1612 case IP_VERSION(11, 0, 2): 1613 case IP_VERSION(11, 0, 3): 1614 if (!adev->gfx.disable_uq && 1615 adev->gfx.me_fw_version >= 2420 && 1616 adev->gfx.pfp_fw_version >= 2580 && 1617 adev->gfx.mec_fw_version >= 2650 && 1618 adev->mes.fw_version[0] >= 120) { 1619 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1620 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1621 } 1622 break; 1623 case IP_VERSION(11, 0, 1): 1624 case IP_VERSION(11, 0, 4): 1625 case IP_VERSION(11, 5, 0): 1626 case IP_VERSION(11, 5, 1): 1627 case IP_VERSION(11, 5, 2): 1628 case IP_VERSION(11, 5, 3): 1629 /* add firmware version checks here */ 1630 if (0 && !adev->gfx.disable_uq) { 1631 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1632 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1633 } 1634 break; 1635 default: 1636 break; 1637 } 1638 1639 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1640 case IP_VERSION(11, 0, 0): 1641 case IP_VERSION(11, 0, 2): 1642 case IP_VERSION(11, 0, 3): 1643 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1644 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1645 if (adev->gfx.me_fw_version >= 2280 && 1646 adev->gfx.pfp_fw_version >= 2370 && 1647 adev->gfx.mec_fw_version >= 2450 && 1648 adev->mes.fw_version[0] >= 99) { 1649 adev->gfx.enable_cleaner_shader = true; 1650 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1651 if (r) { 1652 adev->gfx.enable_cleaner_shader = false; 1653 dev_err(adev->dev, "Failed to initialize cleaner 
shader\n"); 1654 } 1655 } 1656 break; 1657 case IP_VERSION(11, 0, 1): 1658 case IP_VERSION(11, 0, 4): 1659 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 if (adev->gfx.pfp_fw_version >= 102 && 1662 adev->gfx.mec_fw_version >= 66 && 1663 adev->mes.fw_version[0] >= 128) { 1664 adev->gfx.enable_cleaner_shader = true; 1665 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1666 if (r) { 1667 adev->gfx.enable_cleaner_shader = false; 1668 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1669 } 1670 } 1671 break; 1672 case IP_VERSION(11, 5, 0): 1673 case IP_VERSION(11, 5, 1): 1674 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1675 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1676 if (adev->gfx.mec_fw_version >= 26 && 1677 adev->mes.fw_version[0] >= 114) { 1678 adev->gfx.enable_cleaner_shader = true; 1679 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1680 if (r) { 1681 adev->gfx.enable_cleaner_shader = false; 1682 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1683 } 1684 } 1685 break; 1686 case IP_VERSION(11, 5, 2): 1687 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1688 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1689 if (adev->gfx.me_fw_version >= 12 && 1690 adev->gfx.pfp_fw_version >= 15 && 1691 adev->gfx.mec_fw_version >= 15) { 1692 adev->gfx.enable_cleaner_shader = true; 1693 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1694 if (r) { 1695 adev->gfx.enable_cleaner_shader = false; 1696 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1697 } 1698 } 1699 break; 1700 case IP_VERSION(11, 5, 3): 1701 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1702 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1703 if (adev->gfx.me_fw_version >= 7 && 1704 adev->gfx.pfp_fw_version >= 8 && 1705 adev->gfx.mec_fw_version >= 8) { 1706 adev->gfx.enable_cleaner_shader = true; 1707 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1708 if (r) { 1709 adev->gfx.enable_cleaner_shader = false; 1710 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1711 } 1712 } 1713 break; 1714 default: 1715 adev->gfx.enable_cleaner_shader = false; 1716 break; 1717 } 1718 1719 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1720 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1721 amdgpu_sriov_is_pp_one_vf(adev)) 1722 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1723 1724 /* EOP Event */ 1725 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1726 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1727 &adev->gfx.eop_irq); 1728 if (r) 1729 return r; 1730 1731 /* Bad opcode Event */ 1732 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1733 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1734 &adev->gfx.bad_op_irq); 1735 if (r) 1736 return r; 1737 1738 /* Privileged reg */ 1739 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1740 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1741 &adev->gfx.priv_reg_irq); 1742 if (r) 1743 return r; 1744 1745 /* Privileged inst */ 1746 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1747 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1748 &adev->gfx.priv_inst_irq); 1749 if (r) 1750 return r; 1751 1752 /* FED error */ 1753 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1754 
GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1755 &adev->gfx.rlc_gc_fed_irq); 1756 if (r) 1757 return r; 1758 1759 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1760 1761 gfx_v11_0_me_init(adev); 1762 1763 r = gfx_v11_0_rlc_init(adev); 1764 if (r) { 1765 DRM_ERROR("Failed to init rlc BOs!\n"); 1766 return r; 1767 } 1768 1769 r = gfx_v11_0_mec_init(adev); 1770 if (r) { 1771 DRM_ERROR("Failed to init MEC BOs!\n"); 1772 return r; 1773 } 1774 1775 if (adev->gfx.num_gfx_rings) { 1776 ring_id = 0; 1777 /* set up the gfx ring */ 1778 for (i = 0; i < adev->gfx.me.num_me; i++) { 1779 for (j = 0; j < num_queue_per_pipe; j++) { 1780 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1781 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1782 continue; 1783 1784 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1785 i, k, j); 1786 if (r) 1787 return r; 1788 ring_id++; 1789 } 1790 } 1791 } 1792 } 1793 1794 if (adev->gfx.num_compute_rings) { 1795 ring_id = 0; 1796 /* set up the compute queues - allocate horizontally across pipes */ 1797 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1798 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1799 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1800 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1801 k, j)) 1802 continue; 1803 1804 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1805 i, k, j); 1806 if (r) 1807 return r; 1808 1809 ring_id++; 1810 } 1811 } 1812 } 1813 } 1814 1815 adev->gfx.gfx_supported_reset = 1816 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1817 adev->gfx.compute_supported_reset = 1818 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1819 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1820 case IP_VERSION(11, 0, 0): 1821 case IP_VERSION(11, 0, 2): 1822 case IP_VERSION(11, 0, 3): 1823 if ((adev->gfx.me_fw_version >= 2280) && 1824 (adev->gfx.mec_fw_version >= 2410) && 1825 !amdgpu_sriov_vf(adev) && 1826 !adev->debug_disable_gpu_ring_reset) { 1827 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1828 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1829 } 1830 break; 1831 default: 1832 if (!amdgpu_sriov_vf(adev) && 1833 !adev->debug_disable_gpu_ring_reset) { 1834 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1835 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1836 } 1837 break; 1838 } 1839 1840 if (!adev->enable_mes_kiq) { 1841 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1842 if (r) { 1843 DRM_ERROR("Failed to init KIQ BOs!\n"); 1844 return r; 1845 } 1846 1847 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1848 if (r) 1849 return r; 1850 } 1851 1852 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1853 if (r) 1854 return r; 1855 1856 /* allocate visible FB for rlc auto-loading fw */ 1857 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1858 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1859 if (r) 1860 return r; 1861 } 1862 1863 r = gfx_v11_0_gpu_early_init(adev); 1864 if (r) 1865 return r; 1866 1867 if (amdgpu_gfx_ras_sw_init(adev)) { 1868 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1869 return -EINVAL; 1870 } 1871 1872 gfx_v11_0_alloc_ip_dump(adev); 1873 1874 r = amdgpu_gfx_sysfs_init(adev); 1875 if (r) 1876 return r; 1877 1878 return 0; 1879 } 1880 1881 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1882 { 1883 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1884 &adev->gfx.pfp.pfp_fw_gpu_addr, 1885 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1886 1887 
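        /*
         * Note: on the RS64 path the pfp firmware also owns a separate
         * data BO; on the legacy path the data handles below are expected
         * to be NULL, which amdgpu_bo_free_kernel() tolerates.
         */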
amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1888 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1889 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1890 } 1891 1892 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1893 { 1894 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1895 &adev->gfx.me.me_fw_gpu_addr, 1896 (void **)&adev->gfx.me.me_fw_ptr); 1897 1898 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1899 &adev->gfx.me.me_fw_data_gpu_addr, 1900 (void **)&adev->gfx.me.me_fw_data_ptr); 1901 } 1902 1903 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1904 { 1905 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1906 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1907 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1908 } 1909 1910 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1911 { 1912 int i; 1913 struct amdgpu_device *adev = ip_block->adev; 1914 1915 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1916 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1917 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1918 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1919 1920 amdgpu_gfx_mqd_sw_fini(adev, 0); 1921 1922 if (!adev->enable_mes_kiq) { 1923 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1924 amdgpu_gfx_kiq_fini(adev, 0); 1925 } 1926 1927 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1928 1929 gfx_v11_0_pfp_fini(adev); 1930 gfx_v11_0_me_fini(adev); 1931 gfx_v11_0_rlc_fini(adev); 1932 gfx_v11_0_mec_fini(adev); 1933 1934 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1935 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1936 1937 gfx_v11_0_free_microcode(adev); 1938 1939 amdgpu_gfx_sysfs_fini(adev); 1940 1941 kfree(adev->gfx.ip_dump_core); 1942 kfree(adev->gfx.ip_dump_compute_queues); 1943 kfree(adev->gfx.ip_dump_gfx_queues); 1944 1945 return 0; 1946 } 1947 1948 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1949 u32 sh_num, u32 instance, int xcc_id) 1950 { 1951 u32 data; 1952 1953 if (instance == 0xffffffff) 1954 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1955 INSTANCE_BROADCAST_WRITES, 1); 1956 else 1957 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1958 instance); 1959 1960 if (se_num == 0xffffffff) 1961 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1962 1); 1963 else 1964 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1965 1966 if (sh_num == 0xffffffff) 1967 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1968 1); 1969 else 1970 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1971 1972 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1973 } 1974 1975 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 1976 { 1977 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1978 1979 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 1980 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1981 CC_GC_SA_UNIT_DISABLE, 1982 SA_DISABLE); 1983 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 1984 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1985 GC_USER_SA_UNIT_DISABLE, 1986 SA_DISABLE); 1987 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1988 adev->gfx.config.max_shader_engines); 1989 1990 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1991 } 1992 1993 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1994 { 1995 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1996 u32 rb_mask; 1997 1998 
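        /*
         * An RB is usable only if neither the fuse-level register
         * (CC_RB_BACKEND_DISABLE) nor the user/board-level register
         * (GC_USER_RB_BACKEND_DISABLE) marks it disabled; both masks
         * are folded into a single active bitmap below.
         */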
gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 1999 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 2000 CC_RB_BACKEND_DISABLE, 2001 BACKEND_DISABLE); 2002 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2003 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2004 GC_USER_RB_BACKEND_DISABLE, 2005 BACKEND_DISABLE); 2006 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2007 adev->gfx.config.max_shader_engines); 2008 2009 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2010 } 2011 2012 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2013 { 2014 u32 rb_bitmap_per_sa; 2015 u32 rb_bitmap_width_per_sa; 2016 u32 max_sa; 2017 u32 active_sa_bitmap; 2018 u32 global_active_rb_bitmap; 2019 u32 active_rb_bitmap = 0; 2020 u32 i; 2021 2022 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2023 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2024 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2025 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2026 2027 /* generate active rb bitmap according to active sa bitmap */ 2028 max_sa = adev->gfx.config.max_shader_engines * 2029 adev->gfx.config.max_sh_per_se; 2030 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2031 adev->gfx.config.max_sh_per_se; 2032 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2033 2034 for (i = 0; i < max_sa; i++) { 2035 if (active_sa_bitmap & (1 << i)) 2036 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2037 } 2038 2039 active_rb_bitmap &= global_active_rb_bitmap; 2040 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2041 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2042 } 2043 2044 #define DEFAULT_SH_MEM_BASES (0x6000) 2045 #define LDS_APP_BASE 0x1 2046 #define SCRATCH_APP_BASE 0x2 2047 2048 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2049 { 2050 int i; 2051 uint32_t sh_mem_bases; 2052 uint32_t data; 2053 2054 /* 2055 * Configure apertures: 2056 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2057 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2058 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2059 */ 2060 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2061 SCRATCH_APP_BASE; 2062 2063 mutex_lock(&adev->srbm_mutex); 2064 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2065 soc21_grbm_select(adev, 0, 0, 0, i); 2066 /* CP and shaders */ 2067 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2068 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2069 2070 /* Enable trap for each kfd vmid. */ 2071 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2072 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2073 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2074 } 2075 soc21_grbm_select(adev, 0, 0, 0, 0); 2076 mutex_unlock(&adev->srbm_mutex); 2077 2078 /* 2079 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2080 * access. These should be enabled by FW for target VMIDs. 
2081 */ 2082 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2083 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2084 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2085 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2086 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2087 } 2088 } 2089 2090 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2091 { 2092 int vmid; 2093 2094 /* 2095 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2096 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2097 * the driver can enable them for graphics. VMID0 should maintain 2098 * access so that HWS firmware can save/restore entries. 2099 */ 2100 for (vmid = 1; vmid < 16; vmid++) { 2101 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2102 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2103 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2104 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2105 } 2106 } 2107 2108 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2109 { 2110 /* TODO: harvest feature to be added later. */ 2111 } 2112 2113 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2114 { 2115 /* TCCs are global (not instanced). */ 2116 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2117 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2118 2119 adev->gfx.config.tcc_disabled_mask = 2120 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2121 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2122 } 2123 2124 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2125 { 2126 u32 tmp; 2127 int i; 2128 2129 if (!amdgpu_sriov_vf(adev)) 2130 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2131 2132 gfx_v11_0_setup_rb(adev); 2133 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2134 gfx_v11_0_get_tcc_info(adev); 2135 adev->gfx.config.pa_sc_tile_steering_override = 0; 2136 2137 /* Set whether texture coordinate truncation is conformant. */ 2138 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2139 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2140 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2141 2142 /* XXX SH_MEM regs */ 2143 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2144 mutex_lock(&adev->srbm_mutex); 2145 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2146 soc21_grbm_select(adev, 0, 0, 0, i); 2147 /* CP and shaders */ 2148 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2149 if (i != 0) { 2150 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2151 (adev->gmc.private_aperture_start >> 48)); 2152 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2153 (adev->gmc.shared_aperture_start >> 48)); 2154 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2155 } 2156 } 2157 soc21_grbm_select(adev, 0, 0, 0, 0); 2158 2159 mutex_unlock(&adev->srbm_mutex); 2160 2161 gfx_v11_0_init_compute_vmid(adev); 2162 gfx_v11_0_init_gds_vmid(adev); 2163 } 2164 2165 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2166 int me, int pipe) 2167 { 2168 if (me != 0) 2169 return 0; 2170 2171 switch (pipe) { 2172 case 0: 2173 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2174 case 1: 2175 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2176 default: 2177 return 0; 2178 } 2179 } 2180 2181 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2182 int me, int pipe) 2183 { 2184 /* 2185 * amdgpu controls only the first MEC. 
That's why this function only 2186 * handles the setting of interrupts for this specific MEC. All other 2187 * pipes' interrupts are set by amdkfd. 2188 */ 2189 if (me != 1) 2190 return 0; 2191 2192 switch (pipe) { 2193 case 0: 2194 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2195 case 1: 2196 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2197 case 2: 2198 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2199 case 3: 2200 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2201 default: 2202 return 0; 2203 } 2204 } 2205 2206 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2207 bool enable) 2208 { 2209 u32 tmp, cp_int_cntl_reg; 2210 int i, j; 2211 2212 if (amdgpu_sriov_vf(adev)) 2213 return; 2214 2215 for (i = 0; i < adev->gfx.me.num_me; i++) { 2216 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2217 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2218 2219 if (cp_int_cntl_reg) { 2220 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2221 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2222 enable ? 1 : 0); 2223 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2224 enable ? 1 : 0); 2225 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2226 enable ? 1 : 0); 2227 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2228 enable ? 1 : 0); 2229 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2230 } 2231 } 2232 } 2233 } 2234 2235 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2236 { 2237 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2238 2239 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2240 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2241 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2242 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2243 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2244 2245 return 0; 2246 } 2247 2248 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2249 { 2250 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2251 2252 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2253 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2254 } 2255 2256 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2257 { 2258 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2259 udelay(50); 2260 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2261 udelay(50); 2262 } 2263 2264 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2265 bool enable) 2266 { 2267 uint32_t rlc_pg_cntl; 2268 2269 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2270 2271 if (!enable) { 2272 /* RLC_PG_CNTL[23] = 0 (default) 2273 * RLC will wait for handshake acks with SMU 2274 * GFXOFF will be enabled 2275 * RLC_PG_CNTL[23] = 1 2276 * RLC will not issue any message to SMU 2277 * hence no handshake between SMU & RLC 2278 * GFXOFF will be disabled 2279 */ 2280 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2281 } else 2282 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2283 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2284 } 2285 2286 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2287 { 2288 /* TODO: enable rlc & smu handshake until smu 2289 * and gfxoff feature works as expected */ 2290 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2291 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2292 2293 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2294 udelay(50); 2295 } 2296 2297 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device 
*adev) 2298 { 2299 uint32_t tmp; 2300 2301 /* enable Save Restore Machine */ 2302 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2303 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2304 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2305 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2306 } 2307 2308 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2309 { 2310 const struct rlc_firmware_header_v2_0 *hdr; 2311 const __le32 *fw_data; 2312 unsigned i, fw_size; 2313 2314 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2315 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2316 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2317 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2318 2319 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2320 RLCG_UCODE_LOADING_START_ADDRESS); 2321 2322 for (i = 0; i < fw_size; i++) 2323 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2324 le32_to_cpup(fw_data++)); 2325 2326 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2327 } 2328 2329 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2330 { 2331 const struct rlc_firmware_header_v2_2 *hdr; 2332 const __le32 *fw_data; 2333 unsigned i, fw_size; 2334 u32 tmp; 2335 2336 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2337 2338 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2339 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2340 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2341 2342 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2343 2344 for (i = 0; i < fw_size; i++) { 2345 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2346 msleep(1); 2347 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2348 le32_to_cpup(fw_data++)); 2349 } 2350 2351 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2352 2353 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2354 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2355 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2356 2357 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2358 for (i = 0; i < fw_size; i++) { 2359 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2360 msleep(1); 2361 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2362 le32_to_cpup(fw_data++)); 2363 } 2364 2365 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2366 2367 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2368 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2369 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2370 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2371 } 2372 2373 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2374 { 2375 const struct rlc_firmware_header_v2_3 *hdr; 2376 const __le32 *fw_data; 2377 unsigned i, fw_size; 2378 u32 tmp; 2379 2380 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2381 2382 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2383 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2384 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2385 2386 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2387 2388 for (i = 0; i < fw_size; i++) { 2389 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2390 msleep(1); 2391 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2392 le32_to_cpup(fw_data++)); 2393 } 2394 2395 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2396 2397 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2398 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2399 WREG32_SOC15(GC, 0, 
regRLC_GPM_THREAD_ENABLE, tmp); 2400 2401 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2402 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2403 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2404 2405 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2406 2407 for (i = 0; i < fw_size; i++) { 2408 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2409 msleep(1); 2410 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2411 le32_to_cpup(fw_data++)); 2412 } 2413 2414 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2415 2416 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2417 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2418 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2419 } 2420 2421 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2422 { 2423 const struct rlc_firmware_header_v2_0 *hdr; 2424 uint16_t version_major; 2425 uint16_t version_minor; 2426 2427 if (!adev->gfx.rlc_fw) 2428 return -EINVAL; 2429 2430 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2431 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2432 2433 version_major = le16_to_cpu(hdr->header.header_version_major); 2434 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2435 2436 if (version_major == 2) { 2437 gfx_v11_0_load_rlcg_microcode(adev); 2438 if (amdgpu_dpm == 1) { 2439 if (version_minor >= 2) 2440 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2441 if (version_minor == 3) 2442 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2443 } 2444 2445 return 0; 2446 } 2447 2448 return -EINVAL; 2449 } 2450 2451 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2452 { 2453 int r; 2454 2455 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2456 gfx_v11_0_init_csb(adev); 2457 2458 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2459 gfx_v11_0_rlc_enable_srm(adev); 2460 } else { 2461 if (amdgpu_sriov_vf(adev)) { 2462 gfx_v11_0_init_csb(adev); 2463 return 0; 2464 } 2465 2466 adev->gfx.rlc.funcs->stop(adev); 2467 2468 /* disable CG */ 2469 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2470 2471 /* disable PG */ 2472 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2473 2474 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2475 /* legacy rlc firmware loading */ 2476 r = gfx_v11_0_rlc_load_microcode(adev); 2477 if (r) 2478 return r; 2479 } 2480 2481 gfx_v11_0_init_csb(adev); 2482 2483 adev->gfx.rlc.funcs->start(adev); 2484 } 2485 return 0; 2486 } 2487 2488 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2489 { 2490 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2491 uint32_t tmp; 2492 int i; 2493 2494 /* Trigger an invalidation of the L1 instruction caches */ 2495 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2496 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2497 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2498 2499 /* Wait for invalidation complete */ 2500 for (i = 0; i < usec_timeout; i++) { 2501 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2502 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2503 INVALIDATE_CACHE_COMPLETE)) 2504 break; 2505 udelay(1); 2506 } 2507 2508 if (i >= usec_timeout) { 2509 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2510 return -EINVAL; 2511 } 2512 2513 if (amdgpu_emu_mode == 1) 2514 amdgpu_device_flush_hdp(adev, NULL); 2515 2516 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2517 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2518 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2519 tmp = 
REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
        tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
        WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

        /* Program me ucode address into instruction cache address register */
        WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
                        lower_32_bits(addr) & 0xFFFFF000);
        WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
                        upper_32_bits(addr));

        return 0;
}

static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
{
        uint32_t usec_timeout = 50000; /* wait for 50ms */
        uint32_t tmp;
        int i;

        /* Trigger an invalidation of the L1 instruction caches */
        tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
        WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);

        /* Wait for invalidation complete */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
                                INVALIDATE_CACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate instruction cache\n");
                return -EINVAL;
        }

        if (amdgpu_emu_mode == 1)
                amdgpu_device_flush_hdp(adev, NULL);

        tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
        tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
        tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
        tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
        WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);

        /* Program pfp ucode address into instruction cache address register */
        WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
                        lower_32_bits(addr) & 0xFFFFF000);
        WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
                        upper_32_bits(addr));

        return 0;
}

static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
{
        uint32_t usec_timeout = 50000; /* wait for 50ms */
        uint32_t tmp;
        int i;

        /* Trigger an invalidation of the L1 instruction caches */
        tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);

        WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

        /* Wait for invalidation complete */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
                                INVALIDATE_CACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate instruction cache\n");
                return -EINVAL;
        }

        if (amdgpu_emu_mode == 1)
                amdgpu_device_flush_hdp(adev, NULL);

        tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
        WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

        /* Program mec1 ucode address into instruction cache address register */
        WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
                        lower_32_bits(addr) & 0xFFFFF000);
        WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
                        upper_32_bits(addr));

        return 0;
}

static int
gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2621 { 2622 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2623 uint32_t tmp; 2624 unsigned i, pipe_id; 2625 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2626 2627 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2628 adev->gfx.pfp_fw->data; 2629 2630 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2631 lower_32_bits(addr)); 2632 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2633 upper_32_bits(addr)); 2634 2635 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2636 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2637 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2638 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2639 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2640 2641 /* 2642 * Programming any of the CP_PFP_IC_BASE registers 2643 * forces invalidation of the ME L1 I$. Wait for the 2644 * invalidation complete 2645 */ 2646 for (i = 0; i < usec_timeout; i++) { 2647 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2648 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2649 INVALIDATE_CACHE_COMPLETE)) 2650 break; 2651 udelay(1); 2652 } 2653 2654 if (i >= usec_timeout) { 2655 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2656 return -EINVAL; 2657 } 2658 2659 /* Prime the L1 instruction caches */ 2660 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2661 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2662 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2663 /* Waiting for cache primed*/ 2664 for (i = 0; i < usec_timeout; i++) { 2665 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2666 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2667 ICACHE_PRIMED)) 2668 break; 2669 udelay(1); 2670 } 2671 2672 if (i >= usec_timeout) { 2673 dev_err(adev->dev, "failed to prime instruction cache\n"); 2674 return -EINVAL; 2675 } 2676 2677 mutex_lock(&adev->srbm_mutex); 2678 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2679 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2680 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2681 (pfp_hdr->ucode_start_addr_hi << 30) | 2682 (pfp_hdr->ucode_start_addr_lo >> 2)); 2683 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2684 pfp_hdr->ucode_start_addr_hi >> 2); 2685 2686 /* 2687 * Program CP_ME_CNTL to reset given PIPE to take 2688 * effect of CP_PFP_PRGRM_CNTR_START. 2689 */ 2690 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2691 if (pipe_id == 0) 2692 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2693 PFP_PIPE0_RESET, 1); 2694 else 2695 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2696 PFP_PIPE1_RESET, 1); 2697 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2698 2699 /* Clear pfp pipe0 reset bit. 
*/ 2700 if (pipe_id == 0) 2701 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2702 PFP_PIPE0_RESET, 0); 2703 else 2704 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2705 PFP_PIPE1_RESET, 0); 2706 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2707 2708 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2709 lower_32_bits(addr2)); 2710 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2711 upper_32_bits(addr2)); 2712 } 2713 soc21_grbm_select(adev, 0, 0, 0, 0); 2714 mutex_unlock(&adev->srbm_mutex); 2715 2716 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2717 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2718 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2719 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2720 2721 /* Invalidate the data caches */ 2722 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2723 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2724 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2725 2726 for (i = 0; i < usec_timeout; i++) { 2727 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2728 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2729 INVALIDATE_DCACHE_COMPLETE)) 2730 break; 2731 udelay(1); 2732 } 2733 2734 if (i >= usec_timeout) { 2735 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2736 return -EINVAL; 2737 } 2738 2739 return 0; 2740 } 2741 2742 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2743 { 2744 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2745 uint32_t tmp; 2746 unsigned i, pipe_id; 2747 const struct gfx_firmware_header_v2_0 *me_hdr; 2748 2749 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2750 adev->gfx.me_fw->data; 2751 2752 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2753 lower_32_bits(addr)); 2754 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2755 upper_32_bits(addr)); 2756 2757 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2758 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2759 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2760 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2761 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2762 2763 /* 2764 * Programming any of the CP_ME_IC_BASE registers 2765 * forces invalidation of the ME L1 I$. 
Wait for the
         * invalidation complete
         */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
                                INVALIDATE_CACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate instruction cache\n");
                return -EINVAL;
        }

        /* Prime the instruction caches */
        tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
        WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

        /* Waiting for instruction cache primed */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
                                ICACHE_PRIMED))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to prime instruction cache\n");
                return -EINVAL;
        }

        mutex_lock(&adev->srbm_mutex);
        for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
                soc21_grbm_select(adev, 0, pipe_id, 0, 0);
                WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
                        (me_hdr->ucode_start_addr_hi << 30) |
                        (me_hdr->ucode_start_addr_lo >> 2));
                WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
                        me_hdr->ucode_start_addr_hi >> 2);

                /*
                 * Program CP_ME_CNTL to reset the given pipe so that
                 * CP_ME_PRGRM_CNTR_START takes effect.
                 */
                tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
                if (pipe_id == 0)
                        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
                                        ME_PIPE0_RESET, 1);
                else
                        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
                                        ME_PIPE1_RESET, 1);
                WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
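                /*
                 * Releasing the reset below is what actually lets the
                 * pipe restart and fetch from the start address
                 * programmed above.
                 */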
                /* Clear me pipe reset bit. */
                if (pipe_id == 0)
                        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
                                        ME_PIPE0_RESET, 0);
                else
                        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
                                        ME_PIPE1_RESET, 0);
                WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

                WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
                        lower_32_bits(addr2));
                WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
                        upper_32_bits(addr2));
        }
        soc21_grbm_select(adev, 0, 0, 0, 0);
        mutex_unlock(&adev->srbm_mutex);

        tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
        tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
        WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

        /* Invalidate the data caches */
        tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
        WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
                                INVALIDATE_DCACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
                return -EINVAL;
        }

        return 0;
}

static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
{
        uint32_t usec_timeout = 50000; /* wait for 50ms */
        uint32_t tmp;
        unsigned i;
        const struct gfx_firmware_header_v2_0 *mec_hdr;

        mec_hdr = (const struct gfx_firmware_header_v2_0 *)
                adev->gfx.mec_fw->data;

        tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
        WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

        tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
        tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
        WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

        mutex_lock(&adev->srbm_mutex);
        for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
                soc21_grbm_select(adev, 1, i, 0, 0);

                WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
                WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
                             upper_32_bits(addr2));

                WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
                                        mec_hdr->ucode_start_addr_lo >> 2 |
                                        mec_hdr->ucode_start_addr_hi << 30);
                WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
                                        mec_hdr->ucode_start_addr_hi >> 2);

                WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
                WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
                        upper_32_bits(addr));
        }
        mutex_unlock(&adev->srbm_mutex);
        soc21_grbm_select(adev, 0, 0, 0, 0);

        /* Trigger an invalidation of the MEC data cache */
        tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
        WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

        /* Wait for invalidation complete */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
                                INVALIDATE_DCACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate MEC data cache\n");
                return -EINVAL;
        }
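        /*
         * The per-pipe base registers above were programmed under the
         * SRBM select; the cache invalidations are issued once, globally,
         * after the select has been restored.
         */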
        /* Trigger an invalidation of the L1 instruction caches */
        tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
        WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

        /* Wait for invalidation complete */
        for (i = 0; i < usec_timeout; i++) {
                tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
                if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
                                INVALIDATE_CACHE_COMPLETE))
                        break;
                udelay(1);
        }

        if (i >= usec_timeout) {
                dev_err(adev->dev, "failed to invalidate instruction cache\n");
                return -EINVAL;
        }

        return 0;
}

static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
{
        const struct gfx_firmware_header_v2_0 *pfp_hdr;
        const struct gfx_firmware_header_v2_0 *me_hdr;
        const struct gfx_firmware_header_v2_0 *mec_hdr;
        uint32_t pipe_id, tmp;

        mec_hdr = (const struct gfx_firmware_header_v2_0 *)
                adev->gfx.mec_fw->data;
        me_hdr = (const struct gfx_firmware_header_v2_0 *)
                adev->gfx.me_fw->data;
        pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
                adev->gfx.pfp_fw->data;

        /* config pfp program start addr */
        for (pipe_id = 0; pipe_id < 2; pipe_id++) {
                soc21_grbm_select(adev, 0, pipe_id, 0, 0);
                WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
                        (pfp_hdr->ucode_start_addr_hi << 30) |
                        (pfp_hdr->ucode_start_addr_lo >> 2));
                WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
                        pfp_hdr->ucode_start_addr_hi >> 2);
        }
        soc21_grbm_select(adev, 0, 0, 0, 0);

        /* reset pfp pipe */
        tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
        WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

        /* clear pfp pipe reset */
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
        WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

        /* config me program start addr */
        for (pipe_id = 0; pipe_id < 2; pipe_id++) {
                soc21_grbm_select(adev, 0, pipe_id, 0, 0);
                WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
                        (me_hdr->ucode_start_addr_hi << 30) |
                        (me_hdr->ucode_start_addr_lo >> 2));
                WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
                        me_hdr->ucode_start_addr_hi >> 2);
        }
        soc21_grbm_select(adev, 0, 0, 0, 0);

        /* reset me pipe */
        tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
        WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

        /* clear me pipe reset */
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
        WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

        /* config mec program start addr */
        for (pipe_id = 0; pipe_id < 4; pipe_id++) {
                soc21_grbm_select(adev, 1, pipe_id, 0, 0);
                WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
                                        mec_hdr->ucode_start_addr_lo >> 2 |
                                        mec_hdr->ucode_start_addr_hi << 30);
                WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
                                        mec_hdr->ucode_start_addr_hi >> 2);
        }
        soc21_grbm_select(adev, 0, 0, 0, 0);

        /* reset mec pipe */
        tmp =
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3019 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3020 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3021 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3022 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3023 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3024 3025 /* clear mec pipe reset */ 3026 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3027 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3028 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3029 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3030 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3031 } 3032 3033 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3034 { 3035 uint32_t cp_status; 3036 uint32_t bootload_status; 3037 int i, r; 3038 uint64_t addr, addr2; 3039 3040 for (i = 0; i < adev->usec_timeout; i++) { 3041 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3042 3043 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3044 IP_VERSION(11, 0, 1) || 3045 amdgpu_ip_version(adev, GC_HWIP, 0) == 3046 IP_VERSION(11, 0, 4) || 3047 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3048 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3049 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3050 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) 3051 bootload_status = RREG32_SOC15(GC, 0, 3052 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3053 else 3054 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3055 3056 if ((cp_status == 0) && 3057 (REG_GET_FIELD(bootload_status, 3058 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3059 break; 3060 } 3061 udelay(1); 3062 } 3063 3064 if (i >= adev->usec_timeout) { 3065 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3066 return -ETIMEDOUT; 3067 } 3068 3069 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3070 if (adev->gfx.rs64_enable) { 3071 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3072 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3073 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3074 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3075 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3076 if (r) 3077 return r; 3078 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3079 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3080 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3081 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3082 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3083 if (r) 3084 return r; 3085 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3086 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3087 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3088 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3089 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3090 if (r) 3091 return r; 3092 } else { 3093 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3094 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3095 r = gfx_v11_0_config_me_cache(adev, addr); 3096 if (r) 3097 return r; 3098 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3099 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3100 r = gfx_v11_0_config_pfp_cache(adev, addr); 3101 if (r) 3102 return r; 3103 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3104 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3105 r = gfx_v11_0_config_mec_cache(adev, addr); 3106 if (r) 
3107 return r; 3108 } 3109 } 3110 3111 return 0; 3112 } 3113 3114 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3115 { 3116 int i; 3117 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3118 3119 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3120 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3121 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3122 3123 for (i = 0; i < adev->usec_timeout; i++) { 3124 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3125 break; 3126 udelay(1); 3127 } 3128 3129 if (i >= adev->usec_timeout) 3130 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3131 3132 return 0; 3133 } 3134 3135 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3136 { 3137 int r; 3138 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3139 const __le32 *fw_data; 3140 unsigned i, fw_size; 3141 3142 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3143 adev->gfx.pfp_fw->data; 3144 3145 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3146 3147 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3148 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3149 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3150 3151 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3152 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3153 &adev->gfx.pfp.pfp_fw_obj, 3154 &adev->gfx.pfp.pfp_fw_gpu_addr, 3155 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3156 if (r) { 3157 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3158 gfx_v11_0_pfp_fini(adev); 3159 return r; 3160 } 3161 3162 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3163 3164 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3165 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3166 3167 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3168 3169 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3170 3171 for (i = 0; i < pfp_hdr->jt_size; i++) 3172 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3173 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3174 3175 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3176 3177 return 0; 3178 } 3179 3180 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3181 { 3182 int r; 3183 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3184 const __le32 *fw_ucode, *fw_data; 3185 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3186 uint32_t tmp; 3187 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3188 3189 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3190 adev->gfx.pfp_fw->data; 3191 3192 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3193 3194 /* instruction */ 3195 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3196 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3197 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3198 /* data */ 3199 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3200 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3201 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3202 3203 /* 64kb align */ 3204 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3205 64 * 1024, 3206 AMDGPU_GEM_DOMAIN_VRAM | 3207 AMDGPU_GEM_DOMAIN_GTT, 3208 &adev->gfx.pfp.pfp_fw_obj, 3209 &adev->gfx.pfp.pfp_fw_gpu_addr, 3210 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3211 if (r) { 3212 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3213 gfx_v11_0_pfp_fini(adev); 3214 return r; 3215 } 3216 3217 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3218 64 * 1024, 3219 AMDGPU_GEM_DOMAIN_VRAM | 3220 AMDGPU_GEM_DOMAIN_GTT, 3221 
&adev->gfx.pfp.pfp_fw_data_obj, 3222 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3223 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3224 if (r) { 3225 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3226 gfx_v11_0_pfp_fini(adev); 3227 return r; 3228 } 3229 3230 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3231 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3232 3233 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3234 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3235 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3236 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3237 3238 if (amdgpu_emu_mode == 1) 3239 amdgpu_device_flush_hdp(adev, NULL); 3240 3241 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3242 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3243 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3244 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3245 3246 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3247 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3248 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3249 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3250 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3251 3252 /* 3253 * Programming any of the CP_PFP_IC_BASE registers 3254 * forces invalidation of the ME L1 I$. Wait for the 3255 * invalidation complete 3256 */ 3257 for (i = 0; i < usec_timeout; i++) { 3258 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3259 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3260 INVALIDATE_CACHE_COMPLETE)) 3261 break; 3262 udelay(1); 3263 } 3264 3265 if (i >= usec_timeout) { 3266 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3267 return -EINVAL; 3268 } 3269 3270 /* Prime the L1 instruction caches */ 3271 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3272 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3273 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3274 /* Waiting for cache primed*/ 3275 for (i = 0; i < usec_timeout; i++) { 3276 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3277 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3278 ICACHE_PRIMED)) 3279 break; 3280 udelay(1); 3281 } 3282 3283 if (i >= usec_timeout) { 3284 dev_err(adev->dev, "failed to prime instruction cache\n"); 3285 return -EINVAL; 3286 } 3287 3288 mutex_lock(&adev->srbm_mutex); 3289 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3290 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3291 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3292 (pfp_hdr->ucode_start_addr_hi << 30) | 3293 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3294 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3295 pfp_hdr->ucode_start_addr_hi>>2); 3296 3297 /* 3298 * Program CP_ME_CNTL to reset given PIPE to take 3299 * effect of CP_PFP_PRGRM_CNTR_START. 3300 */ 3301 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3302 if (pipe_id == 0) 3303 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3304 PFP_PIPE0_RESET, 1); 3305 else 3306 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3307 PFP_PIPE1_RESET, 1); 3308 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3309 3310 /* Clear pfp pipe0 reset bit. 
*/
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    PFP_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    PFP_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
			     lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
			     upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);

	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);

	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);

	for (i = 0; i < me_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);

	return 0;
}

static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v2_0 *me_hdr;
	const __le32 *fw_ucode, *fw_data;
	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
	uint32_t tmp;
	uint32_t usec_timeout = 50000; /* wait for 50ms */

	me_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* instruction */
	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->ucode_offset_bytes));
	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
	/* data */
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64KB align */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		     lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		     upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given pipe so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear the me pipe reset bit. */
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
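
		/* DC_BASE1 holds the ME data section; the PFP path above
		 * programs its data section into DC_BASE0.
		 */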
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
			     lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
			     upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
		return -EINVAL;

	gfx_v11_0_cp_gfx_enable(adev, false);

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
		return r;
	}

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
		return r;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;
	int ctx_reg_offset;

	/* init the CP */
	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
		     adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);

	if (!amdgpu_async_gfx_ring)
		gfx_v11_0_cp_gfx_enable(adev, true);

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section;
		     ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG,
							  ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index -
						  PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	amdgpu_ring_write(ring, ctx_reg_offset);
	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	/* submit cs packet to copy state 0 to next available state */
	if (adev->gfx.num_gfx_rings > 1) {
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		r = amdgpu_ring_alloc(ring, 2);
		if (r) {
			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
			return r;
		}

		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
		amdgpu_ring_write(ring, 0);

		amdgpu_ring_commit(ring);
	}
	return 0;
}
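/* Route subsequent per-pipe CP ring-buffer register programming to the
 * given gfx pipe.
 */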
static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
					 CP_PIPE_ID pipe)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);

	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
}

static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
					  struct amdgpu_ring *ring)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);

	/* Init gfx ring 0 for pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
		     lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
		     upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);

	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
	mutex_unlock(&adev->srbm_mutex);

	/* Init gfx ring 1 for pipe 1 */
	if (adev->gfx.num_gfx_rings > 1) {
		mutex_lock(&adev->srbm_mutex);
		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		rb_bufsz = order_base_2(ring->ring_size / 8);
		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
		/* Initialize the ring buffer's write pointers */
		ring->wptr = 0;
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
		/* Set the wb address whether it's enabled or not */
		rptr_addr = ring->rptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
		wptr_gpu_addr = ring->wptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
			     lower_32_bits(wptr_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
			     upper_32_bits(wptr_gpu_addr));

		mdelay(1);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);

		rb_addr = ring->gpu_addr >> 8;
		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);

		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
		mutex_unlock(&adev->srbm_mutex);
	}
	/* Switch to pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
	mutex_unlock(&adev->srbm_mutex);

	/* start the ring */
	gfx_v11_0_cp_gfx_start(adev);

	return 0;
}
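/*
 * Halt or un-halt the MEC: RS64 parts toggle the per-pipe reset/active
 * bits, while legacy parts only flip the ME1/ME2 halt bits.
 */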
static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	u32 data;

	if (adev->gfx.rs64_enable) {
		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
				     enable ? 0 : 1);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
	} else {
		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);

		if (enable) {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
			if (!adev->enable_mes_kiq)
				data = REG_SET_FIELD(data, CP_MEC_CNTL,
						     MEC_ME2_HALT, 0);
		} else {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
	}

	udelay(50);
}

static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 *fw = NULL;
	int r;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v11_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);

	/* MEC1 */
	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);

	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	return 0;
}
static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v2_0 *mec_hdr;
	const __le32 *fw_ucode, *fw_data;
	u32 tmp, fw_ucode_size, fw_data_size;
	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
	u32 *fw_ucode_ptr, *fw_data_ptr;
	int r;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v11_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(mec_hdr->ucode_offset_bytes));
	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);

	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(mec_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);

	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Trigger an invalidation of the L1 data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	return 0;
}

static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
}
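/* Carve the doorbell aperture into a range for the gfx ring buffers and a
 * range for the MEC (KIQ and compute queues).
 */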
static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
{
	/* set graphics engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);

	/* set compute engine doorbell range */
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
		     (adev->doorbell_index.kiq * 2) << 2);
	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
		     (adev->doorbell_index.userqueue_end * 2) << 2);
}

static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
					   struct v11_gfx_mqd *mqd,
					   struct amdgpu_mqd_prop *prop)
{
	bool priority = false;
	u32 tmp;

	/* set up default queue priority level
	 * 0x0 = low priority, 0x1 = high priority
	 */
	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
		priority = true;

	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
	mqd->cp_gfx_hqd_queue_priority = tmp;
}
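/*
 * Fill a v11 gfx MQD from the generic queue properties. The MQD is the
 * memory image of the HQD registers that the CP loads when the queue is
 * mapped onto a pipe.
 */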
static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
				  struct amdgpu_mqd_prop *prop)
{
	struct v11_gfx_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr;
	uint32_t tmp;
	uint32_t rb_bufsz;

	/* set up gfx hqd wptr */
	mqd->cp_gfx_hqd_wptr = 0;
	mqd->cp_gfx_hqd_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set up mqd control */
	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
	mqd->cp_gfx_mqd_control = tmp;

	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
	tmp = regCP_GFX_HQD_VMID_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
	mqd->cp_gfx_hqd_vmid = tmp;

	/* set up gfx queue priority */
	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);

	/* set up time quantum */
	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
	mqd->cp_gfx_hqd_quantum = tmp;

	/* set up gfx hqd base, this is similar to CP_RB_BASE */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	if (prop->tmz_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
	if (!prop->kernel_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;

	/* activate the queue */
	mqd->cp_gfx_hqd_active = 1;

	/* set gfx UQ items */
	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
	mqd->fence_address_hi = upper_32_bits(prop->fence_address);

	return 0;
}

static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.gfx_ring[0];

	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else {
		/* restore mqd with the backup copy */
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset the ring */
		ring->wptr = 0;
		*ring->wptr_cpu_addr = 0;
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
	int r, i;
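
	/* Initialize the MQD for every kernel gfx queue, then map the queues
	 * via the KIQ and emit the initial CLEAR_STATE.
	 */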
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
		if (r)
			return r;
	}

	r = amdgpu_gfx_enable_kgq(adev, 0);
	if (r)
		return r;

	return gfx_v11_0_cp_gfx_start(adev);
}
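/*
 * Fill a v11 compute MQD. Unlike the gfx MQD this also carries the EOP
 * buffer address/size and the static pipe/queue priorities for the HQD.
 */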
static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
				      struct amdgpu_mqd_prop *prop)
{
	struct v11_compute_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	eop_base_addr = prop->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;

	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(prop->queue_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
			    prop->allow_tunneling);
	if (prop->kernel_queue) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	}
	if (prop->tmz_queue)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (prop->use_doorbell) {
		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a compute queue/ring */
	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;

	mqd->cp_hqd_active = prop->hqd_active;

	/* set UQ fence address */
	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
	mqd->fence_address_hi = upper_32_bits(prop->fence_address);

	return 0;
}
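/*
 * Write an initialized compute MQD into the live HQD registers. The caller
 * must hold srbm_mutex with the target me/pipe/queue selected.
 */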
static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int j;

	/* inactivate the queue */
	if (amdgpu_sriov_vf(adev))
		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);

	/* disable wptr polling */
	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
		     mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
		     mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
		     mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
			     mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
			     mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
			     mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
			     mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
		     mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
		     mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
		     mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
		     mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
		     mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
		     mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
			     (adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
			     (adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
		     mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
		     mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
		     mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
		     mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;

	gfx_v11_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.kiq[0].mqd_backup)
			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
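		/* reprogram the HQD registers from the restored MQD */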
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(*mqd));
		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
			amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.kiq[0].mqd_backup)
			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
	}

	return 0;
}

static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else {
		/* restore MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
{
	gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
	return 0;
}

static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
{
	int i, r;

	if (!amdgpu_async_gfx_ring)
		gfx_v11_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
		if (r)
			return r;
	}

	return amdgpu_gfx_enable_kcq(adev, 0);
}

static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v11_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v11_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		if (adev->gfx.rs64_enable)
			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
		else
			r = gfx_v11_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v11_0_cp_set_doorbell_range(adev);

	if (amdgpu_async_gfx_ring) {
		gfx_v11_0_cp_compute_enable(adev, true);
		gfx_v11_0_cp_gfx_enable(adev, true);
	}

	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
		r = amdgpu_mes_kiq_hw_init(adev, 0);
	else
		r = gfx_v11_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v11_0_kcq_resume(adev);
	if (r)
		return r;

	if (!amdgpu_async_gfx_ring) {
		r = gfx_v11_0_cp_gfx_resume(adev);
		if (r)
			return r;
	} else {
		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
		if (r)
			return r;
	}

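	/* With kernel queues disabled (user queues only), just exercise the
	 * gfx rings without marking them ready, and unmap the KGQs again
	 * afterwards; otherwise run the normal ring tests.
	 */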
	if (adev->gfx.disable_kq) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			/* we don't want to set ring->ready */
			r = amdgpu_ring_test_ring(ring);
			if (r)
				return r;
		}
		if (amdgpu_async_gfx_ring)
			amdgpu_gfx_disable_kgq(adev, 0);
	} else {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			r = amdgpu_ring_test_helper(ring);
			if (r)
				return r;
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}

static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v11_0_cp_gfx_enable(adev, enable);
	gfx_v11_0_cp_compute_enable(adev, enable);
}

static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
{
	int r;
	bool value;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	amdgpu_device_flush_hdp(adev, NULL);

	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;

	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
	/* TODO investigate why this and the hdp flush above are needed,
	 * are we missing a flush somewhere else? */
	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);

	return 0;
}

static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
{
	u32 tmp;

	/* select RS64 */
	if (adev->gfx.rs64_enable) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);

		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
	}

	if (amdgpu_emu_mode == 1)
		msleep(100);
}

static int get_gb_addr_config(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
	if (gb_addr_config == 0)
		return -EINVAL;

	adev->gfx.config.gb_addr_config_fields.num_pkrs =
		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));

	return 0;
}

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);

	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}
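/*
 * hw_init: bring up the GC block. Depending on the firmware load type this
 * either programs the RLC RAM and waits for RLC autoload to finish, or
 * direct-loads the IMU and CP firmwares, then enables the gfxhub, applies
 * the golden registers and resumes the RLC and CP.
 */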
static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
				       adev->gfx.cleaner_shader_ptr);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.imu.funcs) {
			/* RLC autoload sequence 1: Program rlc ram */
			if (adev->gfx.imu.funcs->program_rlc_ram)
				adev->gfx.imu.funcs->program_rlc_ram(adev);
			/* rlc autoload firmware */
			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}
	} else {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
				if (adev->gfx.imu.funcs->load_microcode)
					adev->gfx.imu.funcs->load_microcode(adev);
				if (adev->gfx.imu.funcs->setup_imu)
					adev->gfx.imu.funcs->setup_imu(adev);
				if (adev->gfx.imu.funcs->start_imu)
					adev->gfx.imu.funcs->start_imu(adev);
			}

			/* disable gpa mode in backdoor loading */
			gfx_v11_0_disable_gpa_mode(adev);
		}
	}

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * being loaded first, so the SMC ucode has to be loaded here
		 * before the RLC in these load types.
		 */
		r = amdgpu_pm_load_smu_firmware(adev, NULL);
		if (r)
			return r;
	}

	gfx_v11_0_constants_init(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		gfx_v11_0_select_cp_fw_arch(adev);

	if (adev->nbio.funcs->gc_doorbell_init)
		adev->nbio.funcs->gc_doorbell_init(adev);

	r = gfx_v11_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * Init of golden registers and RLC resume may override some registers;
	 * reconfigure them here.
	 */
	gfx_v11_0_tcp_harvest(adev);

	r = gfx_v11_0_cp_resume(adev);
	if (r)
		return r;

	/* get IMU version from HW if it's not set */
	if (!adev->gfx.imu_fw_version)
		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);

	return r;
}
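/*
 * Enable or disable EOP interrupts for every me/pipe that user queues can
 * be mapped onto, since user queues are not tied to a fixed pipe.
 */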
static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
					      bool enable)
{
	unsigned int irq_type;
	int m, p, r;

	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
		for (m = 0; m < adev->gfx.me.num_me; m++) {
			for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
					 + (m * adev->gfx.mec.num_pipe_per_mec)
					 + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	return 0;
}
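/*
 * hw_fini: quiesce interrupts and unmap the kernel queues, then (except
 * under SR-IOV) halt the CP and disable the gfxhub.
 */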
4913 */ 4914 return 0; 4915 4916 gfx_v11_0_cp_enable(adev, false); 4917 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4918 4919 adev->gfxhub.funcs->gart_disable(adev); 4920 4921 adev->gfx.is_poweron = false; 4922 4923 return 0; 4924 } 4925 4926 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4927 { 4928 return gfx_v11_0_hw_fini(ip_block); 4929 } 4930 4931 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 4932 { 4933 return gfx_v11_0_hw_init(ip_block); 4934 } 4935 4936 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 4937 { 4938 struct amdgpu_device *adev = ip_block->adev; 4939 4940 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 4941 GRBM_STATUS, GUI_ACTIVE)) 4942 return false; 4943 else 4944 return true; 4945 } 4946 4947 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 4948 { 4949 unsigned i; 4950 u32 tmp; 4951 struct amdgpu_device *adev = ip_block->adev; 4952 4953 for (i = 0; i < adev->usec_timeout; i++) { 4954 /* read MC_STATUS */ 4955 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 4956 GRBM_STATUS__GUI_ACTIVE_MASK; 4957 4958 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4959 return 0; 4960 udelay(1); 4961 } 4962 return -ETIMEDOUT; 4963 } 4964 4965 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 4966 bool req) 4967 { 4968 u32 i, tmp, val; 4969 4970 for (i = 0; i < adev->usec_timeout; i++) { 4971 /* Request with MeId=2, PipeId=0 */ 4972 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 4973 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 4974 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 4975 4976 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 4977 if (req) { 4978 if (val == tmp) 4979 break; 4980 } else { 4981 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 4982 REQUEST, 1); 4983 4984 /* unlocked or locked by firmware */ 4985 if (val != tmp) 4986 break; 4987 } 4988 udelay(1); 4989 } 4990 4991 if (i >= adev->usec_timeout) 4992 return -EINVAL; 4993 4994 return 0; 4995 } 4996 4997 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 4998 { 4999 u32 grbm_soft_reset = 0; 5000 u32 tmp; 5001 int r, i, j, k; 5002 struct amdgpu_device *adev = ip_block->adev; 5003 5004 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5005 5006 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5007 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 5008 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 5009 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 5010 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 5011 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5012 5013 mutex_lock(&adev->srbm_mutex); 5014 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 5015 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 5016 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 5017 soc21_grbm_select(adev, i, k, j, 0); 5018 5019 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 5020 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 5021 } 5022 } 5023 } 5024 for (i = 0; i < adev->gfx.me.num_me; ++i) { 5025 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 5026 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 5027 soc21_grbm_select(adev, i, k, j, 0); 5028 5029 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5030 } 5031 } 5032 } 5033 soc21_grbm_select(adev, 0, 0, 0, 0); 5034 mutex_unlock(&adev->srbm_mutex); 5035 5036 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5037 mutex_lock(&adev->gfx.reset_sem_mutex); 5038 r = 
gfx_v11_0_request_gfx_index_mutex(adev, true); 5039 if (r) { 5040 mutex_unlock(&adev->gfx.reset_sem_mutex); 5041 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 5042 return r; 5043 } 5044 5045 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 5046 5047 /* Read the CP_VMID_RESET register three times 5048 * to give GFX_HQD_ACTIVE sufficient time to reach 0. */ 5049 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5050 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5051 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5052 5053 /* release the gfx mutex */ 5054 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 5055 mutex_unlock(&adev->gfx.reset_sem_mutex); 5056 if (r) { 5057 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 5058 return r; 5059 } 5060 5061 for (i = 0; i < adev->usec_timeout; i++) { 5062 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 5063 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 5064 break; 5065 udelay(1); 5066 } 5067 if (i >= adev->usec_timeout) { 5068 printk("Failed to wait for all pipes to become idle\n"); 5069 return -EINVAL; 5070 } 5071 5072 /********** trigger soft reset ***********/ 5073 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5074 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5075 SOFT_RESET_CP, 1); 5076 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5077 SOFT_RESET_GFX, 1); 5078 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5079 SOFT_RESET_CPF, 1); 5080 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5081 SOFT_RESET_CPC, 1); 5082 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5083 SOFT_RESET_CPG, 1); 5084 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5085 /********** exit soft reset ***********/ 5086 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5087 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5088 SOFT_RESET_CP, 0); 5089 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5090 SOFT_RESET_GFX, 0); 5091 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5092 SOFT_RESET_CPF, 0); 5093 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5094 SOFT_RESET_CPC, 0); 5095 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5096 SOFT_RESET_CPG, 0); 5097 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5098 5099 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 5100 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 5101 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 5102 5103 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 5104 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 5105 5106 for (i = 0; i < adev->usec_timeout; i++) { 5107 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 5108 break; 5109 udelay(1); 5110 } 5111 if (i >= adev->usec_timeout) { 5112 printk("Failed to wait for CP_VMID_RESET to reach 0\n"); 5113 return -EINVAL; 5114 } 5115 5116 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5117 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5118 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5119 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5120 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5121 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5122 5123 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5124 5125 return gfx_v11_0_cp_resume(adev); 5126 } 5127 5128 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 5129 { 5130 int i, r; 5131 struct amdgpu_device *adev = ip_block->adev; 5132
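/*
 * Illustration: the soft-reset sequence above leans on one bounded-poll
 * idiom throughout: re-read a register once per microsecond for up to
 * adev->usec_timeout iterations, then give up. A minimal standalone
 * sketch of that idiom, assuming a hypothetical read_hqd_active()
 * accessor (udelay() and -ETIMEDOUT are the usual kernel facilities):
 *
 *   static int poll_until_clear(unsigned int timeout_us)
 *   {
 *           unsigned int i;
 *
 *           for (i = 0; i < timeout_us; i++) {
 *                   if (!read_hqd_active())
 *                           return 0;
 *                   udelay(1);
 *           }
 *           return -ETIMEDOUT;
 *   }
 */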
struct amdgpu_ring *ring; 5133 long tmo = msecs_to_jiffies(1000); 5134 5135 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5136 ring = &adev->gfx.gfx_ring[i]; 5137 r = amdgpu_ring_test_ib(ring, tmo); 5138 if (r) 5139 return true; 5140 } 5141 5142 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5143 ring = &adev->gfx.compute_ring[i]; 5144 r = amdgpu_ring_test_ib(ring, tmo); 5145 if (r) 5146 return true; 5147 } 5148 5149 return false; 5150 } 5151 5152 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5153 { 5154 struct amdgpu_device *adev = ip_block->adev; 5155 /* 5156 * A GFX soft reset impacts MES, so MES must be resumed after a GFX soft reset. 5157 */ 5158 return amdgpu_mes_resume(adev); 5159 } 5160 5161 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5162 { 5163 uint64_t clock; 5164 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5165 5166 if (amdgpu_sriov_vf(adev)) { 5167 amdgpu_gfx_off_ctrl(adev, false); 5168 mutex_lock(&adev->gfx.gpu_clock_mutex); 5169 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5170 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5171 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5172 if (clock_counter_hi_pre != clock_counter_hi_after) 5173 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5174 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5175 amdgpu_gfx_off_ctrl(adev, true); 5176 } else { 5177 preempt_disable(); 5178 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5179 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5180 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); 5181 if (clock_counter_hi_pre != clock_counter_hi_after) 5182 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); 5183 preempt_enable(); 5184 } 5185 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5186 5187 return clock; 5188 } 5189 5190 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5191 uint32_t vmid, 5192 uint32_t gds_base, uint32_t gds_size, 5193 uint32_t gws_base, uint32_t gws_size, 5194 uint32_t oa_base, uint32_t oa_size) 5195 { 5196 struct amdgpu_device *adev = ring->adev; 5197 5198 /* GDS Base */ 5199 gfx_v11_0_write_data_to_reg(ring, 0, false, 5200 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5201 gds_base); 5202 5203 /* GDS Size */ 5204 gfx_v11_0_write_data_to_reg(ring, 0, false, 5205 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5206 gds_size); 5207 5208 /* GWS */ 5209 gfx_v11_0_write_data_to_reg(ring, 0, false, 5210 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5211 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5212 5213 /* OA */ 5214 gfx_v11_0_write_data_to_reg(ring, 0, false, 5215 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5216 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5217 } 5218 5219 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5220 { 5221 struct amdgpu_device *adev = ip_block->adev; 5222 5223 switch (amdgpu_user_queue) { 5224 case -1: 5225 case 0: 5226 default: 5227 adev->gfx.disable_kq = false; 5228 adev->gfx.disable_uq = true; 5229 break; 5230 case 1: 5231 adev->gfx.disable_kq = false; 5232 adev->gfx.disable_uq = false; 5233 break; 5234 case 2: 5235 adev->gfx.disable_kq = true; 5236 adev->gfx.disable_uq = false; 5237 break; 5238 } 5239 5240 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5241
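/*
 * Illustration: the amdgpu_user_queue switch above maps one module
 * parameter onto two independent flags:
 *
 *   amdgpu_user_queue    disable_kq   disable_uq   meaning
 *   -1, 0 (default)      false        true         kernel queues only
 *    1                   false        false        kernel and user queues
 *    2                   true         false        user queues only
 *
 * A minimal standalone sketch of the same mapping, with hypothetical
 * names (struct queue_mode is not a driver type):
 *
 *   struct queue_mode { bool disable_kq, disable_uq; };
 *
 *   static struct queue_mode decode_user_queue(int param)
 *   {
 *           if (param == 1)
 *                   return (struct queue_mode){ false, false };
 *           if (param == 2)
 *                   return (struct queue_mode){ true, false };
 *           return (struct queue_mode){ false, true };
 *   }
 */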
5242 if (adev->gfx.disable_kq) { 5243 /* We need one GFX ring temporarily to set up 5244 * the clear state. 5245 */ 5246 adev->gfx.num_gfx_rings = 1; 5247 adev->gfx.num_compute_rings = 0; 5248 } else { 5249 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5250 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5251 AMDGPU_MAX_COMPUTE_RINGS); 5252 } 5253 5254 gfx_v11_0_set_kiq_pm4_funcs(adev); 5255 gfx_v11_0_set_ring_funcs(adev); 5256 gfx_v11_0_set_irq_funcs(adev); 5257 gfx_v11_0_set_gds_init(adev); 5258 gfx_v11_0_set_rlc_funcs(adev); 5259 gfx_v11_0_set_mqd_funcs(adev); 5260 gfx_v11_0_set_imu_funcs(adev); 5261 5262 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5263 5264 return gfx_v11_0_init_microcode(adev); 5265 } 5266 5267 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5268 { 5269 struct amdgpu_device *adev = ip_block->adev; 5270 int r; 5271 5272 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5273 if (r) 5274 return r; 5275 5276 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5277 if (r) 5278 return r; 5279 5280 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5281 if (r) 5282 return r; 5283 5284 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5285 if (r) 5286 return r; 5287 5288 return 0; 5289 } 5290 5291 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5292 { 5293 uint32_t rlc_cntl; 5294 5295 /* if RLC is not enabled, do nothing */ 5296 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5297 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5298 } 5299 5300 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5301 { 5302 uint32_t data; 5303 unsigned i; 5304 5305 data = RLC_SAFE_MODE__CMD_MASK; 5306 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5307 5308 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5309 5310 /* wait for RLC_SAFE_MODE */ 5311 for (i = 0; i < adev->usec_timeout; i++) { 5312 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5313 RLC_SAFE_MODE, CMD)) 5314 break; 5315 udelay(1); 5316 } 5317 } 5318 5319 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5320 { 5321 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5322 } 5323 5324 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5325 bool enable) 5326 { 5327 uint32_t def, data; 5328 5329 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5330 return; 5331 5332 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5333 5334 if (enable) 5335 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5336 else 5337 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5338 5339 if (def != data) 5340 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5341 } 5342 5343 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5344 bool enable) 5345 { 5346 uint32_t def, data; 5347 5348 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5349 return; 5350 5351 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5352 5353 if (enable) 5354 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5355 else 5356 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5357 5358 if (def != data) 5359 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5360 } 5361 5362 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5363 bool enable) 5364 { 5365 uint32_t def, data; 5366 5367 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5368 return; 5369 5370 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5371 
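/*
 * Illustration: "def = data = RREG32_SOC15(...)" just above is the
 * read-modify-write idiom shared by all the clock-gating helpers in this
 * file: snapshot the register, toggle the override mask in the copy, and
 * write back only if the value actually changed, so an already-correct
 * register is never rewritten. A standalone sketch, assuming hypothetical
 * read_reg() and write_reg() accessors:
 *
 *   static void override_rmw(uint32_t mask, bool enable)
 *   {
 *           uint32_t def, data;
 *
 *           def = data = read_reg();
 *           if (enable)
 *                   data &= ~mask;
 *           else
 *                   data |= mask;
 *           if (def != data)
 *                   write_reg(data);
 *   }
 */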
5372 if (enable) 5373 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5374 else 5375 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5376 5377 if (def != data) 5378 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5379 } 5380 5381 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5382 bool enable) 5383 { 5384 uint32_t data, def; 5385 5386 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5387 return; 5388 5389 /* It is disabled by HW by default */ 5390 if (enable) { 5391 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5392 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5393 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5394 5395 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5396 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5397 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5398 5399 if (def != data) 5400 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5401 } 5402 } else { 5403 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5404 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5405 5406 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5407 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5408 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5409 5410 if (def != data) 5411 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5412 } 5413 } 5414 } 5415 5416 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5417 bool enable) 5418 { 5419 uint32_t def, data; 5420 5421 if (!(adev->cg_flags & 5422 (AMD_CG_SUPPORT_GFX_CGCG | 5423 AMD_CG_SUPPORT_GFX_CGLS | 5424 AMD_CG_SUPPORT_GFX_3D_CGCG | 5425 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5426 return; 5427 5428 if (enable) { 5429 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5430 5431 /* unset CGCG override */ 5432 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5433 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5434 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5435 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5436 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5437 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5438 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5439 5440 /* update CGCG override bits */ 5441 if (def != data) 5442 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5443 5444 /* enable cgcg FSM(0x0000363F) */ 5445 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5446 5447 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5448 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5449 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5450 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5451 } 5452 5453 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5454 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5455 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5456 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5457 } 5458 5459 if (def != data) 5460 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5461 5462 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5463 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5464 5465 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5466 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5467 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5468 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5469 } 5470 5471 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5472 data &= 
~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5473 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5474 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5475 } 5476 5477 if (def != data) 5478 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5479 5480 /* set IDLE_POLL_COUNT(0x00900100) */ 5481 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5482 5483 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5484 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5485 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5486 5487 if (def != data) 5488 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5489 5490 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5491 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5492 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5493 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5494 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5495 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5496 5497 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5498 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5499 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5500 5501 /* Some ASICs only have one SDMA instance; no need to configure SDMA1 */ 5502 if (adev->sdma.num_instances > 1) { 5503 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5504 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5505 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5506 } 5507 } else { 5508 /* Program RLC_CGCG_CGLS_CTRL */ 5509 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5510 5511 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5512 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5513 5514 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5515 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5516 5517 if (def != data) 5518 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5519 5520 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5521 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5522 5523 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5524 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5525 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5526 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5527 5528 if (def != data) 5529 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5530 5531 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5532 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5533 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5534 5535 /* Some ASICs only have one SDMA instance; no need to configure SDMA1 */ 5536 if (adev->sdma.num_instances > 1) { 5537 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5538 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5539 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5540 } 5541 } 5542 } 5543 5544 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5545 bool enable) 5546 { 5547 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5548 5549 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5550 5551 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5552 5553 gfx_v11_0_update_repeater_fgcg(adev, enable); 5554 5555 gfx_v11_0_update_sram_fgcg(adev, enable); 5556 5557 gfx_v11_0_update_perf_clk(adev, enable); 5558 5559 if (adev->cg_flags & 5560 (AMD_CG_SUPPORT_GFX_MGCG | 5561 AMD_CG_SUPPORT_GFX_CGLS | 5562 AMD_CG_SUPPORT_GFX_CGCG | 5563 AMD_CG_SUPPORT_GFX_3D_CGCG | 5564 AMD_CG_SUPPORT_GFX_3D_CGLS))
5565 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5566 5567 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5568 5569 return 0; 5570 } 5571 5572 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, 5573 struct amdgpu_ring *ring, unsigned vmid) 5574 { 5575 u32 reg, pre_data, data; 5576 5577 amdgpu_gfx_off_ctrl(adev, false); 5578 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5579 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5580 pre_data = RREG32_NO_KIQ(reg); 5581 else 5582 pre_data = RREG32(reg); 5583 5584 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5585 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5586 5587 if (pre_data != data) { 5588 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5589 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5590 } else 5591 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5592 } 5593 amdgpu_gfx_off_ctrl(adev, true); 5594 5595 if (ring 5596 && amdgpu_sriov_is_pp_one_vf(adev) 5597 && (pre_data != data) 5598 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5599 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5600 amdgpu_ring_emit_wreg(ring, reg, data); 5601 } 5602 } 5603 5604 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5605 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5606 .set_safe_mode = gfx_v11_0_set_safe_mode, 5607 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5608 .init = gfx_v11_0_rlc_init, 5609 .get_csb_size = gfx_v11_0_get_csb_size, 5610 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5611 .resume = gfx_v11_0_rlc_resume, 5612 .stop = gfx_v11_0_rlc_stop, 5613 .reset = gfx_v11_0_rlc_reset, 5614 .start = gfx_v11_0_rlc_start, 5615 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5616 }; 5617 5618 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5619 { 5620 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5621 5622 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5623 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5624 else 5625 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5626 5627 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5628 5629 // Program RLC_PG_DELAY3 for CGPG hysteresis 5630 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5631 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5632 case IP_VERSION(11, 0, 1): 5633 case IP_VERSION(11, 0, 4): 5634 case IP_VERSION(11, 5, 0): 5635 case IP_VERSION(11, 5, 1): 5636 case IP_VERSION(11, 5, 2): 5637 case IP_VERSION(11, 5, 3): 5638 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5639 break; 5640 default: 5641 break; 5642 } 5643 } 5644 } 5645 5646 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5647 { 5648 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5649 5650 gfx_v11_cntl_power_gating(adev, enable); 5651 5652 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5653 } 5654 5655 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5656 enum amd_powergating_state state) 5657 { 5658 struct amdgpu_device *adev = ip_block->adev; 5659 bool enable = (state == AMD_PG_STATE_GATE); 5660 5661 if (amdgpu_sriov_vf(adev)) 5662 return 0; 5663 5664 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5665 case IP_VERSION(11, 0, 0): 5666 case IP_VERSION(11, 0, 2): 5667 case IP_VERSION(11, 0, 3): 5668 amdgpu_gfx_off_ctrl(adev, enable); 5669 break; 5670 case IP_VERSION(11, 0, 1): 5671 case IP_VERSION(11, 0, 4): 5672 case IP_VERSION(11, 5, 0): 5673 case IP_VERSION(11, 5, 1): 5674 case 
IP_VERSION(11, 5, 2): 5675 case IP_VERSION(11, 5, 3): 5676 if (!enable) 5677 amdgpu_gfx_off_ctrl(adev, false); 5678 5679 gfx_v11_cntl_pg(adev, enable); 5680 5681 if (enable) 5682 amdgpu_gfx_off_ctrl(adev, true); 5683 5684 break; 5685 default: 5686 break; 5687 } 5688 5689 return 0; 5690 } 5691 5692 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5693 enum amd_clockgating_state state) 5694 { 5695 struct amdgpu_device *adev = ip_block->adev; 5696 5697 if (amdgpu_sriov_vf(adev)) 5698 return 0; 5699 5700 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5701 case IP_VERSION(11, 0, 0): 5702 case IP_VERSION(11, 0, 1): 5703 case IP_VERSION(11, 0, 2): 5704 case IP_VERSION(11, 0, 3): 5705 case IP_VERSION(11, 0, 4): 5706 case IP_VERSION(11, 5, 0): 5707 case IP_VERSION(11, 5, 1): 5708 case IP_VERSION(11, 5, 2): 5709 case IP_VERSION(11, 5, 3): 5710 gfx_v11_0_update_gfx_clock_gating(adev, 5711 state == AMD_CG_STATE_GATE); 5712 break; 5713 default: 5714 break; 5715 } 5716 5717 return 0; 5718 } 5719 5720 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5721 { 5722 struct amdgpu_device *adev = ip_block->adev; 5723 int data; 5724 5725 /* AMD_CG_SUPPORT_GFX_MGCG */ 5726 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5727 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5728 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5729 5730 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5731 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5732 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5733 5734 /* AMD_CG_SUPPORT_GFX_FGCG */ 5735 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5736 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5737 5738 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5739 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5740 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5741 5742 /* AMD_CG_SUPPORT_GFX_CGCG */ 5743 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5744 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5745 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5746 5747 /* AMD_CG_SUPPORT_GFX_CGLS */ 5748 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5749 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5750 5751 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5752 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5753 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5754 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5755 5756 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5757 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5758 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5759 } 5760 5761 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5762 { 5763 /* gfx11 is 32bit rptr*/ 5764 return *(uint32_t *)ring->rptr_cpu_addr; 5765 } 5766 5767 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5768 { 5769 struct amdgpu_device *adev = ring->adev; 5770 u64 wptr; 5771 5772 /* XXX check if swapping is necessary on BE */ 5773 if (ring->use_doorbell) { 5774 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5775 } else { 5776 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5777 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5778 } 5779 5780 return wptr; 5781 } 5782 5783 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5784 { 5785 struct amdgpu_device *adev = ring->adev; 5786 5787 if (ring->use_doorbell) { 5788 /* XXX check if swapping is necessary on BE */ 5789 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5790 ring->wptr); 5791 WDOORBELL64(ring->doorbell_index, ring->wptr); 5792 } else { 5793 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 
5794 lower_32_bits(ring->wptr)); 5795 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5796 upper_32_bits(ring->wptr)); 5797 } 5798 } 5799 5800 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5801 { 5802 /* gfx11 hardware is 32bit rptr */ 5803 return *(uint32_t *)ring->rptr_cpu_addr; 5804 } 5805 5806 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5807 { 5808 u64 wptr; 5809 5810 /* XXX check if swapping is necessary on BE */ 5811 if (ring->use_doorbell) 5812 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5813 else 5814 BUG(); 5815 return wptr; 5816 } 5817 5818 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5819 { 5820 struct amdgpu_device *adev = ring->adev; 5821 5822 /* XXX check if swapping is necessary on BE */ 5823 if (ring->use_doorbell) { 5824 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5825 ring->wptr); 5826 WDOORBELL64(ring->doorbell_index, ring->wptr); 5827 } else { 5828 BUG(); /* only DOORBELL method supported on gfx11 now */ 5829 } 5830 } 5831 5832 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5833 { 5834 struct amdgpu_device *adev = ring->adev; 5835 u32 ref_and_mask, reg_mem_engine; 5836 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; 5837 5838 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 5839 switch (ring->me) { 5840 case 1: 5841 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 5842 break; 5843 case 2: 5844 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 5845 break; 5846 default: 5847 return; 5848 } 5849 reg_mem_engine = 0; 5850 } else { 5851 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; 5852 reg_mem_engine = 1; /* pfp */ 5853 } 5854 5855 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5856 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5857 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5858 ref_and_mask, ref_and_mask, 0x20); 5859 } 5860 5861 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5862 struct amdgpu_job *job, 5863 struct amdgpu_ib *ib, 5864 uint32_t flags) 5865 { 5866 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5867 u32 header, control = 0; 5868 5869 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5870 5871 control |= ib->length_dw | (vmid << 24); 5872 5873 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5874 control |= INDIRECT_BUFFER_PRE_ENB(1); 5875 5876 if (flags & AMDGPU_IB_PREEMPTED) 5877 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5878 5879 if (vmid && !ring->adev->gfx.rs64_enable) 5880 gfx_v11_0_ring_emit_de_meta(ring, 5881 !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED)); 5882 } 5883 5884 amdgpu_ring_write(ring, header); 5885 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5886 amdgpu_ring_write(ring, 5887 #ifdef __BIG_ENDIAN 5888 (2 << 0) | 5889 #endif 5890 lower_32_bits(ib->gpu_addr)); 5891 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5892 amdgpu_ring_write(ring, control); 5893 } 5894 5895 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5896 struct amdgpu_job *job, 5897 struct amdgpu_ib *ib, 5898 uint32_t flags) 5899 { 5900 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5901 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5902 5903 /* Currently, there is a high possibility to get wave ID mismatch 5904 * between ME and GDS, leading to a hw deadlock, because ME generates 5905 * different wave IDs than the GDS expects. 
This situation happens 5906 * randomly when at least 5 compute pipes use GDS ordered append. 5907 * The wave IDs generated by ME are also wrong after suspend/resume. 5908 * Those are probably bugs somewhere else in the kernel driver. 5909 * 5910 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5911 * GDS to 0 for this ring (me/pipe). 5912 */ 5913 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5914 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5915 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5916 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5917 } 5918 5919 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5920 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5921 amdgpu_ring_write(ring, 5922 #ifdef __BIG_ENDIAN 5923 (2 << 0) | 5924 #endif 5925 lower_32_bits(ib->gpu_addr)); 5926 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5927 amdgpu_ring_write(ring, control); 5928 } 5929 5930 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 5931 u64 seq, unsigned flags) 5932 { 5933 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5934 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5935 5936 /* RELEASE_MEM - flush caches, send int */ 5937 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 5938 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 5939 PACKET3_RELEASE_MEM_GCR_GL2_WB | 5940 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 5941 PACKET3_RELEASE_MEM_GCR_GLM_WB | 5942 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 5943 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5944 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 5945 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 5946 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 
2 : 0))); 5947 5948 /* 5949 * the address must be Qword aligned for a 64-bit write, and Dword 5950 * aligned when only the low 32 bits of data are sent (data high is discarded) 5951 */ 5952 if (write64bit) 5953 BUG_ON(addr & 0x7); 5954 else 5955 BUG_ON(addr & 0x3); 5956 amdgpu_ring_write(ring, lower_32_bits(addr)); 5957 amdgpu_ring_write(ring, upper_32_bits(addr)); 5958 amdgpu_ring_write(ring, lower_32_bits(seq)); 5959 amdgpu_ring_write(ring, upper_32_bits(seq)); 5960 amdgpu_ring_write(ring, 0); 5961 } 5962 5963 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5964 { 5965 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 5966 uint32_t seq = ring->fence_drv.sync_seq; 5967 uint64_t addr = ring->fence_drv.gpu_addr; 5968 5969 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 5970 upper_32_bits(addr), seq, 0xffffffff, 4); 5971 } 5972 5973 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 5974 uint16_t pasid, uint32_t flush_type, 5975 bool all_hub, uint8_t dst_sel) 5976 { 5977 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 5978 amdgpu_ring_write(ring, 5979 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 5980 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 5981 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 5982 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 5983 } 5984 5985 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5986 unsigned vmid, uint64_t pd_addr) 5987 { 5988 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 5989 5990 /* compute doesn't have PFP */ 5991 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 5992 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5993 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5994 amdgpu_ring_write(ring, 0x0); 5995 } 5996 5997 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 5998 * has changed in any way.
5999 */ 6000 ring->set_q_mode_offs = 0; 6001 ring->set_q_mode_ptr = NULL; 6002 } 6003 6004 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6005 u64 seq, unsigned int flags) 6006 { 6007 struct amdgpu_device *adev = ring->adev; 6008 6009 /* we only allocate 32bit for each seq wb address */ 6010 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6011 6012 /* write fence seq to the "addr" */ 6013 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6014 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6015 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6016 amdgpu_ring_write(ring, lower_32_bits(addr)); 6017 amdgpu_ring_write(ring, upper_32_bits(addr)); 6018 amdgpu_ring_write(ring, lower_32_bits(seq)); 6019 6020 if (flags & AMDGPU_FENCE_FLAG_INT) { 6021 /* set register to trigger INT */ 6022 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6023 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6024 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6025 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 6026 amdgpu_ring_write(ring, 0); 6027 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6028 } 6029 } 6030 6031 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 6032 uint32_t flags) 6033 { 6034 uint32_t dw2 = 0; 6035 6036 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6037 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6038 /* set load_global_config & load_global_uconfig */ 6039 dw2 |= 0x8001; 6040 /* set load_cs_sh_regs */ 6041 dw2 |= 0x01000000; 6042 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6043 dw2 |= 0x10002; 6044 } 6045 6046 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6047 amdgpu_ring_write(ring, dw2); 6048 amdgpu_ring_write(ring, 0); 6049 } 6050 6051 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 6052 uint64_t addr) 6053 { 6054 unsigned ret; 6055 6056 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6057 amdgpu_ring_write(ring, lower_32_bits(addr)); 6058 amdgpu_ring_write(ring, upper_32_bits(addr)); 6059 /* discard following DWs if *cond_exec_gpu_addr==0 */ 6060 amdgpu_ring_write(ring, 0); 6061 ret = ring->wptr & ring->buf_mask; 6062 /* patch dummy value later */ 6063 amdgpu_ring_write(ring, 0); 6064 6065 return ret; 6066 } 6067 6068 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, 6069 u64 shadow_va, u64 csa_va, 6070 u64 gds_va, bool init_shadow, 6071 int vmid) 6072 { 6073 struct amdgpu_device *adev = ring->adev; 6074 unsigned int offs, end; 6075 6076 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) 6077 return; 6078 6079 /* 6080 * The logic here isn't easy to understand because we need to keep state 6081 * across multiple executions of the function as well as between the 6082 * CPU and GPU. The general idea is that the newly written GPU command 6083 * has a condition on the previous one and is only executed if really 6084 * necessary. 6085 */ 6086 6087 /* 6088 * The dw in the NOP controls whether the next SET_Q_MODE packet is 6089 * executed or not. Reserve 64 bits just to be on the safe side. 6090 */ 6091 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); 6092 offs = ring->wptr & ring->buf_mask; 6093 6094 /* 6095 * We start with skipping the prefix SET_Q_MODE and always executing 6096 * the postfix SET_Q_MODE packet. This is changed below with a 6097 * WRITE_DATA command once the postfix has executed. 6098 */ 6099 amdgpu_ring_write(ring, shadow_va ?
1 : 0); 6100 amdgpu_ring_write(ring, 0); 6101 6102 if (ring->set_q_mode_offs) { 6103 uint64_t addr; 6104 6105 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6106 addr += ring->set_q_mode_offs << 2; 6107 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6108 } 6109 6110 /* 6111 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6112 * next prefix SET_Q_MODE packet executes as well. 6113 */ 6114 if (!shadow_va) { 6115 uint64_t addr; 6116 6117 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6118 addr += offs << 2; 6119 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6120 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6121 amdgpu_ring_write(ring, lower_32_bits(addr)); 6122 amdgpu_ring_write(ring, upper_32_bits(addr)); 6123 amdgpu_ring_write(ring, 0x1); 6124 } 6125 6126 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6127 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6128 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6129 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6130 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6131 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6132 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6133 amdgpu_ring_write(ring, shadow_va ? 6134 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6135 amdgpu_ring_write(ring, init_shadow ? 6136 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6137 6138 if (ring->set_q_mode_offs) 6139 amdgpu_ring_patch_cond_exec(ring, end); 6140 6141 if (shadow_va) { 6142 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6143 6144 /* 6145 * If the tokens match try to skip the last postfix SET_Q_MODE 6146 * packet to avoid saving/restoring the state all the time. 6147 */ 6148 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6149 *ring->set_q_mode_ptr = 0; 6150 6151 ring->set_q_mode_token = token; 6152 } else { 6153 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6154 } 6155 6156 ring->set_q_mode_offs = offs; 6157 } 6158 6159 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 6160 { 6161 int i, r = 0; 6162 struct amdgpu_device *adev = ring->adev; 6163 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6164 struct amdgpu_ring *kiq_ring = &kiq->ring; 6165 unsigned long flags; 6166 6167 if (adev->enable_mes) 6168 return -EINVAL; 6169 6170 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6171 return -EINVAL; 6172 6173 spin_lock_irqsave(&kiq->ring_lock, flags); 6174 6175 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6176 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6177 return -ENOMEM; 6178 } 6179 6180 /* assert preemption condition */ 6181 amdgpu_ring_set_preempt_cond_exec(ring, false); 6182 6183 /* assert IB preemption, emit the trailing fence */ 6184 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6185 ring->trail_fence_gpu_addr, 6186 ++ring->trail_seq); 6187 amdgpu_ring_commit(kiq_ring); 6188 6189 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6190 6191 /* poll the trailing fence */ 6192 for (i = 0; i < adev->usec_timeout; i++) { 6193 if (ring->trail_seq == 6194 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6195 break; 6196 udelay(1); 6197 } 6198 6199 if (i >= adev->usec_timeout) { 6200 r = -EINVAL; 6201 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6202 } 6203 6204 /* deassert preemption condition */ 6205 amdgpu_ring_set_preempt_cond_exec(ring, true); 6206 return r; 6207 } 6208 6209 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6210 { 6211 struct 
amdgpu_device *adev = ring->adev; 6212 struct v10_de_ib_state de_payload = {0}; 6213 uint64_t offset, gds_addr, de_payload_gpu_addr; 6214 void *de_payload_cpu_addr; 6215 int cnt; 6216 6217 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6218 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6219 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6220 6221 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6222 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6223 PAGE_SIZE); 6224 6225 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6226 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6227 6228 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6229 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6230 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6231 WRITE_DATA_DST_SEL(8) | 6232 WR_CONFIRM) | 6233 WRITE_DATA_CACHE_POLICY(0)); 6234 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6235 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6236 6237 if (resume) 6238 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6239 sizeof(de_payload) >> 2); 6240 else 6241 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6242 sizeof(de_payload) >> 2); 6243 } 6244 6245 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6246 bool secure) 6247 { 6248 uint32_t v = secure ? FRAME_TMZ : 0; 6249 6250 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6251 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6252 } 6253 6254 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6255 uint32_t reg_val_offs) 6256 { 6257 struct amdgpu_device *adev = ring->adev; 6258 6259 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6260 amdgpu_ring_write(ring, 0 | /* src: register*/ 6261 (5 << 8) | /* dst: memory */ 6262 (1 << 20)); /* write confirm */ 6263 amdgpu_ring_write(ring, reg); 6264 amdgpu_ring_write(ring, 0); 6265 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6266 reg_val_offs * 4)); 6267 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6268 reg_val_offs * 4)); 6269 } 6270 6271 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6272 uint32_t val) 6273 { 6274 uint32_t cmd = 0; 6275 6276 switch (ring->funcs->type) { 6277 case AMDGPU_RING_TYPE_GFX: 6278 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6279 break; 6280 case AMDGPU_RING_TYPE_KIQ: 6281 cmd = (1 << 16); /* no inc addr */ 6282 break; 6283 default: 6284 cmd = WR_CONFIRM; 6285 break; 6286 } 6287 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6288 amdgpu_ring_write(ring, cmd); 6289 amdgpu_ring_write(ring, reg); 6290 amdgpu_ring_write(ring, 0); 6291 amdgpu_ring_write(ring, val); 6292 } 6293 6294 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6295 uint32_t val, uint32_t mask) 6296 { 6297 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6298 } 6299 6300 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6301 uint32_t reg0, uint32_t reg1, 6302 uint32_t ref, uint32_t mask) 6303 { 6304 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6305 6306 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6307 ref, mask, 0x20); 6308 } 6309 6310 static void 6311 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6312 uint32_t me, uint32_t pipe, 6313 enum amdgpu_interrupt_state state) 6314 { 6315 uint32_t cp_int_cntl, cp_int_cntl_reg; 6316 6317 if (!me) { 6318 switch (pipe) { 6319 case 0: 6320 
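/*
 * Illustration: for ME0 the per-pipe EOP interrupt control lives in
 * CP_INT_CNTL_RING0 and CP_INT_CNTL_RING1, selected purely by the pipe
 * index; any other ME is rejected by the default branches. A standalone
 * sketch of the selection, with placeholder register values (REG_RING0
 * and REG_RING1 are assumptions, not the real offsets):
 *
 *   enum { REG_RING0 = 0, REG_RING1 = 1 };
 *
 *   static int gfx_eop_cntl_reg(int me, int pipe)
 *   {
 *           if (me != 0 || pipe < 0 || pipe > 1)
 *                   return -1;
 *           return pipe ? REG_RING1 : REG_RING0;
 *   }
 */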
cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6321 break; 6322 case 1: 6323 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6324 break; 6325 default: 6326 DRM_DEBUG("invalid pipe %d\n", pipe); 6327 return; 6328 } 6329 } else { 6330 DRM_DEBUG("invalid me %d\n", me); 6331 return; 6332 } 6333 6334 switch (state) { 6335 case AMDGPU_IRQ_STATE_DISABLE: 6336 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6337 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6338 TIME_STAMP_INT_ENABLE, 0); 6339 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6340 GENERIC0_INT_ENABLE, 0); 6341 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6342 break; 6343 case AMDGPU_IRQ_STATE_ENABLE: 6344 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6345 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6346 TIME_STAMP_INT_ENABLE, 1); 6347 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6348 GENERIC0_INT_ENABLE, 1); 6349 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6350 break; 6351 default: 6352 break; 6353 } 6354 } 6355 6356 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6357 int me, int pipe, 6358 enum amdgpu_interrupt_state state) 6359 { 6360 u32 mec_int_cntl, mec_int_cntl_reg; 6361 6362 /* 6363 * amdgpu controls only the first MEC. That's why this function only 6364 * handles the setting of interrupts for this specific MEC. All other 6365 * pipes' interrupts are set by amdkfd. 6366 */ 6367 6368 if (me == 1) { 6369 switch (pipe) { 6370 case 0: 6371 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6372 break; 6373 case 1: 6374 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6375 break; 6376 case 2: 6377 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6378 break; 6379 case 3: 6380 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6381 break; 6382 default: 6383 DRM_DEBUG("invalid pipe %d\n", pipe); 6384 return; 6385 } 6386 } else { 6387 DRM_DEBUG("invalid me %d\n", me); 6388 return; 6389 } 6390 6391 switch (state) { 6392 case AMDGPU_IRQ_STATE_DISABLE: 6393 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6394 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6395 TIME_STAMP_INT_ENABLE, 0); 6396 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6397 GENERIC0_INT_ENABLE, 0); 6398 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6399 break; 6400 case AMDGPU_IRQ_STATE_ENABLE: 6401 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6402 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6403 TIME_STAMP_INT_ENABLE, 1); 6404 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6405 GENERIC0_INT_ENABLE, 1); 6406 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6407 break; 6408 default: 6409 break; 6410 } 6411 } 6412 6413 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6414 struct amdgpu_irq_src *src, 6415 unsigned type, 6416 enum amdgpu_interrupt_state state) 6417 { 6418 switch (type) { 6419 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6420 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6421 break; 6422 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6423 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6424 break; 6425 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6426 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6427 break; 6428 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6429 
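/*
 * Illustration: the compute EOP interrupt sources are laid out linearly
 * by (mec, pipe), which is why gfx_v11_0_set_userq_eop_interrupts()
 * earlier computes "base + mec * pipes_per_mec + pipe" and this switch
 * can enumerate MEC1's pipes in order. A standalone sketch of the
 * forward and inverse mapping (COMPUTE_EOP_BASE and PIPES_PER_MEC are
 * stand-ins for the real enum value and adev->gfx.mec.num_pipe_per_mec):
 *
 *   enum { COMPUTE_EOP_BASE = 2, PIPES_PER_MEC = 4 };
 *
 *   static int compute_eop_irq_type(int mec, int pipe)
 *   {
 *           return COMPUTE_EOP_BASE + mec * PIPES_PER_MEC + pipe;
 *   }
 *
 *   static void decode_compute_eop_irq(int irq, int *mec, int *pipe)
 *   {
 *           int off = irq - COMPUTE_EOP_BASE;
 *
 *           *mec = off / PIPES_PER_MEC;
 *           *pipe = off % PIPES_PER_MEC;
 *   }
 */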
gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6430 break; 6431 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6432 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6433 break; 6434 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6435 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6436 break; 6437 default: 6438 break; 6439 } 6440 return 0; 6441 } 6442 6443 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6444 struct amdgpu_irq_src *source, 6445 struct amdgpu_iv_entry *entry) 6446 { 6447 u32 doorbell_offset = entry->src_data[0]; 6448 u8 me_id, pipe_id, queue_id; 6449 struct amdgpu_ring *ring; 6450 int i; 6451 6452 DRM_DEBUG("IH: CP EOP\n"); 6453 6454 if (adev->enable_mes && doorbell_offset) { 6455 struct amdgpu_userq_fence_driver *fence_drv = NULL; 6456 struct xarray *xa = &adev->userq_xa; 6457 unsigned long flags; 6458 6459 xa_lock_irqsave(xa, flags); 6460 fence_drv = xa_load(xa, doorbell_offset); 6461 if (fence_drv) 6462 amdgpu_userq_fence_driver_process(fence_drv); 6463 xa_unlock_irqrestore(xa, flags); 6464 } else { 6465 me_id = (entry->ring_id & 0x0c) >> 2; 6466 pipe_id = (entry->ring_id & 0x03) >> 0; 6467 queue_id = (entry->ring_id & 0x70) >> 4; 6468 6469 switch (me_id) { 6470 case 0: 6471 if (pipe_id == 0) 6472 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6473 else 6474 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6475 break; 6476 case 1: 6477 case 2: 6478 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6479 ring = &adev->gfx.compute_ring[i]; 6480 /* Per-queue interrupt is supported for MEC starting from VI. 6481 * The interrupt can only be enabled/disabled per pipe instead 6482 * of per queue. 6483 */ 6484 if ((ring->me == me_id) && 6485 (ring->pipe == pipe_id) && 6486 (ring->queue == queue_id)) 6487 amdgpu_fence_process(ring); 6488 } 6489 break; 6490 } 6491 } 6492 6493 return 0; 6494 } 6495 6496 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6497 struct amdgpu_irq_src *source, 6498 unsigned int type, 6499 enum amdgpu_interrupt_state state) 6500 { 6501 u32 cp_int_cntl_reg, cp_int_cntl; 6502 int i, j; 6503 6504 switch (state) { 6505 case AMDGPU_IRQ_STATE_DISABLE: 6506 case AMDGPU_IRQ_STATE_ENABLE: 6507 for (i = 0; i < adev->gfx.me.num_me; i++) { 6508 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6509 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6510 6511 if (cp_int_cntl_reg) { 6512 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6513 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6514 PRIV_REG_INT_ENABLE, 6515 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6516 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6517 } 6518 } 6519 } 6520 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6521 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6522 /* MECs start at 1 */ 6523 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6524 6525 if (cp_int_cntl_reg) { 6526 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6527 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6528 PRIV_REG_INT_ENABLE, 6529 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6530 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6531 } 6532 } 6533 } 6534 break; 6535 default: 6536 break; 6537 } 6538 6539 return 0; 6540 } 6541 6542 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6543 struct amdgpu_irq_src *source, 6544 unsigned type, 6545 enum amdgpu_interrupt_state state) 6546 { 6547 u32 cp_int_cntl_reg, cp_int_cntl; 6548 int i, j; 6549 6550 switch (state) { 6551 case AMDGPU_IRQ_STATE_DISABLE: 6552 case AMDGPU_IRQ_STATE_ENABLE: 6553 for (i = 0; i < adev->gfx.me.num_me; i++) { 6554 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6555 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6556 6557 if (cp_int_cntl_reg) { 6558 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6559 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6560 OPCODE_ERROR_INT_ENABLE, 6561 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6562 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6563 } 6564 } 6565 } 6566 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6567 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6568 /* MECs start at 1 */ 6569 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6570 6571 if (cp_int_cntl_reg) { 6572 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6573 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6574 OPCODE_ERROR_INT_ENABLE, 6575 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6576 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6577 } 6578 } 6579 } 6580 break; 6581 default: 6582 break; 6583 } 6584 return 0; 6585 } 6586 6587 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6588 struct amdgpu_irq_src *source, 6589 unsigned int type, 6590 enum amdgpu_interrupt_state state) 6591 { 6592 u32 cp_int_cntl_reg, cp_int_cntl; 6593 int i, j; 6594 6595 switch (state) { 6596 case AMDGPU_IRQ_STATE_DISABLE: 6597 case AMDGPU_IRQ_STATE_ENABLE: 6598 for (i = 0; i < adev->gfx.me.num_me; i++) { 6599 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6600 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6601 6602 if (cp_int_cntl_reg) { 6603 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6604 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6605 PRIV_INSTR_INT_ENABLE, 6606 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6607 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6608 } 6609 } 6610 } 6611 break; 6612 default: 6613 break; 6614 } 6615 6616 return 0; 6617 } 6618 6619 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6620 struct amdgpu_iv_entry *entry) 6621 { 6622 u8 me_id, pipe_id, queue_id; 6623 struct amdgpu_ring *ring; 6624 int i; 6625 6626 me_id = (entry->ring_id & 0x0c) >> 2; 6627 pipe_id = (entry->ring_id & 0x03) >> 0; 6628 queue_id = (entry->ring_id & 0x70) >> 4; 6629 6630 if (!adev->gfx.disable_kq) { 6631 switch (me_id) { 6632 case 0: 6633 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6634 ring = &adev->gfx.gfx_ring[i]; 6635 if (ring->me == me_id && ring->pipe == pipe_id && 6636 ring->queue == queue_id) 6637 drm_sched_fault(&ring->sched); 6638 } 6639 break; 6640 case 1: 6641 case 2: 6642 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6643 ring = &adev->gfx.compute_ring[i]; 6644 if (ring->me == me_id && ring->pipe == pipe_id && 6645 ring->queue == queue_id) 6646 drm_sched_fault(&ring->sched); 6647 } 6648 break; 6649 default: 6650 BUG(); 6651 break; 6652 } 6653 } 6654 } 6655 6656 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6657 struct amdgpu_irq_src *source, 6658 struct amdgpu_iv_entry *entry) 6659 { 6660 DRM_ERROR("Illegal register access in command stream\n"); 6661 gfx_v11_0_handle_priv_fault(adev, entry); 6662 return 0; 6663 } 6664 6665 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6666 struct amdgpu_irq_src *source, 6667 struct amdgpu_iv_entry *entry) 6668 { 6669 DRM_ERROR("Illegal opcode in command stream \n"); 6670 gfx_v11_0_handle_priv_fault(adev, entry); 6671 return 0; 6672 } 6673 6674 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6675 struct amdgpu_irq_src *source, 6676 struct amdgpu_iv_entry *entry) 6677 { 6678 DRM_ERROR("Illegal instruction in command stream\n"); 6679 gfx_v11_0_handle_priv_fault(adev, entry); 6680 return 0; 6681 } 6682 6683 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6684 struct amdgpu_irq_src *source, 6685 struct amdgpu_iv_entry *entry) 6686 { 6687 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6688 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6689 6690 return 0; 6691 } 6692 6693 #if 0 6694 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6695 struct amdgpu_irq_src *src, 6696 unsigned int type, 6697 enum amdgpu_interrupt_state state) 6698 { 6699 uint32_t tmp, target; 6700 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6701 6702 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6703 target += ring->pipe; 6704 6705 switch (type) { 6706 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6707 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6708 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6709 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6710 GENERIC2_INT_ENABLE, 0); 6711 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6712 6713 tmp = RREG32_SOC15_IP(GC, target); 6714 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6715 GENERIC2_INT_ENABLE, 0); 6716 WREG32_SOC15_IP(GC, target, tmp); 6717 } else { 6718 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6719 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6720 GENERIC2_INT_ENABLE, 1); 6721 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6722 6723 tmp = RREG32_SOC15_IP(GC, target); 6724 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6725 GENERIC2_INT_ENABLE, 1); 6726 WREG32_SOC15_IP(GC, target, tmp); 6727 } 6728 break; 6729 default: 6730 BUG(); /* kiq only support GENERIC2_INT now */ 6731 break; 6732 } 6733 return 0; 
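/*
 * Illustration: gfx_v11_0_eop_irq() and gfx_v11_0_handle_priv_fault()
 * above both decode the IV entry's ring_id into (me, pipe, queue) with
 * the same bitfield layout: pipe in bits [1:0], me in bits [3:2] and
 * queue in bits [6:4]. A standalone sketch of that decode:
 *
 *   static void decode_ring_id(unsigned int ring_id, unsigned int *me,
 *                              unsigned int *pipe, unsigned int *queue)
 *   {
 *           *me = (ring_id & 0x0c) >> 2;
 *           *pipe = ring_id & 0x03;
 *           *queue = (ring_id & 0x70) >> 4;
 *   }
 */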
6734 } 6735 #endif 6736 6737 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6738 { 6739 const unsigned int gcr_cntl = 6740 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6741 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6742 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6743 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6744 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6745 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6746 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6747 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6748 6749 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6750 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6751 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6752 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6753 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6754 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6755 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6756 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6757 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6758 } 6759 6760 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6761 { 6762 /* Disable pipe reset until the CP firmware fully supports it. */ 6763 dev_warn_once(adev->dev, "The CP firmware does not support pipe reset yet.\n"); 6764 return false; 6765 } 6766 6767 6768 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6769 { 6770 struct amdgpu_device *adev = ring->adev; 6771 uint32_t reset_pipe = 0, clean_pipe = 0; 6772 int r; 6773 6774 if (!gfx_v11_pipe_reset_support(adev)) 6775 return -EOPNOTSUPP; 6776 6777 gfx_v11_0_set_safe_mode(adev, 0); 6778 mutex_lock(&adev->srbm_mutex); 6779 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6780 6781 switch (ring->pipe) { 6782 case 0: 6783 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6784 PFP_PIPE0_RESET, 1); 6785 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6786 ME_PIPE0_RESET, 1); 6787 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6788 PFP_PIPE0_RESET, 0); 6789 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6790 ME_PIPE0_RESET, 0); 6791 break; 6792 case 1: 6793 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6794 PFP_PIPE1_RESET, 1); 6795 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6796 ME_PIPE1_RESET, 1); 6797 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6798 PFP_PIPE1_RESET, 0); 6799 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6800 ME_PIPE1_RESET, 0); 6801 break; 6802 default: 6803 break; 6804 } 6805 6806 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6807 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6808 6809 r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6810 RS64_FW_UC_START_ADDR_LO; 6811 soc21_grbm_select(adev, 0, 0, 0, 0); 6812 mutex_unlock(&adev->srbm_mutex); 6813 gfx_v11_0_unset_safe_mode(adev, 0); 6814 6815 dev_info(adev->dev, "Ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6816 r == 0 ? "successfully" : "failed"); 6817 /* FIXME: Sometimes the driver can't cache the ME firmware start PC correctly, 6818 * so the pipe reset status relies on the later gfx ring test result. 6819 */
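/*
 * Illustration: the readback above converts the dword-based ME
 * instruction pointer to a byte address and subtracts the firmware start
 * address, so r == 0 means the pipe restarted at the first firmware
 * instruction. A standalone sketch of that check (the parameter names
 * are illustrative, not driver symbols):
 *
 *   static bool restarted_at_fw_start(uint32_t instr_pntr_dwords,
 *                                     uint32_t fw_start_addr_lo)
 *   {
 *           return (instr_pntr_dwords << 2) == fw_start_addr_lo;
 *   }
 */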

static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
			       unsigned int vmid,
			       struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false, 0);
	if (r) {
		dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
		r = gfx_v11_reset_gfx_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kgq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kgq\n");
		return r;
	}

	r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
	if (r) {
		dev_err(adev->dev, "failed to remap kgq\n");
		return r;
	}

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
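
/*
 * gfx_v11_0_reset_kgq() above and gfx_v11_0_reset_kcq() below follow the
 * same recovery ladder: try a MES queue reset first, fall back to a full
 * pipe reset, then re-init the MQD and remap the queue through MES.  The
 * hook is reached through ring->funcs->reset; a hypothetical caller
 * (names are placeholders, not the actual timeout handler) would be:
 */
#if 0
	if (ring->funcs->reset)
		r = ring->funcs->reset(ring, vmid, timedout_fence);
#endif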

static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	clean_pipe = reset_pipe;

	if (adev->gfx.rs64_enable) {
		switch (ring->pipe) {
		case 0:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 0);
			break;
		case 1:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 0);
			break;
		case 2:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 0);
			break;
		case 3:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 0);
			break;
		default:
			break;
		}
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
		    RS64_FW_UC_START_ADDR_LO;
	} else {
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
		} else {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec2 fw pc: CP_MEC2_INSTR_PNTR */
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "ring %s: pipe reset to the MEC firmware start PC %s\n",
		 ring->name, r == 0 ? "succeeded" : "failed");
	/* FIXME: Sometimes the driver can't read back the MEC firmware start PC
	 * correctly, so the pipe reset status relies on the compute ring test
	 * result.
	 */
	return 0;
}

static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
			       unsigned int vmid,
			       struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
	if (r) {
		dev_warn(adev->dev, "failed (%d) to reset kcq via MES, trying pipe reset\n", r);
		r = gfx_v11_0_reset_compute_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kcq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kcq\n");
		return r;
	}
	r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
	if (r) {
		dev_err(adev->dev, "failed to remap kcq\n");
		return r;
	}

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
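
/*
 * The dump/print helpers below store ARRAY_SIZE(gc_cp_reg_list_11) dwords
 * per compute queue instance, advancing 'index' by reg_count per queue, so
 * the backing buffer is assumed to hold num_mec * num_pipe_per_mec *
 * num_queue_per_pipe * reg_count entries; e.g. 2 MECs with 4 pipes of 8
 * queues and a 16-entry register list need 2 * 4 * 8 * 16 = 1024 dwords.
 * The gfx-queue buffer is sized the same way from gc_gfx_queue_reg_list_11.
 */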
"regCP_MEC_ME2_HEADER_DUMP", 7049 adev->gfx.ip_dump_compute_queues[index + reg]); 7050 else 7051 drm_printf(p, "%-50s \t 0x%08x\n", 7052 gc_cp_reg_list_11[reg].reg_name, 7053 adev->gfx.ip_dump_compute_queues[index + reg]); 7054 } 7055 index += reg_count; 7056 } 7057 } 7058 } 7059 7060 /* print gfx queue registers for all instances */ 7061 if (!adev->gfx.ip_dump_gfx_queues) 7062 return; 7063 7064 index = 0; 7065 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7066 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7067 adev->gfx.me.num_me, 7068 adev->gfx.me.num_pipe_per_me, 7069 adev->gfx.me.num_queue_per_pipe); 7070 7071 for (i = 0; i < adev->gfx.me.num_me; i++) { 7072 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7073 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7074 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7075 for (reg = 0; reg < reg_count; reg++) { 7076 drm_printf(p, "%-50s \t 0x%08x\n", 7077 gc_gfx_queue_reg_list_11[reg].reg_name, 7078 adev->gfx.ip_dump_gfx_queues[index + reg]); 7079 } 7080 index += reg_count; 7081 } 7082 } 7083 } 7084 } 7085 7086 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7087 { 7088 struct amdgpu_device *adev = ip_block->adev; 7089 uint32_t i, j, k, reg, index = 0; 7090 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7091 7092 if (!adev->gfx.ip_dump_core) 7093 return; 7094 7095 amdgpu_gfx_off_ctrl(adev, false); 7096 for (i = 0; i < reg_count; i++) 7097 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7098 amdgpu_gfx_off_ctrl(adev, true); 7099 7100 /* dump compute queue registers for all instances */ 7101 if (!adev->gfx.ip_dump_compute_queues) 7102 return; 7103 7104 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7105 amdgpu_gfx_off_ctrl(adev, false); 7106 mutex_lock(&adev->srbm_mutex); 7107 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7108 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7109 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7110 /* ME0 is for GFX so start from 1 for CP */ 7111 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7112 for (reg = 0; reg < reg_count; reg++) { 7113 if (i && 7114 gc_cp_reg_list_11[reg].reg_offset == 7115 regCP_MEC_ME1_HEADER_DUMP) 7116 adev->gfx.ip_dump_compute_queues[index + reg] = 7117 RREG32(SOC15_REG_OFFSET(GC, 0, 7118 regCP_MEC_ME2_HEADER_DUMP)); 7119 else 7120 adev->gfx.ip_dump_compute_queues[index + reg] = 7121 RREG32(SOC15_REG_ENTRY_OFFSET( 7122 gc_cp_reg_list_11[reg])); 7123 } 7124 index += reg_count; 7125 } 7126 } 7127 } 7128 soc21_grbm_select(adev, 0, 0, 0, 0); 7129 mutex_unlock(&adev->srbm_mutex); 7130 amdgpu_gfx_off_ctrl(adev, true); 7131 7132 /* dump gfx queue registers for all instances */ 7133 if (!adev->gfx.ip_dump_gfx_queues) 7134 return; 7135 7136 index = 0; 7137 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7138 amdgpu_gfx_off_ctrl(adev, false); 7139 mutex_lock(&adev->srbm_mutex); 7140 for (i = 0; i < adev->gfx.me.num_me; i++) { 7141 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7142 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7143 soc21_grbm_select(adev, i, j, k, 0); 7144 7145 for (reg = 0; reg < reg_count; reg++) { 7146 adev->gfx.ip_dump_gfx_queues[index + reg] = 7147 RREG32(SOC15_REG_ENTRY_OFFSET( 7148 gc_gfx_queue_reg_list_11[reg])); 7149 } 7150 index += reg_count; 7151 } 7152 } 7153 } 7154 soc21_grbm_select(adev, 0, 0, 0, 0); 7155 mutex_unlock(&adev->srbm_mutex); 7156 amdgpu_gfx_off_ctrl(adev, true); 7157 } 7158 7159 static void 

static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
	/* Emit the cleaner shader */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
	amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
}

static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_begin_use(ring);

	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
}

static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_end_use(ring);

	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
}

static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
	.name = "gfx_v11_0",
	.early_init = gfx_v11_0_early_init,
	.late_init = gfx_v11_0_late_init,
	.sw_init = gfx_v11_0_sw_init,
	.sw_fini = gfx_v11_0_sw_fini,
	.hw_init = gfx_v11_0_hw_init,
	.hw_fini = gfx_v11_0_hw_fini,
	.suspend = gfx_v11_0_suspend,
	.resume = gfx_v11_0_resume,
	.is_idle = gfx_v11_0_is_idle,
	.wait_for_idle = gfx_v11_0_wait_for_idle,
	.soft_reset = gfx_v11_0_soft_reset,
	.check_soft_reset = gfx_v11_0_check_soft_reset,
	.post_soft_reset = gfx_v11_0_post_soft_reset,
	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
	.set_powergating_state = gfx_v11_0_set_powergating_state,
	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
	.dump_ip_state = gfx_v11_ip_dump,
	.print_ip_state = gfx_v11_ip_print,
};
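
/*
 * In the ring-funcs tables below, .emit_frame_size is the worst-case number
 * of ring dwords a single submission may emit outside its IBs, and
 * .emit_ib_size the dwords per IB; the ring layer reserves
 * emit_frame_size + num_ibs * emit_ib_size dwords up front, so every
 * emit_* callback added to a table must be accounted for in the sum.
 */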

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 247 dwords maximum if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
};
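
/*
 * gfx_v11_0_set_ring_funcs() below installs the three variants above: GFX
 * rings carry the preemption/shadow/context-control hooks, compute rings a
 * leaner set, and the KIQ ring adds emit_rreg/emit_wreg so registers can be
 * accessed through the queue where direct MMIO is not possible (e.g. under
 * SR-IOV).
 */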
&gfx_v11_0_ring_funcs_kiq; 7333 7334 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7335 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 7336 7337 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7338 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 7339 } 7340 7341 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 7342 .set = gfx_v11_0_set_eop_interrupt_state, 7343 .process = gfx_v11_0_eop_irq, 7344 }; 7345 7346 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 7347 .set = gfx_v11_0_set_priv_reg_fault_state, 7348 .process = gfx_v11_0_priv_reg_irq, 7349 }; 7350 7351 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { 7352 .set = gfx_v11_0_set_bad_op_fault_state, 7353 .process = gfx_v11_0_bad_op_irq, 7354 }; 7355 7356 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 7357 .set = gfx_v11_0_set_priv_inst_fault_state, 7358 .process = gfx_v11_0_priv_inst_irq, 7359 }; 7360 7361 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 7362 .process = gfx_v11_0_rlc_gc_fed_irq, 7363 }; 7364 7365 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 7366 { 7367 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7368 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 7369 7370 adev->gfx.priv_reg_irq.num_types = 1; 7371 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 7372 7373 adev->gfx.bad_op_irq.num_types = 1; 7374 adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; 7375 7376 adev->gfx.priv_inst_irq.num_types = 1; 7377 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 7378 7379 adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 7380 adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 7381 7382 } 7383 7384 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 7385 { 7386 if (adev->flags & AMD_IS_APU) 7387 adev->gfx.imu.mode = MISSION_MODE; 7388 else 7389 adev->gfx.imu.mode = DEBUG_MODE; 7390 7391 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 7392 } 7393 7394 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 7395 { 7396 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 7397 } 7398 7399 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 7400 { 7401 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 7402 adev->gfx.config.max_sh_per_se * 7403 adev->gfx.config.max_shader_engines; 7404 7405 adev->gds.gds_size = 0x1000; 7406 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 7407 adev->gds.gws_size = 64; 7408 adev->gds.oa_size = 16; 7409 } 7410 7411 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 7412 { 7413 /* set gfx eng mqd */ 7414 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 7415 sizeof(struct v11_gfx_mqd); 7416 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 7417 gfx_v11_0_gfx_mqd_init; 7418 /* set compute eng mqd */ 7419 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 7420 sizeof(struct v11_compute_mqd); 7421 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 7422 gfx_v11_0_compute_mqd_init; 7423 } 7424 7425 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 7426 u32 bitmap) 7427 { 7428 u32 data; 7429 7430 if (!bitmap) 7431 return; 7432 7433 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7434 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7435 7436 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 7437 } 7438 7439 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 7440 { 7441 u32 

static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* if there is one WGP enabled, it means 2 CUs will be enabled */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/* GFX11 could support more than 4 SEs, while the bitmap
			 * in the cu_info struct is 4x4 and the ioctl interface
			 * struct drm_amdgpu_info_device must stay stable.
			 * So we use the last two columns of the bitmap to store
			 * the CU masks for SEs 4 to 7; the layout of the bitmap is:
			 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};