/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS	1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL			0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX		1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1			0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX		1

#define regCP_GFX_MQD_CONTROL_DEFAULT                              0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT                                 0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT                       0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT                              0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT                                 0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT                          0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT                                 0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT                              0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT                      0x00000000
#define regCP_MQD_CONTROL_DEFAULT                                  0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT                               0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT                                  0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT                         0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT                               0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");

static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
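
/*
 * Illustrative decode (not part of the driver): DEFAULT_SH_MEM_CONFIG ORs
 * three independent SH_MEM_CONFIG bit fields into one compile-time constant.
 * Assuming the navi10_enum.h values SH_MEM_ADDRESS_MODE_64 == 0 and
 * SH_MEM_ALIGNMENT_MODE_UNALIGNED == 3, the macro works out to roughly:
 *
 *	(0 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) |	64-bit addressing
 *	(3 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |	unaligned access allowed
 *	(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)	initial instruction prefetch
 *
 * so the whole per-SA shader memory configuration is a single dword.
 */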

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
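
/*
 * Illustrative sketch (not part of the driver): a KIQ client emits the packet
 * built by gfx11_kiq_map_queues() by reserving ring space first and then
 * committing, roughly:
 *
 *	r = amdgpu_ring_alloc(kiq_ring, adev->gfx.kiq[0].pmf->map_queues_size);
 *	if (!r) {
 *		gfx11_kiq_map_queues(kiq_ring, ring);
 *		amdgpu_ring_commit(kiq_ring);
 *	}
 *
 * map_queues_size below is 7: one PACKET3 header plus the six body dwords
 * written above.
 */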

static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action,
					      gpu_addr, seq, 0);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}
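
/*
 * Illustrative sketch (not part of the driver): each soc15_reg_golden entry
 * above is applied by soc15_program_register_sequence() as a masked
 * read-modify-write, approximately:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~entry->and_mask;
 *	tmp |= entry->or_mask & entry->and_mask;
 *	WREG32(reg, tmp);
 *
 * so the golden_settings_gc_11_0 entry for regTCP_CNTL only forces bit 29
 * (0x20000000) on and leaves every other bit at its hardware default.
 */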

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by the remainder one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
			ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
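
/*
 * Illustrative summary (not part of the driver) of the handshake performed by
 * gfx_v11_0_ring_test_ring() above:
 *
 *	WREG32(scratch, 0xCAFEDEAD);	CPU seeds a known value
 *	...emit a CP write of 0xDEADBEEF to the same scratch register...
 *	poll RREG32(scratch) until it reads 0xDEADBEEF or usec_timeout hits
 *
 * If the poll times out, the CP front end never consumed the packet and the
 * function returns -ETIMEDOUT; any earlier failure comes from ring allocation.
 */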

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				   le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", ucode_prefix);
	if (err)
		goto out;
	/* check the pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else if (amdgpu_is_kicker_fw(adev))
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		     (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		     (regno << SQ_IND_INDEX__INDEX__SHIFT) |
		     (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		     (SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
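
/*
 * Illustrative sketch (not part of the driver): SQ wave state is exposed
 * through an index/data register pair.  With AUTO_INCR set in SQ_IND_INDEX,
 * each read of SQ_IND_DATA advances the index, so dumping four consecutive
 * registers of wave 2, thread 0, from a hypothetical starting index `base`
 * is just:
 *
 *	uint32_t regs[4];
 *
 *	wave_read_regs(adev, 2, 0, base, 4, regs);
 *
 * which costs one index write plus four data reads.
 */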

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
						  struct amdgpu_gfx_shadow_info *shadow_info)
{
	/* for gfx */
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
	/* for compute */
	shadow_info->eop_size = GFX11_MEC_HPD_SIZE;
	shadow_info->eop_alignment = 256;
}

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (adev->gfx.cp_gfx_shadow || skip_check) {
		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
	.get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
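
/*
 * Illustrative sketch (not part of the driver): a caller sizing the gfx
 * shadow buffers would go through the funcs table above, e.g.:
 *
 *	struct amdgpu_gfx_shadow_info info;
 *
 *	if (!adev->gfx.funcs->get_gfx_shadow_info(adev, &info, false))
 *		shadow_size = ALIGN(info.shadow_size, info.shadow_alignment);
 *
 * A -ENOTSUPP return means the CP firmware on this chip cannot shadow gfx
 * state, so no shadow buffer should be allocated.
 */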

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
	case IP_VERSION(11, 5, 4):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (adev->gfx.disable_kq) {
		ring->no_scheduler = true;
		ring->no_user_submission = true;
	}

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* the last entry's offset in the rlc toc may be padded for alignment;
	 * make sure the buffer still covers it
	 */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset +
			     rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}
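
/*
 * Illustrative note (not part of the driver): fw_autoload_mask is treated as
 * a 64-bit bitmap indexed by SOC21_FIRMWARE_ID, so marking an ucode as
 * present is plain bit math; e.g. for a hypothetical id of 9:
 *
 *	*(uint64_t *)fw_autoload_mask |= 1ULL << 9;	sets bit 9
 *
 * The final check above deliberately leaves the RS64 PFP/ME instruction
 * blobs out of the mask while still copying them into the autoload buffer.
 */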

static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
							   fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
							   fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
							   fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *)(adev->gfx.mec_fw->data +
				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			  cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
							   fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
								   fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
								   fw_data, fw_size, fw_autoload_mask);
		}
	}
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4: init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5: disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}
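
/*
 * Illustrative summary (not part of the driver) of the backdoor autoload flow
 * above, assuming sequence 1 (creating rlc_autoload_bo) already ran in
 * gfx_v11_0_rlc_autoload_buffer_init():
 *
 *	2. copy SDMA, CP/gfx, MES and TOC ucode into the autoload bo
 *	3. program GFX_IMU_RLC_BOOTLOADER_ADDR/SIZE with the RLC_G blob and
 *	   load the IMU microcode
 *	4. setup_imu() + start_imu()
 *	5. disable GPA mode
 *
 * After this the RLC pulls every firmware flagged in the TOC mask on its own,
 * which is what gfx_v11_0_wait_for_rlc_autoload_complete() later waits for.
 */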
GFP_KERNEL); 1563 if (!ptr) { 1564 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1565 adev->gfx.ip_dump_compute_queues = NULL; 1566 } else { 1567 adev->gfx.ip_dump_compute_queues = ptr; 1568 } 1569 1570 /* Allocate memory for gfx queue registers for all the instances */ 1571 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1572 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1573 adev->gfx.me.num_queue_per_pipe; 1574 1575 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1576 if (!ptr) { 1577 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1578 adev->gfx.ip_dump_gfx_queues = NULL; 1579 } else { 1580 adev->gfx.ip_dump_gfx_queues = ptr; 1581 } 1582 } 1583 1584 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1585 { 1586 int i, j, k, r, ring_id; 1587 int xcc_id = 0; 1588 struct amdgpu_device *adev = ip_block->adev; 1589 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1590 1591 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1592 1593 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1594 case IP_VERSION(11, 0, 0): 1595 case IP_VERSION(11, 0, 1): 1596 case IP_VERSION(11, 0, 2): 1597 case IP_VERSION(11, 0, 3): 1598 case IP_VERSION(11, 0, 4): 1599 case IP_VERSION(11, 5, 0): 1600 case IP_VERSION(11, 5, 1): 1601 case IP_VERSION(11, 5, 2): 1602 case IP_VERSION(11, 5, 3): 1603 case IP_VERSION(11, 5, 4): 1604 adev->gfx.me.num_me = 1; 1605 adev->gfx.me.num_pipe_per_me = 1; 1606 adev->gfx.me.num_queue_per_pipe = 2; 1607 adev->gfx.mec.num_mec = 1; 1608 adev->gfx.mec.num_pipe_per_mec = 4; 1609 adev->gfx.mec.num_queue_per_pipe = 4; 1610 break; 1611 default: 1612 adev->gfx.me.num_me = 1; 1613 adev->gfx.me.num_pipe_per_me = 1; 1614 adev->gfx.me.num_queue_per_pipe = 1; 1615 adev->gfx.mec.num_mec = 1; 1616 adev->gfx.mec.num_pipe_per_mec = 4; 1617 adev->gfx.mec.num_queue_per_pipe = 8; 1618 break; 1619 } 1620 1621 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1622 case IP_VERSION(11, 0, 0): 1623 case IP_VERSION(11, 0, 2): 1624 case IP_VERSION(11, 0, 3): 1625 if (!adev->gfx.disable_uq && 1626 adev->gfx.me_fw_version >= 2420 && 1627 adev->gfx.pfp_fw_version >= 2580 && 1628 adev->gfx.mec_fw_version >= 2650 && 1629 adev->mes.fw_version[0] >= 120) { 1630 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1631 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1632 } 1633 break; 1634 case IP_VERSION(11, 0, 1): 1635 case IP_VERSION(11, 0, 4): 1636 case IP_VERSION(11, 5, 0): 1637 case IP_VERSION(11, 5, 1): 1638 case IP_VERSION(11, 5, 2): 1639 case IP_VERSION(11, 5, 3): 1640 /* add firmware version checks here */ 1641 if (0 && !adev->gfx.disable_uq) { 1642 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1643 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1644 } 1645 break; 1646 default: 1647 break; 1648 } 1649 1650 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1651 case IP_VERSION(11, 0, 0): 1652 case IP_VERSION(11, 0, 2): 1653 case IP_VERSION(11, 0, 3): 1654 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1655 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1656 if (adev->gfx.me_fw_version >= 2280 && 1657 adev->gfx.pfp_fw_version >= 2370 && 1658 adev->gfx.mec_fw_version >= 2450 && 1659 adev->mes.fw_version[0] >= 99) { 1660 adev->gfx.enable_cleaner_shader = true; 1661 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1662 if (r) { 1663 adev->gfx.enable_cleaner_shader = false; 1664 dev_err(adev->dev, 
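/*
 * Sketch of how the flattened dump buffers allocated in
 * gfx_v11_0_alloc_ip_dump() above could be indexed. This assumes an
 * instance-major layout (reg_count registers per queue instance); the
 * helper is hypothetical, and the real layout is defined by the ip_dump
 * writer elsewhere in this file:
 *
 *	static uint32_t example_ip_dump_read(const uint32_t *buf,
 *					     uint32_t reg_count,
 *					     uint32_t inst_idx,
 *					     uint32_t reg_idx)
 *	{
 *		return buf[inst_idx * reg_count + reg_idx];
 *	}
 */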
"Failed to initialize cleaner shader\n"); 1665 } 1666 } 1667 break; 1668 case IP_VERSION(11, 0, 1): 1669 case IP_VERSION(11, 0, 4): 1670 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1671 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1672 if (adev->gfx.pfp_fw_version >= 102 && 1673 adev->gfx.mec_fw_version >= 66 && 1674 adev->mes.fw_version[0] >= 128) { 1675 adev->gfx.enable_cleaner_shader = true; 1676 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1677 if (r) { 1678 adev->gfx.enable_cleaner_shader = false; 1679 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1680 } 1681 } 1682 break; 1683 case IP_VERSION(11, 5, 0): 1684 case IP_VERSION(11, 5, 1): 1685 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1686 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1687 if (adev->gfx.mec_fw_version >= 26 && 1688 adev->mes.fw_version[0] >= 114) { 1689 adev->gfx.enable_cleaner_shader = true; 1690 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1691 if (r) { 1692 adev->gfx.enable_cleaner_shader = false; 1693 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1694 } 1695 } 1696 break; 1697 case IP_VERSION(11, 5, 2): 1698 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1699 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1700 if (adev->gfx.me_fw_version >= 12 && 1701 adev->gfx.pfp_fw_version >= 15 && 1702 adev->gfx.mec_fw_version >= 15) { 1703 adev->gfx.enable_cleaner_shader = true; 1704 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1705 if (r) { 1706 adev->gfx.enable_cleaner_shader = false; 1707 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1708 } 1709 } 1710 break; 1711 case IP_VERSION(11, 5, 3): 1712 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1713 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1714 if (adev->gfx.me_fw_version >= 7 && 1715 adev->gfx.pfp_fw_version >= 8 && 1716 adev->gfx.mec_fw_version >= 8) { 1717 adev->gfx.enable_cleaner_shader = true; 1718 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1719 if (r) { 1720 adev->gfx.enable_cleaner_shader = false; 1721 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1722 } 1723 } 1724 break; 1725 case IP_VERSION(11, 5, 4): 1726 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1727 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1728 if (adev->gfx.me_fw_version >= 4 && 1729 adev->gfx.pfp_fw_version >= 7 && 1730 adev->gfx.mec_fw_version >= 5) { 1731 adev->gfx.enable_cleaner_shader = true; 1732 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1733 if (r) { 1734 adev->gfx.enable_cleaner_shader = false; 1735 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1736 } 1737 } 1738 break; 1739 default: 1740 adev->gfx.enable_cleaner_shader = false; 1741 break; 1742 } 1743 1744 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1745 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1746 amdgpu_sriov_is_pp_one_vf(adev)) 1747 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1748 1749 /* EOP Event */ 1750 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1751 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1752 &adev->gfx.eop_irq); 1753 if (r) 1754 return r; 1755 1756 /* Bad opcode Event */ 1757 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1758 
GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1759 &adev->gfx.bad_op_irq); 1760 if (r) 1761 return r; 1762 1763 /* Privileged reg */ 1764 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1765 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1766 &adev->gfx.priv_reg_irq); 1767 if (r) 1768 return r; 1769 1770 /* Privileged inst */ 1771 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1772 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1773 &adev->gfx.priv_inst_irq); 1774 if (r) 1775 return r; 1776 1777 /* FED error */ 1778 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1779 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1780 &adev->gfx.rlc_gc_fed_irq); 1781 if (r) 1782 return r; 1783 1784 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1785 1786 gfx_v11_0_me_init(adev); 1787 1788 r = gfx_v11_0_rlc_init(adev); 1789 if (r) { 1790 DRM_ERROR("Failed to init rlc BOs!\n"); 1791 return r; 1792 } 1793 1794 r = gfx_v11_0_mec_init(adev); 1795 if (r) { 1796 DRM_ERROR("Failed to init MEC BOs!\n"); 1797 return r; 1798 } 1799 1800 if (adev->gfx.num_gfx_rings) { 1801 ring_id = 0; 1802 /* set up the gfx ring */ 1803 for (i = 0; i < adev->gfx.me.num_me; i++) { 1804 for (j = 0; j < num_queue_per_pipe; j++) { 1805 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1806 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1807 continue; 1808 1809 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1810 i, k, j); 1811 if (r) 1812 return r; 1813 ring_id++; 1814 } 1815 } 1816 } 1817 } 1818 1819 if (adev->gfx.num_compute_rings) { 1820 ring_id = 0; 1821 /* set up the compute queues - allocate horizontally across pipes */ 1822 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1823 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1824 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1825 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1826 k, j)) 1827 continue; 1828 1829 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1830 i, k, j); 1831 if (r) 1832 return r; 1833 1834 ring_id++; 1835 } 1836 } 1837 } 1838 } 1839 1840 adev->gfx.gfx_supported_reset = 1841 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1842 adev->gfx.compute_supported_reset = 1843 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1844 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1845 case IP_VERSION(11, 0, 0): 1846 case IP_VERSION(11, 0, 2): 1847 case IP_VERSION(11, 0, 3): 1848 if ((adev->gfx.me_fw_version >= 2280) && 1849 (adev->gfx.mec_fw_version >= 2410) && 1850 !amdgpu_sriov_vf(adev) && 1851 !adev->debug_disable_gpu_ring_reset) { 1852 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1853 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1854 } 1855 break; 1856 default: 1857 if (!amdgpu_sriov_vf(adev) && 1858 !adev->debug_disable_gpu_ring_reset) { 1859 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1860 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1861 } 1862 break; 1863 } 1864 1865 if (!adev->enable_mes_kiq) { 1866 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1867 if (r) { 1868 DRM_ERROR("Failed to init KIQ BOs!\n"); 1869 return r; 1870 } 1871 1872 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1873 if (r) 1874 return r; 1875 } 1876 1877 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1878 if (r) 1879 return r; 1880 1881 /* allocate visible FB for rlc auto-loading fw */ 1882 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1883 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1884 if (r) 1885 return r; 1886 } 1887 1888 r = 
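/*
 * Each interrupt hookup in gfx_v11_0_sw_init() above follows the same
 * shape: amdgpu_irq_add_id() binds an amdgpu_irq_src to a
 * (client id, source id) pair and returns a negative errno on failure,
 * which sw_init propagates straight back. A hedged sketch with a
 * hypothetical source id and irq source:
 *
 *	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
 *			      EXAMPLE_SRCID,		// hypothetical
 *			      &example_irq_src);	// hypothetical
 *	if (r)
 *		return r;
 */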
gfx_v11_0_gpu_early_init(adev); 1889 if (r) 1890 return r; 1891 1892 if (amdgpu_gfx_ras_sw_init(adev)) { 1893 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1894 return -EINVAL; 1895 } 1896 1897 gfx_v11_0_alloc_ip_dump(adev); 1898 1899 r = amdgpu_gfx_sysfs_init(adev); 1900 if (r) 1901 return r; 1902 1903 return 0; 1904 } 1905 1906 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1907 { 1908 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1909 &adev->gfx.pfp.pfp_fw_gpu_addr, 1910 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1911 1912 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1913 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1914 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1915 } 1916 1917 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1918 { 1919 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1920 &adev->gfx.me.me_fw_gpu_addr, 1921 (void **)&adev->gfx.me.me_fw_ptr); 1922 1923 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1924 &adev->gfx.me.me_fw_data_gpu_addr, 1925 (void **)&adev->gfx.me.me_fw_data_ptr); 1926 } 1927 1928 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1929 { 1930 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1931 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1932 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1933 } 1934 1935 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1936 { 1937 int i; 1938 struct amdgpu_device *adev = ip_block->adev; 1939 1940 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1941 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1942 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1943 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1944 1945 amdgpu_gfx_mqd_sw_fini(adev, 0); 1946 1947 if (!adev->enable_mes_kiq) { 1948 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1949 amdgpu_gfx_kiq_fini(adev, 0); 1950 } 1951 1952 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1953 1954 gfx_v11_0_pfp_fini(adev); 1955 gfx_v11_0_me_fini(adev); 1956 gfx_v11_0_rlc_fini(adev); 1957 gfx_v11_0_mec_fini(adev); 1958 1959 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1960 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1961 1962 gfx_v11_0_free_microcode(adev); 1963 1964 amdgpu_gfx_sysfs_fini(adev); 1965 1966 kfree(adev->gfx.ip_dump_core); 1967 kfree(adev->gfx.ip_dump_compute_queues); 1968 kfree(adev->gfx.ip_dump_gfx_queues); 1969 1970 return 0; 1971 } 1972 1973 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1974 u32 sh_num, u32 instance, int xcc_id) 1975 { 1976 u32 data; 1977 1978 if (instance == 0xffffffff) 1979 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1980 INSTANCE_BROADCAST_WRITES, 1); 1981 else 1982 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1983 instance); 1984 1985 if (se_num == 0xffffffff) 1986 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1987 1); 1988 else 1989 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1990 1991 if (sh_num == 0xffffffff) 1992 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1993 1); 1994 else 1995 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1996 1997 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 1998 } 1999 2000 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 2001 { 2002 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 2003 2004 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 2005 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 2006 CC_GC_SA_UNIT_DISABLE, 2007 SA_DISABLE); 2008 gc_user_disabled_sa_mask = 
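/*
 * Note on the *_fini() helpers above: amdgpu_bo_free_kernel() takes the
 * BO handle, GPU address and CPU pointer by reference, frees the pinned
 * kernel BO and clears all three, and returns quietly when the handle is
 * already NULL. That is why gfx_v11_0_sw_fini() can call the fini
 * helpers unconditionally regardless of how far sw_init got.
 */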
RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 2009 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 2010 GC_USER_SA_UNIT_DISABLE, 2011 SA_DISABLE); 2012 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 2013 adev->gfx.config.max_shader_engines); 2014 2015 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 2016 } 2017 2018 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2019 { 2020 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 2021 u32 rb_mask; 2022 2023 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 2024 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 2025 CC_RB_BACKEND_DISABLE, 2026 BACKEND_DISABLE); 2027 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2028 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2029 GC_USER_RB_BACKEND_DISABLE, 2030 BACKEND_DISABLE); 2031 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2032 adev->gfx.config.max_shader_engines); 2033 2034 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2035 } 2036 2037 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2038 { 2039 u32 rb_bitmap_per_sa; 2040 u32 rb_bitmap_width_per_sa; 2041 u32 max_sa; 2042 u32 active_sa_bitmap; 2043 u32 global_active_rb_bitmap; 2044 u32 active_rb_bitmap = 0; 2045 u32 i; 2046 2047 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2048 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2049 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2050 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2051 2052 /* generate active rb bitmap according to active sa bitmap */ 2053 max_sa = adev->gfx.config.max_shader_engines * 2054 adev->gfx.config.max_sh_per_se; 2055 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2056 adev->gfx.config.max_sh_per_se; 2057 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2058 2059 for (i = 0; i < max_sa; i++) { 2060 if (active_sa_bitmap & (1 << i)) 2061 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2062 } 2063 2064 active_rb_bitmap &= global_active_rb_bitmap; 2065 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2066 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2067 } 2068 2069 #define DEFAULT_SH_MEM_BASES (0x6000) 2070 #define LDS_APP_BASE 0x1 2071 #define SCRATCH_APP_BASE 0x2 2072 2073 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2074 { 2075 int i; 2076 uint32_t sh_mem_bases; 2077 uint32_t data; 2078 2079 /* 2080 * Configure apertures: 2081 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2082 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2083 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2084 */ 2085 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2086 SCRATCH_APP_BASE; 2087 2088 mutex_lock(&adev->srbm_mutex); 2089 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2090 soc21_grbm_select(adev, 0, 0, 0, i); 2091 /* CP and shaders */ 2092 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2093 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2094 2095 /* Enable trap for each kfd vmid. 
*/ 2096 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2097 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2098 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2099 } 2100 soc21_grbm_select(adev, 0, 0, 0, 0); 2101 mutex_unlock(&adev->srbm_mutex); 2102 2103 /* 2104 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2105 * access. These should be enabled by FW for target VMIDs. 2106 */ 2107 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2108 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2109 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2110 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2111 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2112 } 2113 } 2114 2115 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2116 { 2117 int vmid; 2118 2119 /* 2120 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2121 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2122 * the driver can enable them for graphics. VMID0 should maintain 2123 * access so that HWS firmware can save/restore entries. 2124 */ 2125 for (vmid = 1; vmid < 16; vmid++) { 2126 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2127 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2128 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2129 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2130 } 2131 } 2132 2133 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2134 { 2135 /* TODO: harvest feature to be added later. */ 2136 } 2137 2138 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2139 { 2140 /* TCCs are global (not instanced). */ 2141 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2142 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2143 2144 adev->gfx.config.tcc_disabled_mask = 2145 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2146 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2147 } 2148 2149 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2150 { 2151 u32 tmp; 2152 int i; 2153 2154 if (!amdgpu_sriov_vf(adev)) 2155 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2156 2157 gfx_v11_0_setup_rb(adev); 2158 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2159 gfx_v11_0_get_tcc_info(adev); 2160 adev->gfx.config.pa_sc_tile_steering_override = 0; 2161 2162 /* Set whether texture coordinate truncation is conformant. 
*/ 2163 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2164 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2165 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2166 2167 /* XXX SH_MEM regs */ 2168 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2169 mutex_lock(&adev->srbm_mutex); 2170 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2171 soc21_grbm_select(adev, 0, 0, 0, i); 2172 /* CP and shaders */ 2173 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2174 if (i != 0) { 2175 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2176 (adev->gmc.private_aperture_start >> 48)); 2177 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2178 (adev->gmc.shared_aperture_start >> 48)); 2179 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2180 } 2181 } 2182 soc21_grbm_select(adev, 0, 0, 0, 0); 2183 2184 mutex_unlock(&adev->srbm_mutex); 2185 2186 gfx_v11_0_init_compute_vmid(adev); 2187 gfx_v11_0_init_gds_vmid(adev); 2188 } 2189 2190 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2191 int me, int pipe) 2192 { 2193 if (me != 0) 2194 return 0; 2195 2196 switch (pipe) { 2197 case 0: 2198 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2199 case 1: 2200 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2201 default: 2202 return 0; 2203 } 2204 } 2205 2206 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2207 int me, int pipe) 2208 { 2209 /* 2210 * amdgpu controls only the first MEC. That's why this function only 2211 * handles the setting of interrupts for this specific MEC. All other 2212 * pipes' interrupts are set by amdkfd. 2213 */ 2214 if (me != 1) 2215 return 0; 2216 2217 switch (pipe) { 2218 case 0: 2219 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2220 case 1: 2221 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2222 case 2: 2223 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2224 case 3: 2225 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2226 default: 2227 return 0; 2228 } 2229 } 2230 2231 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2232 bool enable) 2233 { 2234 u32 tmp, cp_int_cntl_reg; 2235 int i, j; 2236 2237 if (amdgpu_sriov_vf(adev)) 2238 return; 2239 2240 for (i = 0; i < adev->gfx.me.num_me; i++) { 2241 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2242 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2243 2244 if (cp_int_cntl_reg) { 2245 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2246 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2247 enable ? 1 : 0); 2248 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2249 enable ? 1 : 0); 2250 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2251 enable ? 1 : 0); 2252 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2253 enable ? 
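/*
 * The interrupt-enable loop above uses the standard SOC15 read-modify-
 * write idiom: read the register once, update fields with REG_SET_FIELD
 * (which masks and shifts via the generated *__SHIFT/_MASK defines), and
 * write the result back. A minimal sketch on a register used later in
 * this file:
 *
 *	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
 *	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
 *	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
 *
 * Chaining several REG_SET_FIELD calls on the same tmp batches multiple
 * field updates into one register write.
 */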
1 : 0); 2254 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2255 } 2256 } 2257 } 2258 } 2259 2260 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2261 { 2262 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2263 2264 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2265 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2266 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2267 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2268 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2269 2270 return 0; 2271 } 2272 2273 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2274 { 2275 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2276 2277 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2278 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2279 } 2280 2281 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2282 { 2283 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2284 udelay(50); 2285 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2286 udelay(50); 2287 } 2288 2289 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2290 bool enable) 2291 { 2292 uint32_t rlc_pg_cntl; 2293 2294 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2295 2296 if (!enable) { 2297 /* RLC_PG_CNTL[23] = 0 (default) 2298 * RLC will wait for handshake acks with SMU 2299 * GFXOFF will be enabled 2300 * RLC_PG_CNTL[23] = 1 2301 * RLC will not issue any message to SMU 2302 * hence no handshake between SMU & RLC 2303 * GFXOFF will be disabled 2304 */ 2305 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2306 } else { 2307 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; } 2308 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2309 } 2310 2311 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2312 { 2313 /* TODO: re-enable the RLC/SMU handshake once the SMU 2314 * and GFXOFF features work as expected */ 2315 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2316 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2317 2318 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2319 udelay(50); 2320 } 2321 2322 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2323 { 2324 uint32_t tmp; 2325 2326 /* enable Save Restore Machine */ 2327 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2328 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2329 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2330 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2331 } 2332 2333 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2334 { 2335 const struct rlc_firmware_header_v2_0 *hdr; 2336 const __le32 *fw_data; 2337 unsigned i, fw_size; 2338 2339 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2340 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2341 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2342 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2343 2344 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2345 RLCG_UCODE_LOADING_START_ADDRESS); 2346 2347 for (i = 0; i < fw_size; i++) 2348 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2349 le32_to_cpup(fw_data++)); 2350 2351 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2352 } 2353 2354 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2355 { 2356 const struct rlc_firmware_header_v2_2 *hdr; 2357 const __le32 *fw_data; 2358 unsigned i, fw_size; 2359 u32 tmp; 2360 2361 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2362 2363 fw_data = (const __le32
*)(adev->gfx.rlc_fw->data + 2364 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2365 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2366 2367 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2368 2369 for (i = 0; i < fw_size; i++) { 2370 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2371 msleep(1); 2372 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2373 le32_to_cpup(fw_data++)); 2374 } 2375 2376 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2377 2378 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2379 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2380 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2381 2382 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2383 for (i = 0; i < fw_size; i++) { 2384 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2385 msleep(1); 2386 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2387 le32_to_cpup(fw_data++)); 2388 } 2389 2390 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2391 2392 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2393 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2394 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2395 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2396 } 2397 2398 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2399 { 2400 const struct rlc_firmware_header_v2_3 *hdr; 2401 const __le32 *fw_data; 2402 unsigned i, fw_size; 2403 u32 tmp; 2404 2405 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2406 2407 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2408 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2409 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2410 2411 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2412 2413 for (i = 0; i < fw_size; i++) { 2414 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2415 msleep(1); 2416 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2417 le32_to_cpup(fw_data++)); 2418 } 2419 2420 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2421 2422 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2423 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2424 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2425 2426 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2427 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2428 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2429 2430 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2431 2432 for (i = 0; i < fw_size; i++) { 2433 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2434 msleep(1); 2435 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2436 le32_to_cpup(fw_data++)); 2437 } 2438 2439 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2440 2441 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2442 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2443 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2444 } 2445 2446 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2447 { 2448 const struct rlc_firmware_header_v2_0 *hdr; 2449 uint16_t version_major; 2450 uint16_t version_minor; 2451 2452 if (!adev->gfx.rlc_fw) 2453 return -EINVAL; 2454 2455 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2456 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2457 2458 version_major = le16_to_cpu(hdr->header.header_version_major); 2459 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2460 2461 if (version_major == 2) { 2462 gfx_v11_0_load_rlcg_microcode(adev); 2463 if (amdgpu_dpm == 1) { 2464 if (version_minor >= 2) 
2465 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2466 if (version_minor == 3) 2467 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2468 } 2469 2470 return 0; 2471 } 2472 2473 return -EINVAL; 2474 } 2475 2476 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2477 { 2478 int r; 2479 2480 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2481 gfx_v11_0_init_csb(adev); 2482 2483 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2484 gfx_v11_0_rlc_enable_srm(adev); 2485 } else { 2486 if (amdgpu_sriov_vf(adev)) { 2487 gfx_v11_0_init_csb(adev); 2488 return 0; 2489 } 2490 2491 adev->gfx.rlc.funcs->stop(adev); 2492 2493 /* disable CG */ 2494 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2495 2496 /* disable PG */ 2497 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2498 2499 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2500 /* legacy rlc firmware loading */ 2501 r = gfx_v11_0_rlc_load_microcode(adev); 2502 if (r) 2503 return r; 2504 } 2505 2506 gfx_v11_0_init_csb(adev); 2507 2508 adev->gfx.rlc.funcs->start(adev); 2509 } 2510 return 0; 2511 } 2512 2513 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2514 { 2515 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2516 uint32_t tmp; 2517 int i; 2518 2519 /* Trigger an invalidation of the L1 instruction caches */ 2520 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2521 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2522 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2523 2524 /* Wait for invalidation complete */ 2525 for (i = 0; i < usec_timeout; i++) { 2526 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2527 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2528 INVALIDATE_CACHE_COMPLETE)) 2529 break; 2530 udelay(1); 2531 } 2532 2533 if (i >= usec_timeout) { 2534 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2535 return -EINVAL; 2536 } 2537 2538 if (amdgpu_emu_mode == 1) 2539 amdgpu_device_flush_hdp(adev, NULL); 2540 2541 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2542 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2543 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2544 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2545 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2546 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2547 2548 /* Program me ucode address into instruction cache address register */ 2549 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2550 lower_32_bits(addr) & 0xFFFFF000); 2551 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2552 upper_32_bits(addr)); 2553 2554 return 0; 2555 } 2556 2557 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2558 { 2559 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2560 uint32_t tmp; 2561 int i; 2562 2563 /* Trigger an invalidation of the L1 instruction caches */ 2564 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2565 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2566 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2567 2568 /* Wait for invalidation complete */ 2569 for (i = 0; i < usec_timeout; i++) { 2570 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2571 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2572 INVALIDATE_CACHE_COMPLETE)) 2573 break; 2574 udelay(1); 2575 } 2576 2577 if (i >= usec_timeout) { 2578 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2579 return -EINVAL; 2580 } 2581 2582 if (amdgpu_emu_mode == 1) 2583 amdgpu_device_flush_hdp(adev, NULL); 2584 2585 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
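/*
 * gfx_v11_0_config_me_cache(), _pfp_cache() and _mec_cache() all follow
 * the same invalidate-and-poll shape: kick INVALIDATE_CACHE in the
 * relevant *_IC_OP_CNTL register, then busy-wait (bounded by
 * usec_timeout microseconds) for INVALIDATE_CACHE_COMPLETE. Condensed
 * sketch with a hypothetical register name:
 *
 *	tmp = RREG32_SOC15(GC, 0, regEXAMPLE_IC_OP_CNTL);
 *	tmp = REG_SET_FIELD(tmp, EXAMPLE_IC_OP_CNTL, INVALIDATE_CACHE, 1);
 *	WREG32_SOC15(GC, 0, regEXAMPLE_IC_OP_CNTL, tmp);
 *	for (i = 0; i < usec_timeout; i++) {
 *		tmp = RREG32_SOC15(GC, 0, regEXAMPLE_IC_OP_CNTL);
 *		if (REG_GET_FIELD(tmp, EXAMPLE_IC_OP_CNTL,
 *				  INVALIDATE_CACHE_COMPLETE) == 1)
 *			break;
 *		udelay(1);
 *	}
 *	if (i >= usec_timeout)
 *		return -EINVAL;	// timed out without completion
 */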
2586 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2587 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2588 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2589 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2590 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2591 2592 /* Program pfp ucode address into instruction cache address register */ 2593 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2594 lower_32_bits(addr) & 0xFFFFF000); 2595 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2596 upper_32_bits(addr)); 2597 2598 return 0; 2599 } 2600 2601 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2602 { 2603 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2604 uint32_t tmp; 2605 int i; 2606 2607 /* Trigger an invalidation of the L1 instruction caches */ 2608 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2609 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2610 2611 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2612 2613 /* Wait for invalidation complete */ 2614 for (i = 0; i < usec_timeout; i++) { 2615 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2616 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2617 INVALIDATE_CACHE_COMPLETE)) 2618 break; 2619 udelay(1); 2620 } 2621 2622 if (i >= usec_timeout) { 2623 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2624 return -EINVAL; 2625 } 2626 2627 if (amdgpu_emu_mode == 1) 2628 amdgpu_device_flush_hdp(adev, NULL); 2629 2630 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2631 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2632 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2633 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2634 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2635 2636 /* Program mec1 ucode address into instruction cache address register */ 2637 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2638 lower_32_bits(addr) & 0xFFFFF000); 2639 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2640 upper_32_bits(addr)); 2641 2642 return 0; 2643 } 2644 2645 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2646 { 2647 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2648 uint32_t tmp; 2649 unsigned i, pipe_id; 2650 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2651 2652 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2653 adev->gfx.pfp_fw->data; 2654 2655 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2656 lower_32_bits(addr)); 2657 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2658 upper_32_bits(addr)); 2659 2660 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2661 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2662 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2663 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2664 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2665 2666 /* 2667 * Programming any of the CP_PFP_IC_BASE registers 2668 * forces invalidation of the PFP L1 I$. Wait for the 2669 * invalidation to complete. 2670 */
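/*
 * Unlike the legacy paths above, the RS64 variants carry two GPU
 * addresses: 'addr' is the instruction image programmed through
 * CP_PFP_IC_BASE_LO/HI, while 'addr2' is the per-pipe data/stack image
 * programmed into CP_GFX_RS64_DC_BASE0_LO/HI inside the pipe loop below.
 */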
2671 for (i = 0; i < usec_timeout; i++) { 2672 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2673 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2674 INVALIDATE_CACHE_COMPLETE)) 2675 break; 2676 udelay(1); 2677 } 2678 2679 if (i >= usec_timeout) { 2680 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2681 return -EINVAL; 2682 } 2683 2684 /* Prime the L1 instruction caches */ 2685 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2686 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2687 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2688 /* Waiting for cache primed */ 2689 for (i = 0; i < usec_timeout; i++) { 2690 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2691 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2692 ICACHE_PRIMED)) 2693 break; 2694 udelay(1); 2695 } 2696 2697 if (i >= usec_timeout) { 2698 dev_err(adev->dev, "failed to prime instruction cache\n"); 2699 return -EINVAL; 2700 } 2701 2702 mutex_lock(&adev->srbm_mutex); 2703 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2704 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2705 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2706 (pfp_hdr->ucode_start_addr_hi << 30) | 2707 (pfp_hdr->ucode_start_addr_lo >> 2)); 2708 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2709 pfp_hdr->ucode_start_addr_hi >> 2); 2710 2711 /* 2712 * Program CP_ME_CNTL to reset the given PIPE so that 2713 * CP_PFP_PRGRM_CNTR_START takes effect. 2714 */ 2715 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2716 if (pipe_id == 0) 2717 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2718 PFP_PIPE0_RESET, 1); 2719 else 2720 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2721 PFP_PIPE1_RESET, 1); 2722 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2723 2724 /* Clear pfp pipe reset bit.
*/ 2725 if (pipe_id == 0) 2726 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2727 PFP_PIPE0_RESET, 0); 2728 else 2729 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2730 PFP_PIPE1_RESET, 0); 2731 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2732 2733 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2734 lower_32_bits(addr2)); 2735 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2736 upper_32_bits(addr2)); 2737 } 2738 soc21_grbm_select(adev, 0, 0, 0, 0); 2739 mutex_unlock(&adev->srbm_mutex); 2740 2741 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2742 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2743 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2744 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2745 2746 /* Invalidate the data caches */ 2747 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2748 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2749 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2750 2751 for (i = 0; i < usec_timeout; i++) { 2752 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2753 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2754 INVALIDATE_DCACHE_COMPLETE)) 2755 break; 2756 udelay(1); 2757 } 2758 2759 if (i >= usec_timeout) { 2760 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2761 return -EINVAL; 2762 } 2763 2764 return 0; 2765 } 2766 2767 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2768 { 2769 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2770 uint32_t tmp; 2771 unsigned i, pipe_id; 2772 const struct gfx_firmware_header_v2_0 *me_hdr; 2773 2774 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2775 adev->gfx.me_fw->data; 2776 2777 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2778 lower_32_bits(addr)); 2779 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2780 upper_32_bits(addr)); 2781 2782 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2783 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2784 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2785 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2786 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2787 2788 /* 2789 * Programming any of the CP_ME_IC_BASE registers 2790 * forces invalidation of the ME L1 I$. 
Wait for the 2791 * invalidation complete 2792 */ 2793 for (i = 0; i < usec_timeout; i++) { 2794 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2795 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2796 INVALIDATE_CACHE_COMPLETE)) 2797 break; 2798 udelay(1); 2799 } 2800 2801 if (i >= usec_timeout) { 2802 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2803 return -EINVAL; 2804 } 2805 2806 /* Prime the instruction caches */ 2807 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2808 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2809 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2810 2811 /* Waiting for instruction cache primed*/ 2812 for (i = 0; i < usec_timeout; i++) { 2813 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2814 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2815 ICACHE_PRIMED)) 2816 break; 2817 udelay(1); 2818 } 2819 2820 if (i >= usec_timeout) { 2821 dev_err(adev->dev, "failed to prime instruction cache\n"); 2822 return -EINVAL; 2823 } 2824 2825 mutex_lock(&adev->srbm_mutex); 2826 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2827 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2828 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2829 (me_hdr->ucode_start_addr_hi << 30) | 2830 (me_hdr->ucode_start_addr_lo >> 2) ); 2831 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2832 me_hdr->ucode_start_addr_hi>>2); 2833 2834 /* 2835 * Program CP_ME_CNTL to reset given PIPE to take 2836 * effect of CP_PFP_PRGRM_CNTR_START. 2837 */ 2838 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2839 if (pipe_id == 0) 2840 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2841 ME_PIPE0_RESET, 1); 2842 else 2843 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2844 ME_PIPE1_RESET, 1); 2845 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2846 2847 /* Clear pfp pipe0 reset bit. 
*/ 2848 if (pipe_id == 0) 2849 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2850 ME_PIPE0_RESET, 0); 2851 else 2852 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2853 ME_PIPE1_RESET, 0); 2854 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2855 2856 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2857 lower_32_bits(addr2)); 2858 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2859 upper_32_bits(addr2)); 2860 } 2861 soc21_grbm_select(adev, 0, 0, 0, 0); 2862 mutex_unlock(&adev->srbm_mutex); 2863 2864 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2865 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2866 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2867 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2868 2869 /* Invalidate the data caches */ 2870 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2871 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2872 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2873 2874 for (i = 0; i < usec_timeout; i++) { 2875 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2876 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2877 INVALIDATE_DCACHE_COMPLETE)) 2878 break; 2879 udelay(1); 2880 } 2881 2882 if (i >= usec_timeout) { 2883 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2884 return -EINVAL; 2885 } 2886 2887 return 0; 2888 } 2889 2890 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2891 { 2892 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2893 uint32_t tmp; 2894 unsigned i; 2895 const struct gfx_firmware_header_v2_0 *mec_hdr; 2896 2897 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2898 adev->gfx.mec_fw->data; 2899 2900 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2901 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2902 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2903 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2904 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2905 2906 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2907 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2908 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2909 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2910 2911 mutex_lock(&adev->srbm_mutex); 2912 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2913 soc21_grbm_select(adev, 1, i, 0, 0); 2914 2915 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2916 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2917 upper_32_bits(addr2)); 2918 2919 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2920 mec_hdr->ucode_start_addr_lo >> 2 | 2921 mec_hdr->ucode_start_addr_hi << 30); 2922 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2923 mec_hdr->ucode_start_addr_hi >> 2); 2924 2925 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2926 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2927 upper_32_bits(addr)); 2928 } 2929 mutex_unlock(&adev->srbm_mutex); 2930 soc21_grbm_select(adev, 0, 0, 0, 0); 2931 2932 /* Trigger an invalidation of the L1 instruction caches */ 2933 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2934 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2935 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2936 2937 /* Wait for invalidation complete */ 2938 for (i = 0; i < usec_timeout; i++) { 2939 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2940 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2941 INVALIDATE_DCACHE_COMPLETE)) 2942 break; 2943 udelay(1); 2944 } 2945 2946 if (i >= 
usec_timeout) { 2947 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2948 return -EINVAL; 2949 } 2950 2951 /* Trigger an invalidation of the L1 instruction caches */ 2952 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2953 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2954 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2955 2956 /* Wait for invalidation complete */ 2957 for (i = 0; i < usec_timeout; i++) { 2958 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2959 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2960 INVALIDATE_CACHE_COMPLETE)) 2961 break; 2962 udelay(1); 2963 } 2964 2965 if (i >= usec_timeout) { 2966 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2967 return -EINVAL; 2968 } 2969 2970 return 0; 2971 } 2972 2973 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2974 { 2975 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2976 const struct gfx_firmware_header_v2_0 *me_hdr; 2977 const struct gfx_firmware_header_v2_0 *mec_hdr; 2978 uint32_t pipe_id, tmp; 2979 2980 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2981 adev->gfx.mec_fw->data; 2982 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2983 adev->gfx.me_fw->data; 2984 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2985 adev->gfx.pfp_fw->data; 2986 2987 /* config pfp program start addr */ 2988 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2989 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2990 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2991 (pfp_hdr->ucode_start_addr_hi << 30) | 2992 (pfp_hdr->ucode_start_addr_lo >> 2)); 2993 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2994 pfp_hdr->ucode_start_addr_hi >> 2); 2995 } 2996 soc21_grbm_select(adev, 0, 0, 0, 0); 2997 2998 /* reset pfp pipe */ 2999 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3000 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 3001 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 3002 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3003 3004 /* clear pfp pipe reset */ 3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 3007 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3008 3009 /* config me program start addr */ 3010 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 3011 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3012 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3013 (me_hdr->ucode_start_addr_hi << 30) | 3014 (me_hdr->ucode_start_addr_lo >> 2) ); 3015 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3016 me_hdr->ucode_start_addr_hi>>2); 3017 } 3018 soc21_grbm_select(adev, 0, 0, 0, 0); 3019 3020 /* reset me pipe */ 3021 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3022 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 3023 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 3024 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3025 3026 /* clear me pipe reset */ 3027 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 3028 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 3029 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3030 3031 /* config mec program start addr */ 3032 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 3033 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 3034 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3035 mec_hdr->ucode_start_addr_lo >> 2 | 3036 mec_hdr->ucode_start_addr_hi << 30); 3037 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3038 mec_hdr->ucode_start_addr_hi >> 2); 3039 } 3040 soc21_grbm_select(adev, 0, 0, 0, 0); 3041 3042 /* reset mec pipe */ 3043 tmp = 
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3044 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3045 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3046 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3047 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3048 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3049 3050 /* clear mec pipe reset */ 3051 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3052 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3053 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3054 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3055 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3056 } 3057 3058 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3059 { 3060 uint32_t cp_status; 3061 uint32_t bootload_status; 3062 int i, r; 3063 uint64_t addr, addr2; 3064 3065 for (i = 0; i < adev->usec_timeout; i++) { 3066 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3067 3068 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3069 IP_VERSION(11, 0, 1) || 3070 amdgpu_ip_version(adev, GC_HWIP, 0) == 3071 IP_VERSION(11, 0, 4) || 3072 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3073 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3074 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3075 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || 3076 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) 3077 bootload_status = RREG32_SOC15(GC, 0, 3078 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3079 else 3080 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3081 3082 if ((cp_status == 0) && 3083 (REG_GET_FIELD(bootload_status, 3084 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3085 break; 3086 } 3087 udelay(1); 3088 } 3089 3090 if (i >= adev->usec_timeout) { 3091 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3092 return -ETIMEDOUT; 3093 } 3094 3095 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3096 if (adev->gfx.rs64_enable) { 3097 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3098 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3099 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3100 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3101 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3102 if (r) 3103 return r; 3104 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3105 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3106 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3107 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3108 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3109 if (r) 3110 return r; 3111 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3112 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3113 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3114 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3115 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3116 if (r) 3117 return r; 3118 } else { 3119 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3120 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3121 r = gfx_v11_0_config_me_cache(adev, addr); 3122 if (r) 3123 return r; 3124 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3125 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3126 r = gfx_v11_0_config_pfp_cache(adev, addr); 3127 if (r) 3128 return r; 3129 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3130 
rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3131 r = gfx_v11_0_config_mec_cache(adev, addr); 3132 if (r) 3133 return r; 3134 } 3135 } 3136 3137 return 0; 3138 } 3139 3140 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3141 { 3142 int i; 3143 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3144 3145 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3146 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3147 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3148 3149 for (i = 0; i < adev->usec_timeout; i++) { 3150 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3151 break; 3152 udelay(1); 3153 } 3154 3155 if (i >= adev->usec_timeout) 3156 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3157 3158 return 0; 3159 } 3160 3161 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3162 { 3163 int r; 3164 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3165 const __le32 *fw_data; 3166 unsigned i, fw_size; 3167 3168 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3169 adev->gfx.pfp_fw->data; 3170 3171 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3172 3173 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3174 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3175 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3176 3177 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3178 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3179 &adev->gfx.pfp.pfp_fw_obj, 3180 &adev->gfx.pfp.pfp_fw_gpu_addr, 3181 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3182 if (r) { 3183 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3184 gfx_v11_0_pfp_fini(adev); 3185 return r; 3186 } 3187 3188 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3189 3190 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3191 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3192 3193 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3194 3195 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3196 3197 for (i = 0; i < pfp_hdr->jt_size; i++) 3198 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3199 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3200 3201 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3202 3203 return 0; 3204 } 3205 3206 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3207 { 3208 int r; 3209 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3210 const __le32 *fw_ucode, *fw_data; 3211 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3212 uint32_t tmp; 3213 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3214 3215 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3216 adev->gfx.pfp_fw->data; 3217 3218 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3219 3220 /* instruction */ 3221 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3222 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3223 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3224 /* data */ 3225 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3226 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3227 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3228 3229 /* 64kb align */ 3230 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3231 64 * 1024, 3232 AMDGPU_GEM_DOMAIN_VRAM | 3233 AMDGPU_GEM_DOMAIN_GTT, 3234 &adev->gfx.pfp.pfp_fw_obj, 3235 &adev->gfx.pfp.pfp_fw_gpu_addr, 3236 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3237 if (r) { 3238 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3239 gfx_v11_0_pfp_fini(adev); 3240 return r; 3241 } 3242 3243 r = 
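/*
 * The upload just below is the usual amdgpu kernel-BO round trip:
 * amdgpu_bo_create_reserved() allocates, pins and CPU-maps the buffer in
 * one call, the image is memcpy'd through the mapping, and
 * amdgpu_bo_kunmap()/amdgpu_bo_unreserve() drop the mapping and
 * reservation while the pinned BO stays resident for the CP to fetch.
 * Minimal sketch with hypothetical handles:
 *
 *	r = amdgpu_bo_create_reserved(adev, size, 64 * 1024,
 *				      AMDGPU_GEM_DOMAIN_VRAM |
 *				      AMDGPU_GEM_DOMAIN_GTT,
 *				      &bo, &gpu_addr, (void **)&cpu_ptr);
 *	if (r)
 *		return r;
 *	memcpy(cpu_ptr, fw_image, size);
 *	amdgpu_bo_kunmap(bo);
 *	amdgpu_bo_unreserve(bo);
 */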
amdgpu_bo_create_reserved(adev, fw_data_size, 3244 64 * 1024, 3245 AMDGPU_GEM_DOMAIN_VRAM | 3246 AMDGPU_GEM_DOMAIN_GTT, 3247 &adev->gfx.pfp.pfp_fw_data_obj, 3248 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3249 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3250 if (r) { 3251 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3252 gfx_v11_0_pfp_fini(adev); 3253 return r; 3254 } 3255 3256 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3257 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3258 3259 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3260 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3261 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3262 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3263 3264 if (amdgpu_emu_mode == 1) 3265 amdgpu_device_flush_hdp(adev, NULL); 3266 3267 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3268 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3269 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3270 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3271 3272 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3273 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3274 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3275 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3276 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3277 3278 /* 3279 * Programming any of the CP_PFP_IC_BASE registers 3280 * forces invalidation of the PFP L1 I$. Wait for the 3281 * invalidation to complete. 3282 */ 3283 for (i = 0; i < usec_timeout; i++) { 3284 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3285 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3286 INVALIDATE_CACHE_COMPLETE)) 3287 break; 3288 udelay(1); 3289 } 3290 3291 if (i >= usec_timeout) { 3292 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3293 return -EINVAL; 3294 } 3295 3296 /* Prime the L1 instruction caches */ 3297 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3298 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3299 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3300 /* Waiting for cache primed */ 3301 for (i = 0; i < usec_timeout; i++) { 3302 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3303 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3304 ICACHE_PRIMED)) 3305 break; 3306 udelay(1); 3307 } 3308 3309 if (i >= usec_timeout) { 3310 dev_err(adev->dev, "failed to prime instruction cache\n"); 3311 return -EINVAL; 3312 } 3313 3314 mutex_lock(&adev->srbm_mutex); 3315 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3316 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3317 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3318 (pfp_hdr->ucode_start_addr_hi << 30) | 3319 (pfp_hdr->ucode_start_addr_lo >> 2)); 3320 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3321 pfp_hdr->ucode_start_addr_hi >> 2); 3322 3323 /* 3324 * Program CP_ME_CNTL to reset the given PIPE so that 3325 * CP_PFP_PRGRM_CNTR_START takes effect. 3326 */ 3327 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3328 if (pipe_id == 0) 3329 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3330 PFP_PIPE0_RESET, 1); 3331 else 3332 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3333 PFP_PIPE1_RESET, 1); 3334 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3335 3336 /* Clear pfp pipe reset bit.
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    PFP_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    PFP_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
			     lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
			     upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
				  INVALIDATE_DCACHE_COMPLETE) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);

	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);

	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);

	for (i = 0; i < me_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);

	return 0;
}

static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
{
	int r;
	const struct gfx_firmware_header_v2_0 *me_hdr;
	const __le32 *fw_ucode, *fw_data;
	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
	uint32_t tmp;
	uint32_t usec_timeout = 50000; /* wait for 50ms */

	me_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* instruction */
	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->ucode_offset_bytes));
	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
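	/*
	 * The RS64 ME image uses the same layout as the PFP: the
	 * instruction and data sections are located independently
	 * in the firmware blob.
	 */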
	/* data */
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
		le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64kb align */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		     lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		     upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				  INVALIDATE_CACHE_COMPLETE) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				  ICACHE_PRIMED) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given pipe so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear the me pipe reset bit. */
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
			     lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
			     upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
				  INVALIDATE_DCACHE_COMPLETE) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
		return -EINVAL;

	gfx_v11_0_cp_gfx_enable(adev, false);

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
		return r;
	}

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
		return r;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;
	int ctx_reg_offset;

	/* init the CP */
	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
		     adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);

	if (!amdgpu_async_gfx_ring)
		gfx_v11_0_cp_gfx_enable(adev, true);

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
	if (r) {
		drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

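	/*
	 * Emit a SET_CONTEXT_REG packet for every extent in the gfx11
	 * clear-state (CSB) tables.
	 */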
	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
						  PACKET3(PACKET3_SET_CONTEXT_REG,
							  ext->reg_count));
				amdgpu_ring_write(ring, ext->reg_index -
						  PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) -
		PACKET3_SET_CONTEXT_REG_START;
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	amdgpu_ring_write(ring, ctx_reg_offset);
	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	/* submit cs packet to copy state 0 to next available state */
	if (adev->gfx.num_gfx_rings > 1) {
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		r = amdgpu_ring_alloc(ring, 2);
		if (r) {
			drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
			return r;
		}

		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
		amdgpu_ring_write(ring, 0);

		amdgpu_ring_commit(ring);
	}
	return 0;
}

static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
					 CP_PIPE_ID pipe)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);

	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
}

static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
					  struct amdgpu_ring *ring)
{
	u32 tmp;

	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);

	/* Init gfx ring 0 for pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's write pointers */
	ring->wptr = 0;
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
		     lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
		     upper_32_bits(wptr_gpu_addr));

	mdelay(1);
	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);

	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
	mutex_unlock(&adev->srbm_mutex);

	/* Init gfx ring 1 for pipe 1 */
	if (adev->gfx.num_gfx_rings > 1) {
		mutex_lock(&adev->srbm_mutex);
		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
		/* maximum supported gfx ring is 2 */
		ring = &adev->gfx.gfx_ring[1];
		rb_bufsz = order_base_2(ring->ring_size / 8);
		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
		/* Initialize the ring buffer's write pointers */
		ring->wptr = 0;
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
		/* Set the wb address whether it's enabled or not */
		rptr_addr = ring->rptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
		wptr_gpu_addr = ring->wptr_gpu_addr;
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
			     lower_32_bits(wptr_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
			     upper_32_bits(wptr_gpu_addr));

		mdelay(1);
		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);

		rb_addr = ring->gpu_addr >> 8;
		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);

		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
		mutex_unlock(&adev->srbm_mutex);
	}
	/* Switch to pipe 0 */
	mutex_lock(&adev->srbm_mutex);
	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
	mutex_unlock(&adev->srbm_mutex);

	/* start the ring */
	gfx_v11_0_cp_gfx_start(adev);

	return 0;
}

static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	u32 data;

	if (adev->gfx.rs64_enable) {
		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
				     enable ? 0 : 1);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
				     enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
				     enable ? 0 : 1);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
	} else {
		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);

		if (enable) {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
			if (!adev->enable_mes_kiq)
				data = REG_SET_FIELD(data, CP_MEC_CNTL,
						     MEC_ME2_HALT, 0);
		} else {
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
	}

	udelay(50);
}

static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;
	u32 *fw = NULL;
	int r;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v11_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);

	/* MEC1 */
	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);

	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	return 0;
}

static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v2_0 *mec_hdr;
	const __le32 *fw_ucode, *fw_data;
	u32 tmp, fw_ucode_size, fw_data_size;
	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
	u32 *fw_ucode_ptr, *fw_data_ptr;
	int r;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v11_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(mec_hdr->ucode_offset_bytes));
	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);

	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
				le32_to_cpu(mec_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);

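	/*
	 * As with the gfx RS64 engines, the RS64 MEC instruction and
	 * data sections each get their own 64KB-aligned BO.
	 */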
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Trigger an invalidation of the MEC data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for the invalidation to complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				  INVALIDATE_DCACHE_COMPLETE) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate MEC data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for the invalidation to complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				  INVALIDATE_CACHE_COMPLETE) == 1)
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
instruction cache\n"); 4062 return -EINVAL; 4063 } 4064 4065 return 0; 4066 } 4067 4068 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4069 { 4070 uint32_t tmp; 4071 struct amdgpu_device *adev = ring->adev; 4072 4073 /* tell RLC which is KIQ queue */ 4074 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4075 tmp &= 0xffffff00; 4076 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4077 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4078 } 4079 4080 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4081 { 4082 /* set graphics engine doorbell range */ 4083 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4084 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4085 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4086 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4087 4088 /* set compute engine doorbell range */ 4089 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4090 (adev->doorbell_index.kiq * 2) << 2); 4091 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4092 (adev->doorbell_index.userqueue_end * 2) << 2); 4093 } 4094 4095 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4096 struct v11_gfx_mqd *mqd, 4097 struct amdgpu_mqd_prop *prop) 4098 { 4099 bool priority = 0; 4100 u32 tmp; 4101 4102 /* set up default queue priority level 4103 * 0x0 = low priority, 0x1 = high priority 4104 */ 4105 if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) 4106 priority = 1; 4107 4108 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4109 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4110 mqd->cp_gfx_hqd_queue_priority = tmp; 4111 } 4112 4113 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4114 struct amdgpu_mqd_prop *prop) 4115 { 4116 struct v11_gfx_mqd *mqd = m; 4117 uint64_t hqd_gpu_addr, wb_gpu_addr; 4118 uint32_t tmp; 4119 uint32_t rb_bufsz; 4120 4121 /* set up gfx hqd wptr */ 4122 mqd->cp_gfx_hqd_wptr = 0; 4123 mqd->cp_gfx_hqd_wptr_hi = 0; 4124 4125 /* set the pointer to the MQD */ 4126 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4127 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4128 4129 /* set up mqd control */ 4130 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4131 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4132 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4133 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4134 mqd->cp_gfx_mqd_control = tmp; 4135 4136 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4137 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4138 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4139 mqd->cp_gfx_hqd_vmid = 0; 4140 4141 /* set up gfx queue priority */ 4142 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4143 4144 /* set up time quantum */ 4145 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4146 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4147 mqd->cp_gfx_hqd_quantum = tmp; 4148 4149 /* set up gfx hqd base. 
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
	mqd->cp_gfx_hqd_rptr_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up rb_wptr_poll addr */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
#endif
	if (prop->tmz_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
	if (!prop->kernel_queue)
		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
	mqd->cp_gfx_hqd_cntl = tmp;

	/* set up cp_doorbell_control */
	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
	mqd->cp_rb_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;

	/* activate the queue */
	mqd->cp_gfx_hqd_active = 1;

	/* set gfx UQ items */
	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
	mqd->fence_address_hi = upper_32_bits(prop->fence_address);

	return 0;
}

static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.gfx_ring[0];

	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else {
		/* restore mqd with the backup copy */
		if (adev->gfx.me.mqd_backup[mqd_idx])
			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset the ring */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

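/*
 * Initialize the MQD for every kernel gfx queue, map the queues via
 * the KIQ/MES scheduler, then emit the initial CP state.
 */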
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
{
	int r, i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
		if (r)
			return r;
	}

	r = amdgpu_gfx_enable_kgq(adev, 0);
	if (r)
		return r;

	return gfx_v11_0_cp_gfx_start(adev);
}

static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
					      struct v11_compute_mqd *mqd,
					      struct amdgpu_mqd_prop *prop)
{
	uint32_t se_mask[8] = {0};
	uint32_t wa_mask;
	bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
					     AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);

	if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
		return;

	if (has_wa_flag) {
		wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
			0xffff : 0xffffffff;
		mqd->compute_static_thread_mgmt_se0 = wa_mask;
		mqd->compute_static_thread_mgmt_se1 = wa_mask;
		mqd->compute_static_thread_mgmt_se2 = wa_mask;
		mqd->compute_static_thread_mgmt_se3 = wa_mask;
		return;
	}

	amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
						 prop->cu_mask_count, se_mask);

	mqd->compute_static_thread_mgmt_se0 = se_mask[0];
	mqd->compute_static_thread_mgmt_se1 = se_mask[1];
	mqd->compute_static_thread_mgmt_se2 = se_mask[2];
	mqd->compute_static_thread_mgmt_se3 = se_mask[3];
}

static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
				      struct amdgpu_mqd_prop *prop)
{
	struct v11_compute_mqd *mqd = m;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000007;

	eop_base_addr = prop->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;

	if (prop->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = regCP_MQD_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(prop->queue_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
			    prop->allow_tunneling);
	if (prop->kernel_queue) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	}
	if (prop->tmz_queue)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = prop->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = prop->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (prop->use_doorbell) {
		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, prop->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a compute queue/ring */
	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;

	mqd->cp_hqd_active = prop->hqd_active;

	/* set UQ fence address */
	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
	/* set CU mask */
	gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);

	return 0;
}

static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int j;

	/* deactivate the queue */
	if (amdgpu_sriov_vf(adev))
		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);

	/* disable wptr polling */
	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* write the EOP addr */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
		     mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
		     mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
		     mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
			     mqd->cp_hqd_dequeue_request);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
			     mqd->cp_hqd_pq_rptr);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
			     mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
			     mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
		     mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
		     mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
		     mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
		     mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
		     mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
		     mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
		     mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		     mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		     mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
			     (adev->doorbell_index.kiq * 2) << 2);
		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
			     (adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
		     mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
		     mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
		     mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
		     mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
		     mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;

	gfx_v11_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.kiq[0].mqd_backup)
			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(*mqd));
		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
			amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		gfx_v11_0_kiq_init_register(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.kiq[0].mqd_backup)
			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
	}

	return 0;
}

static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
{
	struct amdgpu_device *adev = ring->adev;
	struct v11_compute_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		amdgpu_ring_init_mqd(ring);
		soc21_grbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else {
		/* restore MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}

static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
{
	gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
	return 0;
}

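/*
 * Initialize the MQD for every kernel compute queue and map the
 * queues through the KIQ.
 */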
static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
{
	int i, r;

	if (!amdgpu_async_gfx_ring)
		gfx_v11_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
		if (r)
			return r;
	}

	return amdgpu_gfx_enable_kcq(adev, 0);
}

static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v11_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v11_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		if (adev->gfx.rs64_enable)
			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
		else
			r = gfx_v11_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v11_0_cp_set_doorbell_range(adev);

	if (amdgpu_async_gfx_ring) {
		gfx_v11_0_cp_compute_enable(adev, true);
		gfx_v11_0_cp_gfx_enable(adev, true);
	}

	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
		r = amdgpu_mes_kiq_hw_init(adev, 0);
	else
		r = gfx_v11_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v11_0_kcq_resume(adev);
	if (r)
		return r;

	if (!amdgpu_async_gfx_ring) {
		r = gfx_v11_0_cp_gfx_resume(adev);
		if (r)
			return r;
	} else {
		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
		if (r)
			return r;
	}

	if (adev->gfx.disable_kq) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			/* we don't want to set ring->ready */
			r = amdgpu_ring_test_ring(ring);
			if (r)
				return r;
		}
		if (amdgpu_async_gfx_ring)
			amdgpu_gfx_disable_kgq(adev, 0);
	} else {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			ring = &adev->gfx.gfx_ring[i];
			r = amdgpu_ring_test_helper(ring);
			if (r)
				return r;
		}
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	return 0;
}

static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v11_0_cp_gfx_enable(adev, enable);
	gfx_v11_0_cp_compute_enable(adev, enable);
}

static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
{
	int r;
	bool value;

	r = adev->gfxhub.funcs->gart_enable(adev);
	if (r)
		return r;

	amdgpu_device_flush_hdp(adev, NULL);

	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;

	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
	/* TODO investigate why this and the hdp flush above are needed,
	 * are we missing a flush somewhere else?
	 */
	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);

	return 0;
}

static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
{
	u32 tmp;

	/* select RS64 */
	if (adev->gfx.rs64_enable) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);

		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
	}

	if (amdgpu_emu_mode == 1)
		msleep(100);
}

static int get_gb_addr_config(struct amdgpu_device *adev)
{
	u32 gb_addr_config;

	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
	if (gb_addr_config == 0)
		return -EINVAL;

	adev->gfx.config.gb_addr_config_fields.num_pkrs =
		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);

	adev->gfx.config.gb_addr_config = gb_addr_config;

	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
		REG_GET_FIELD(adev->gfx.config.gb_addr_config,
			      GB_ADDR_CONFIG, NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
		REG_GET_FIELD(adev->gfx.config.gb_addr_config,
			      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
		REG_GET_FIELD(adev->gfx.config.gb_addr_config,
			      GB_ADDR_CONFIG, NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
		REG_GET_FIELD(adev->gfx.config.gb_addr_config,
			      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
		REG_GET_FIELD(adev->gfx.config.gb_addr_config,
			      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));

	return 0;
}

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
{
	uint32_t data;

	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);

	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
}

static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
				       adev->gfx.cleaner_shader_ptr);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		if (adev->gfx.imu.funcs) {
			/* RLC autoload sequence 1: Program rlc ram */
			if (adev->gfx.imu.funcs->program_rlc_ram)
				adev->gfx.imu.funcs->program_rlc_ram(adev);
			/* rlc autoload firmware */
			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
			if (r)
				return r;
		}
	} else {
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
				if (adev->gfx.imu.funcs->load_microcode)
					adev->gfx.imu.funcs->load_microcode(adev);
				if (adev->gfx.imu.funcs->setup_imu)
					adev->gfx.imu.funcs->setup_imu(adev);
				if (adev->gfx.imu.funcs->start_imu)
					adev->gfx.imu.funcs->start_imu(adev);
			}

			/* disable gpa mode in backdoor loading */
			gfx_v11_0_disable_gpa_mode(adev);
		}
	}

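	/*
	 * Both the PSP and backdoor-autoload paths rely on the RLC to
	 * finish loading on its own; wait for it to report completion
	 * before touching the rest of the GC block.
	 */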
	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		drm_warn(adev_to_drm(adev), "Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU
		 * firmware being loaded first, so for direct loading
		 * the SMC ucode has to be loaded here before the RLC.
		 */
		r = amdgpu_pm_load_smu_firmware(adev, NULL);
		if (r)
			return r;
	}

	gfx_v11_0_constants_init(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		gfx_v11_0_select_cp_fw_arch(adev);

	if (adev->nbio.funcs->gc_doorbell_init)
		adev->nbio.funcs->gc_doorbell_init(adev);

	r = gfx_v11_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * init golden registers and rlc resume may override some registers,
	 * reconfig them here
	 */
	gfx_v11_0_tcp_harvest(adev);

	r = gfx_v11_0_cp_resume(adev);
	if (r)
		return r;

	/* get IMU version from HW if it's not set */
	if (!adev->gfx.imu_fw_version)
		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);

	return r;
}

static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
					      bool enable)
{
	unsigned int irq_type;
	int m, p, r;

	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
		for (m = 0; m < adev->gfx.me.num_me; m++) {
			for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
					 + (m * adev->gfx.mec.num_pipe_per_mec)
					 + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	cancel_delayed_work_sync(&adev->gfx.idle_work);

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
	gfx_v11_0_set_userq_eop_interrupts(adev, false);

	if (!adev->no_hw_access) {
		if (amdgpu_async_gfx_ring &&
		    !adev->gfx.disable_kq) {
			if (amdgpu_gfx_disable_kgq(adev, 0))
				DRM_ERROR("KGQ disable failed\n");
		}

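		/* Unmap the kernel compute queues and tear down the MES KIQ */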
		if (amdgpu_gfx_disable_kcq(adev, 0))
			DRM_ERROR("KCQ disable failed\n");

		amdgpu_mes_kiq_hw_fini(adev, 0);
	}

	if (amdgpu_sriov_vf(adev))
		/* Remove the steps disabling CPG and clearing KIQ position,
		 * so that CP could perform IDLE-SAVE during switch. Those
		 * steps are necessary to avoid a DMAR error in gfx9 but it is
		 * not reproduced on gfx11.
		 */
		return 0;

	gfx_v11_0_cp_enable(adev, false);
	gfx_v11_0_enable_gui_idle_interrupt(adev, false);

	adev->gfxhub.funcs->gart_disable(adev);

	adev->gfx.is_poweron = false;

	return 0;
}

static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return gfx_v11_0_hw_fini(ip_block);
}

static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
	return gfx_v11_0_hw_init(ip_block);
}

static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	return !REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
			      GRBM_STATUS, GUI_ACTIVE);
}

static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS */
		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
				      bool req)
{
	u32 i, tmp, val;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* Request with MeId=2, PipeId=0 */
		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);

		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
		if (req) {
			if (val == tmp)
				break;
		} else {
			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
					    REQUEST, 1);

			/* unlocked or locked by firmware */
			if (val != tmp)
				break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		return -EINVAL;

	return 0;
}

static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	int r, i, j, k;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				soc21_grbm_select(adev, i, k, j, 0);

				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
			}
		}
	}
	for (i = 0; i < adev->gfx.me.num_me; ++i) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
	for (i = 0; i < adev->gfx.me.num_me; ++i) {
		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				soc21_grbm_select(adev, i, k, j, 0);

				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
	mutex_lock(&adev->gfx.reset_sem_mutex);
	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
	if (r) {
		mutex_unlock(&adev->gfx.reset_sem_mutex);
		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
		return r;
	}

	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);

	/* Read the CP_VMID_RESET register three times to give
	 * GFX_HQD_ACTIVE sufficient time to reach 0.
	 */
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);

	/* release the gfx mutex */
	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
	mutex_unlock(&adev->gfx.reset_sem_mutex);
	if (r) {
		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
		return r;
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "Failed to wait for all pipes to become idle\n");
		return -EINVAL;
	}

	/********** trigger soft reset ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 1);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
	/********** exit soft reset ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 0);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);

	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);

	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "Failed to wait for CP_VMID_RESET to clear\n");
		return -EINVAL;
	}
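	/* All pipes are quiescent again; re-arm the CP interrupts that were
	 * masked at the top of this function.
	 */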
	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

	return gfx_v11_0_cp_resume(adev);
}

static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
	int i, r;
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	long tmo = msecs_to_jiffies(1000);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	return false;
}

static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	/*
	 * A GFX soft reset also impacts MES, so MES needs to be resumed
	 * after a GFX soft reset.
	 */
	return amdgpu_mes_resume(adev);
}

static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;
	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;

	if (amdgpu_sriov_vf(adev)) {
		amdgpu_gfx_off_ctrl(adev, false);
		mutex_lock(&adev->gfx.gpu_clock_mutex);
		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
		if (clock_counter_hi_pre != clock_counter_hi_after)
			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
		mutex_unlock(&adev->gfx.gpu_clock_mutex);
		amdgpu_gfx_off_ctrl(adev, true);
	} else {
		preempt_disable();
		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
		if (clock_counter_hi_pre != clock_counter_hi_after)
			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
		preempt_enable();
	}
	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);

	return clock;
}

static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					   uint32_t vmid,
					   uint32_t gds_base, uint32_t gds_size,
					   uint32_t gws_base, uint32_t gws_size,
					   uint32_t oa_base, uint32_t oa_size)
{
	struct amdgpu_device *adev = ring->adev;

	/* GDS Base */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
				    gds_base);

	/* GDS Size */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
				    gds_size);

	/* GWS */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
				    (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

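	/*
	 * amdgpu_user_queue selects the kernel queue (KQ) / user queue (UQ)
	 * mix: -1 and 0 (the default) keep KQs and disable UQs, 1 enables
	 * both, and 2 disables KQs in favour of UQs only.
	 */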
	switch (amdgpu_user_queue) {
	case -1:
	case 0:
	default:
		adev->gfx.disable_kq = false;
		adev->gfx.disable_uq = true;
		break;
	case 1:
		adev->gfx.disable_kq = false;
		adev->gfx.disable_uq = false;
		break;
	case 2:
		adev->gfx.disable_kq = true;
		adev->gfx.disable_uq = false;
		break;
	}

	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;

	if (adev->gfx.disable_kq) {
		/* We need one GFX ring temporarily to set up
		 * the clear state.
		 */
		adev->gfx.num_gfx_rings = 1;
		adev->gfx.num_compute_rings = 0;
	} else {
		adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
						  AMDGPU_MAX_COMPUTE_RINGS);
	}

	gfx_v11_0_set_kiq_pm4_funcs(adev);
	gfx_v11_0_set_ring_funcs(adev);
	gfx_v11_0_set_irq_funcs(adev);
	gfx_v11_0_set_gds_init(adev);
	gfx_v11_0_set_rlc_funcs(adev);
	gfx_v11_0_set_mqd_funcs(adev);
	gfx_v11_0_set_imu_funcs(adev);

	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);

	return gfx_v11_0_init_microcode(adev);
}

static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
	if (r)
		return r;

	r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
	if (r)
		return r;

	return 0;
}

static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_cntl;

	/* if RLC is not enabled, do nothing */
	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
}
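/*
 * RLC "safe mode" parks the RLC microcontroller so clock- and power-gating
 * state can be reprogrammed without racing against RLC-driven transitions.
 * Entry is acknowledged by the CMD field self-clearing; exit below is
 * posted without waiting for an acknowledgement.
 */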
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;
	unsigned i;

	data = RLC_SAFE_MODE__CMD_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);

	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
				   RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
}

static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
		return;

	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;

	if (def != data)
		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
}

static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
				       bool enable)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
		return;

	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;

	if (def != data)
		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
}

static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
					   bool enable)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
		return;

	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;

	if (def != data)
		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
}

static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t data, def;

	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
		return;

	/* It is disabled by HW by default */
	if (enable) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);

			if (def != data)
				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
		}
	} else {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);

			if (def != data)
				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
		}
	}
}
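/*
 * Like the helpers above, the coarse-grain routine below relies on the
 * "def = data = RREG32(); ... if (def != data) WREG32();" idiom so that
 * the register write is skipped entirely when nothing actually changed.
 */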
static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t def, data;

	if (!(adev->cg_flags &
	      (AMD_CG_SUPPORT_GFX_CGCG |
	       AMD_CG_SUPPORT_GFX_CGLS |
	       AMD_CG_SUPPORT_GFX_3D_CGCG |
	       AMD_CG_SUPPORT_GFX_3D_CGLS)))
		return;

	if (enable) {
		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

		/* unset CGCG override */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;

		/* update CGCG override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0000363F) */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);

		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);

		if (def != data)
			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);

		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);

		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance; no need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	} else {
		/* Program RLC_CGCG_CGLS_CTRL */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance; no need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	}
}

static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					     bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);

	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);

	gfx_v11_0_update_repeater_fgcg(adev, enable);

	gfx_v11_0_update_sram_fgcg(adev, enable);

	gfx_v11_0_update_perf_clk(adev, enable);

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

	return 0;
}

static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
				      struct amdgpu_ring *ring, unsigned vmid)
{
	u32 reg, pre_data, data;

	amdgpu_gfx_off_ctrl(adev, false);
	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
		pre_data = RREG32_NO_KIQ(reg);
	else
		pre_data = RREG32(reg);

	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;

	if (pre_data != data) {
		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
		else
			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
	}
	amdgpu_gfx_off_ctrl(adev, true);

	if (ring &&
	    amdgpu_sriov_is_pp_one_vf(adev) &&
	    (pre_data != data) &&
	    ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) ||
	     (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
		amdgpu_ring_emit_wreg(ring, reg, data);
	}
}
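/*
 * Wire the helpers above into the common RLC abstraction; the shared
 * amdgpu_gfx_rlc_enter/exit_safe_mode() paths dispatch through this table.
 */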
static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
	.set_safe_mode = gfx_v11_0_set_safe_mode,
	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
	.init = gfx_v11_0_rlc_init,
	.get_csb_size = gfx_v11_0_get_csb_size,
	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
	.resume = gfx_v11_0_rlc_resume,
	.stop = gfx_v11_0_rlc_stop,
	.reset = gfx_v11_0_rlc_reset,
	.start = gfx_v11_0_rlc_start,
	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
};

static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
{
	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);

	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
	else
		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;

	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);

	/* Program RLC_PG_DELAY3 for CGPG hysteresis */
	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 4):
		case IP_VERSION(11, 5, 0):
		case IP_VERSION(11, 5, 1):
		case IP_VERSION(11, 5, 2):
		case IP_VERSION(11, 5, 3):
		case IP_VERSION(11, 5, 4):
			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
			break;
		default:
			break;
		}
	}
}

static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	gfx_v11_cntl_power_gating(adev, enable);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}

static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		amdgpu_gfx_off_ctrl(adev, enable);
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
	case IP_VERSION(11, 5, 4):
		if (!enable)
			amdgpu_gfx_off_ctrl(adev, false);

		gfx_v11_cntl_pg(adev, enable);

		if (enable)
			amdgpu_gfx_off_ctrl(adev, true);

		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
	case IP_VERSION(11, 5, 4):
		gfx_v11_0_update_gfx_clock_gating(adev,
						  state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
	}

	return 0;
}
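/*
 * Note that gfx_v11_0_get_clockgating_state() below reports the current
 * hardware state by reading the override/enable bits back, rather than
 * echoing the cg_flags that were requested.
 */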
static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
	struct amdgpu_device *adev = ip_block->adev;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_REPEATER_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;

	/* AMD_CG_SUPPORT_GFX_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_FGCG;

	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
}

static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	/* gfx11 is 32bit rptr */
	return *(uint32_t *)ring->rptr_cpu_addr;
}

static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	} else {
		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
	}

	return wptr;
}

static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
			     lower_32_bits(ring->wptr));
		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
			     upper_32_bits(ring->wptr));
	}
}

static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	/* gfx11 hardware is 32bit rptr */
	return *(uint32_t *)ring->rptr_cpu_addr;
}

static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	else
		BUG();
	return wptr;
}

static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only DOORBELL method supported on gfx11 now */
	}
}

static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask, reg_mem_engine;

	if (!adev->gfx.funcs->get_hdp_flush_mask) {
		dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
		return;
	}

	adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
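	/*
	 * WAIT_REG_MEM in write-then-poll form: write the ref value to the
	 * NBIO flush request register, then poll the done register until
	 * the HDP flush is reported complete for this ring's mask.
	 */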
	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
			       ref_and_mask, ref_and_mask, 0x20);
}

static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				       struct amdgpu_job *job,
				       struct amdgpu_ib *ib,
				       uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (flags & AMDGPU_IB_PREEMPTED)
			control |= INDIRECT_BUFFER_PRE_RESUME(1);

		if (vmid && !ring->adev->gfx.rs64_enable)
			gfx_v11_0_ring_emit_de_meta(ring,
				!amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
	}

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}

static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					   struct amdgpu_job *job,
					   struct amdgpu_ib *ib,
					   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
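/*
 * Fences on gfx11 use RELEASE_MEM: one end-of-pipe packet that writes back
 * and invalidates the relevant GL2/metadata caches, stores the 32- or
 * 64-bit sequence number, and optionally raises an interrupt, all ordered
 * behind the preceding draw/dispatch work.
 */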
static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				      u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));

	/*
	 * The address must be Qword aligned for a 64-bit write, and Dword
	 * aligned if only the low 32 bits of data are sent (data high
	 * discarded).
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
			       upper_32_bits(addr), seq, 0xffffffff, 4);
}

static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}

	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
	 * changed in any way.
	 */
	ring->set_q_mode_offs = 0;
	ring->set_q_mode_ptr = NULL;
}
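/*
 * KIQ fences are always 32-bit: only a single dword of writeback space is
 * allocated per KIQ sequence, hence the BUG_ON for 64-bit requests below.
 */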
static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
					 uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						   uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr == 0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);

	return ret;
}
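/*
 * gfx_v11_0_ring_emit_gfx_shadow() below builds a small self-modifying
 * command sequence: every call emits a COND_EXEC whose predicate dword
 * lives in the ring buffer itself, and later calls (or the GPU, via
 * WRITE_DATA) flip that dword so the expensive SET_Q_PREEMPTION_MODE
 * state save/restore is only executed when the shadow setup changed.
 */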
static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
					   u64 shadow_va, u64 csa_va,
					   u64 gds_va, bool init_shadow,
					   int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offs, end;

	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
		return;

	/*
	 * The logic here isn't easy to understand because we need to keep
	 * state across multiple executions of the function as well as
	 * between the CPU and GPU. The general idea is that the newly
	 * written GPU command has a condition on the previous one and is
	 * only executed if really necessary.
	 */

	/*
	 * The dw in the NOP controls if the next SET_Q_MODE packet should be
	 * executed or not. Reserve 64 bits just to be on the safe side.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
	offs = ring->wptr & ring->buf_mask;

	/*
	 * We start with skipping the prefix SET_Q_MODE and always executing
	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command when the postfix is executed.
	 */
	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
	amdgpu_ring_write(ring, 0);

	if (ring->set_q_mode_offs) {
		uint64_t addr;

		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
		addr += ring->set_q_mode_offs << 2;
		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
	}

	/*
	 * When the postfix SET_Q_MODE packet executes we need to make sure
	 * that the next prefix SET_Q_MODE packet executes as well.
	 */
	if (!shadow_va) {
		uint64_t addr;

		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
		addr += offs << 2;
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, 0x1);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
	amdgpu_ring_write(ring, lower_32_bits(gds_va));
	amdgpu_ring_write(ring, upper_32_bits(gds_va));
	amdgpu_ring_write(ring, lower_32_bits(csa_va));
	amdgpu_ring_write(ring, upper_32_bits(csa_va));
	amdgpu_ring_write(ring, shadow_va ?
			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
	amdgpu_ring_write(ring, init_shadow ?
			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);

	if (ring->set_q_mode_offs)
		amdgpu_ring_patch_cond_exec(ring, end);

	if (shadow_va) {
		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;

		/*
		 * If the tokens match try to skip the last postfix SET_Q_MODE
		 * packet to avoid saving/restoring the state all the time.
		 */
		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
			*ring->set_q_mode_ptr = 0;

		ring->set_q_mode_token = token;
	} else {
		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
	}

	ring->set_q_mode_offs = offs;
}

static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;

	if (adev->enable_mes)
		return -EINVAL;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* assert IB preemption, emit the trailing fence */
	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
				   ring->trail_fence_gpu_addr,
				   ++ring->trail_seq);
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
	}

	/* deassert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
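/*
 * The DE metadata written below lands in the CSA so a preempted gfx IB can
 * be resumed later; on resume the payload is replayed from the CSA copy
 * instead of being rebuilt from scratch.
 */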
static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
{
	struct amdgpu_device *adev = ring->adev;
	struct v10_de_ib_state de_payload = {0};
	uint64_t offset, gds_addr, de_payload_gpu_addr;
	void *de_payload_cpu_addr;
	int cnt;

	offset = offsetof(struct v10_gfx_meta_data, de_payload);
	de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
	de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;

	gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
			 AMDGPU_CSA_SIZE - adev->gds.gds_size,
			 PAGE_SIZE);

	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));

	if (resume)
		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
					   sizeof(de_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
					   sizeof(de_payload) >> 2);
}

static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
					   bool secure)
{
	uint32_t v = secure ? FRAME_TMZ : 0;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
}

static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
			  reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
			  reg_val_offs * 4));
}

static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val)
{
	uint32_t cmd = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = (1 << 16); /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}

static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
			       ref, mask, 0x20);
}
static void
gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
				      uint32_t me, uint32_t pipe,
				      enum amdgpu_interrupt_state state)
{
	uint32_t cp_int_cntl, cp_int_cntl_reg;

	if (!me) {
		switch (pipe) {
		case 0:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
			break;
		case 1:
			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    GENERIC0_INT_ENABLE, 0);
		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    TIME_STAMP_INT_ENABLE, 1);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    GENERIC0_INT_ENABLE, 1);
		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
		break;
	default:
		break;
	}
}

static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						      int me, int pipe,
						      enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     GENERIC0_INT_ENABLE, 0);
		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     GENERIC0_INT_ENABLE, 1);
		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
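/*
 * Route the generic per-ring EOP IRQ types onto the me/pipe-specific
 * helpers above: gfx rings live on ME0, compute rings on MEC1 (me == 1).
 */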
static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
		break;
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
			     struct amdgpu_irq_src *source,
			     struct amdgpu_iv_entry *entry)
{
	u32 doorbell_offset = entry->src_data[0];
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	DRM_DEBUG("IH: CP EOP\n");

	if (adev->enable_mes && doorbell_offset) {
		struct amdgpu_userq_fence_driver *fence_drv = NULL;
		struct xarray *xa = &adev->userq_xa;
		unsigned long flags;

		xa_lock_irqsave(xa, flags);
		fence_drv = xa_load(xa, doorbell_offset);
		if (fence_drv)
			amdgpu_userq_fence_driver_process(fence_drv);
		xa_unlock_irqrestore(xa, flags);
	} else {
		me_id = (entry->ring_id & 0x0c) >> 2;
		pipe_id = (entry->ring_id & 0x03) >> 0;
		queue_id = (entry->ring_id & 0x70) >> 4;

		switch (me_id) {
		case 0:
			if (pipe_id == 0)
				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
			else
				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
			break;
		case 1:
		case 2:
			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
				ring = &adev->gfx.compute_ring[i];
				/* Per-queue interrupt is supported for MEC starting from VI.
				 * The interrupt can only be enabled/disabled per pipe instead
				 * of per queue.
				 */
				if ((ring->me == me_id) &&
				    (ring->pipe == pipe_id) &&
				    (ring->queue == queue_id))
					amdgpu_fence_process(ring);
			}
			break;
		}
	}

	return 0;
}

static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned int type,
					      enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl_reg, cp_int_cntl;
	int i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < adev->gfx.me.num_me; i++) {
			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);

				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
								    PRIV_REG_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
				/* MECs start at 1 */
				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);

				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
								    PRIV_REG_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
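/*
 * The bad-opcode helper below mirrors the priv-reg one above (walking all
 * CPG and CPC pipes), while the priv-instruction variant only needs to
 * touch the gfx (CPG) pipes.
 */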
static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *source,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl_reg, cp_int_cntl;
	int i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < adev->gfx.me.num_me; i++) {
			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);

				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
								    OPCODE_ERROR_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
				/* MECs start at 1 */
				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);

				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
								    OPCODE_ERROR_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					       struct amdgpu_irq_src *source,
					       unsigned int type,
					       enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl_reg, cp_int_cntl;
	int i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < adev->gfx.me.num_me; i++) {
			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);

				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
								    PRIV_INSTR_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
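/*
 * gfx_v11_0_handle_priv_fault() maps the IH ring_id back to a
 * me/pipe/queue triple and kicks the scheduler of the matching ring so
 * the offending job hits its timeout/recovery path immediately.
 */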
static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	if (!adev->gfx.disable_kq) {
		switch (me_id) {
		case 0:
			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
				ring = &adev->gfx.gfx_ring[i];
				if (ring->me == me_id && ring->pipe == pipe_id &&
				    ring->queue == queue_id)
					drm_sched_fault(&ring->sched);
			}
			break;
		case 1:
		case 2:
			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
				ring = &adev->gfx.compute_ring[i];
				if (ring->me == me_id && ring->pipe == pipe_id &&
				    ring->queue == queue_id)
					drm_sched_fault(&ring->sched);
			}
			break;
		default:
			BUG();
			break;
		}
	}
}

static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}

static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal opcode in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}

static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}

static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
				    struct amdgpu_irq_src *source,
				    struct amdgpu_iv_entry *entry)
{
	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);

	return 0;
}

#if 0
static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned int type,
					     enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);

	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

			tmp = RREG32_SOC15_IP(GC, target);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15_IP(GC, target, tmp);
		} else {
			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

			tmp = RREG32_SOC15_IP(GC, target);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15_IP(GC, target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only supports GENERIC2_INT now */
		break;
	}
	return 0;
}
#endif
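/*
 * The mem_sync callback below emits ACQUIRE_MEM with every GCR invalidate
 * bit set, i.e. a full-range flush/invalidate of the GL2/GLM/GL1/GLV/GLK/
 * GLI caches before subsequent packets consume the data.
 */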
static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int gcr_cntl =
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}

static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
{
	/* Disable the pipe reset until the CPFW fully supports it. */
	dev_warn_once(adev->dev, "The CPFW doesn't support pipe reset yet.\n");
	return false;
}

static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	switch (ring->pipe) {
	case 0:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 0);
		break;
	case 1:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 0);
		break;
	default:
		break;
	}

	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);

	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
	    RS64_FW_UC_START_ADDR_LO;
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
		 r == 0 ? "successfully" : "failed");
	/* FIXME: Sometimes the driver can't cache the ME firmware start PC
	 * correctly, so the pipe reset status relies on the later gfx ring
	 * test result.
	 */
	return 0;
}
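/*
 * Queue reset strategy: ask MES to reset the legacy queue first, and only
 * fall back to the heavier per-pipe reset (currently reported as
 * unsupported by gfx_v11_pipe_reset_support()) when the MES path fails.
 */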
6869 */
6870 return 0;
6871 }
6872
6873 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
6874 unsigned int vmid,
6875 struct amdgpu_fence *timedout_fence)
6876 {
6877 struct amdgpu_device *adev = ring->adev;
6878 bool use_mmio = false;
6879 int r;
6880
6881 amdgpu_ring_reset_helper_begin(ring, timedout_fence);
6882
6883 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
6884 if (r) {
6885
6886 dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
6887 r = gfx_v11_reset_gfx_pipe(ring);
6888 if (r)
6889 return r;
6890 }
6891
6892 if (use_mmio) {
6893 r = gfx_v11_0_kgq_init_queue(ring, true);
6894 if (r) {
6895 dev_err(adev->dev, "failed to init kgq\n");
6896 return r;
6897 }
6898
6899 r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
6900 if (r) {
6901 dev_err(adev->dev, "failed to remap kgq\n");
6902 return r;
6903 }
6904 }
6905
6906 return amdgpu_ring_reset_helper_end(ring, timedout_fence);
6907 }
6908
6909 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
6910 {
6911
6912 struct amdgpu_device *adev = ring->adev;
6913 uint32_t reset_pipe = 0, clean_pipe = 0;
6914 int r;
6915
6916 if (!gfx_v11_pipe_reset_support(adev))
6917 return -EOPNOTSUPP;
6918
6919 gfx_v11_0_set_safe_mode(adev, 0);
6920 mutex_lock(&adev->srbm_mutex);
6921 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6922
6923 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
6924 clean_pipe = reset_pipe;
6925
6926 if (adev->gfx.rs64_enable) {
6927
6928 switch (ring->pipe) {
6929 case 0:
6930 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6931 MEC_PIPE0_RESET, 1);
6932 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6933 MEC_PIPE0_RESET, 0);
6934 break;
6935 case 1:
6936 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6937 MEC_PIPE1_RESET, 1);
6938 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6939 MEC_PIPE1_RESET, 0);
6940 break;
6941 case 2:
6942 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6943 MEC_PIPE2_RESET, 1);
6944 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6945 MEC_PIPE2_RESET, 0);
6946 break;
6947 case 3:
6948 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6949 MEC_PIPE3_RESET, 1);
6950 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6951 MEC_PIPE3_RESET, 0);
6952 break;
6953 default:
6954 break;
6955 }
6956 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
6957 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
6958 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
6959 RS64_FW_UC_START_ADDR_LO;
6960 } else {
6961 if (ring->me == 1) {
6962 switch (ring->pipe) {
6963 case 0:
6964 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6965 MEC_ME1_PIPE0_RESET, 1);
6966 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6967 MEC_ME1_PIPE0_RESET, 0);
6968 break;
6969 case 1:
6970 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6971 MEC_ME1_PIPE1_RESET, 1);
6972 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6973 MEC_ME1_PIPE1_RESET, 0);
6974 break;
6975 case 2:
6976 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6977 MEC_ME1_PIPE2_RESET, 1);
6978 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6979 MEC_ME1_PIPE2_RESET, 0);
6980 break;
6981 case 3:
6982 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6983 MEC_ME1_PIPE3_RESET, 1);
6984 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6985 MEC_ME1_PIPE3_RESET, 0);
6986 break;
6987 default:
6988 break;
6989 }
6990 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */
6991 } else {
6992 switch
(ring->pipe) {
6993 case 0:
6994 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6995 MEC_ME2_PIPE0_RESET, 1);
6996 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6997 MEC_ME2_PIPE0_RESET, 0);
6998 break;
6999 case 1:
7000 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7001 MEC_ME2_PIPE1_RESET, 1);
7002 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7003 MEC_ME2_PIPE1_RESET, 0);
7004 break;
7005 case 2:
7006 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7007 MEC_ME2_PIPE2_RESET, 1);
7008 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7009 MEC_ME2_PIPE2_RESET, 0);
7010 break;
7011 case 3:
7012 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7013 MEC_ME2_PIPE3_RESET, 1);
7014 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7015 MEC_ME2_PIPE3_RESET, 0);
7016 break;
7017 default:
7018 break;
7019 }
7020 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
7021 }
7022 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
7023 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
7024 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
7025 }
7026
7027 soc21_grbm_select(adev, 0, 0, 0, 0);
7028 mutex_unlock(&adev->srbm_mutex);
7029 gfx_v11_0_unset_safe_mode(adev, 0);
7030
7031 dev_info(adev->dev, "Ring %s pipe reset to the MEC firmware start PC %s\n", ring->name,
7032 r == 0 ? "succeeded" : "failed");
7033 /* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly, so the pipe
7034 * reset status relies on the compute ring test result.
7035 */
7036 return 0;
7037 }
7038
7039 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
7040 unsigned int vmid,
7041 struct amdgpu_fence *timedout_fence)
7042 {
7043 struct amdgpu_device *adev = ring->adev;
7044 int r = 0;
7045
7046 amdgpu_ring_reset_helper_begin(ring, timedout_fence);
7047
7048 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
7049 if (r) {
7050 dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r);
7051 r = gfx_v11_0_reset_compute_pipe(ring);
7052 if (r)
7053 return r;
7054 }
7055
7056 r = gfx_v11_0_kcq_init_queue(ring, true);
7057 if (r) {
7058 dev_err(adev->dev, "failed to init kcq\n");
7059 return r;
7060 }
7061 r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
7062 if (r) {
7063 dev_err(adev->dev, "failed to remap kcq\n");
7064 return r;
7065 }
7066
7067 return amdgpu_ring_reset_helper_end(ring, timedout_fence);
7068 }
7069
7070 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7071 {
7072 struct amdgpu_device *adev = ip_block->adev;
7073 uint32_t i, j, k, reg, index = 0;
7074 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
7075
7076 if (!adev->gfx.ip_dump_core)
7077 return;
7078
7079 for (i = 0; i < reg_count; i++)
7080 drm_printf(p, "%-50s \t 0x%08x\n",
7081 gc_reg_list_11_0[i].reg_name,
7082 adev->gfx.ip_dump_core[i]);
7083
7084 /* print compute queue registers for all instances */
7085 if (!adev->gfx.ip_dump_compute_queues)
7086 return;
7087
7088 reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
7089 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7090 adev->gfx.mec.num_mec,
7091 adev->gfx.mec.num_pipe_per_mec,
7092 adev->gfx.mec.num_queue_per_pipe);
7093
7094 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7095 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7096 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7097 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7098 for (reg = 0; reg < reg_count; reg++) {
7099 if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
7100
drm_printf(p, "%-50s \t 0x%08x\n", 7101 "regCP_MEC_ME2_HEADER_DUMP", 7102 adev->gfx.ip_dump_compute_queues[index + reg]); 7103 else 7104 drm_printf(p, "%-50s \t 0x%08x\n", 7105 gc_cp_reg_list_11[reg].reg_name, 7106 adev->gfx.ip_dump_compute_queues[index + reg]); 7107 } 7108 index += reg_count; 7109 } 7110 } 7111 } 7112 7113 /* print gfx queue registers for all instances */ 7114 if (!adev->gfx.ip_dump_gfx_queues) 7115 return; 7116 7117 index = 0; 7118 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7119 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7120 adev->gfx.me.num_me, 7121 adev->gfx.me.num_pipe_per_me, 7122 adev->gfx.me.num_queue_per_pipe); 7123 7124 for (i = 0; i < adev->gfx.me.num_me; i++) { 7125 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7126 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7127 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7128 for (reg = 0; reg < reg_count; reg++) { 7129 drm_printf(p, "%-50s \t 0x%08x\n", 7130 gc_gfx_queue_reg_list_11[reg].reg_name, 7131 adev->gfx.ip_dump_gfx_queues[index + reg]); 7132 } 7133 index += reg_count; 7134 } 7135 } 7136 } 7137 } 7138 7139 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7140 { 7141 struct amdgpu_device *adev = ip_block->adev; 7142 uint32_t i, j, k, reg, index = 0; 7143 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7144 7145 if (!adev->gfx.ip_dump_core) 7146 return; 7147 7148 amdgpu_gfx_off_ctrl(adev, false); 7149 for (i = 0; i < reg_count; i++) 7150 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7151 amdgpu_gfx_off_ctrl(adev, true); 7152 7153 /* dump compute queue registers for all instances */ 7154 if (!adev->gfx.ip_dump_compute_queues) 7155 return; 7156 7157 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7158 amdgpu_gfx_off_ctrl(adev, false); 7159 mutex_lock(&adev->srbm_mutex); 7160 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7161 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7162 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7163 /* ME0 is for GFX so start from 1 for CP */ 7164 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7165 for (reg = 0; reg < reg_count; reg++) { 7166 if (i && 7167 gc_cp_reg_list_11[reg].reg_offset == 7168 regCP_MEC_ME1_HEADER_DUMP) 7169 adev->gfx.ip_dump_compute_queues[index + reg] = 7170 RREG32(SOC15_REG_OFFSET(GC, 0, 7171 regCP_MEC_ME2_HEADER_DUMP)); 7172 else 7173 adev->gfx.ip_dump_compute_queues[index + reg] = 7174 RREG32(SOC15_REG_ENTRY_OFFSET( 7175 gc_cp_reg_list_11[reg])); 7176 } 7177 index += reg_count; 7178 } 7179 } 7180 } 7181 soc21_grbm_select(adev, 0, 0, 0, 0); 7182 mutex_unlock(&adev->srbm_mutex); 7183 amdgpu_gfx_off_ctrl(adev, true); 7184 7185 /* dump gfx queue registers for all instances */ 7186 if (!adev->gfx.ip_dump_gfx_queues) 7187 return; 7188 7189 index = 0; 7190 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7191 amdgpu_gfx_off_ctrl(adev, false); 7192 mutex_lock(&adev->srbm_mutex); 7193 for (i = 0; i < adev->gfx.me.num_me; i++) { 7194 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7195 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7196 soc21_grbm_select(adev, i, j, k, 0); 7197 7198 for (reg = 0; reg < reg_count; reg++) { 7199 adev->gfx.ip_dump_gfx_queues[index + reg] = 7200 RREG32(SOC15_REG_ENTRY_OFFSET( 7201 gc_gfx_queue_reg_list_11[reg])); 7202 } 7203 index += reg_count; 7204 } 7205 } 7206 } 7207 soc21_grbm_select(adev, 0, 0, 0, 0); 7208 mutex_unlock(&adev->srbm_mutex); 7209 amdgpu_gfx_off_ctrl(adev, true); 7210 } 7211 7212 static 
void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7213 { 7214 /* Emit the cleaner shader */ 7215 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7216 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7217 } 7218 7219 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 7220 { 7221 amdgpu_gfx_profile_ring_begin_use(ring); 7222 7223 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7224 } 7225 7226 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 7227 { 7228 amdgpu_gfx_profile_ring_end_use(ring); 7229 7230 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7231 } 7232 7233 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 7234 .name = "gfx_v11_0", 7235 .early_init = gfx_v11_0_early_init, 7236 .late_init = gfx_v11_0_late_init, 7237 .sw_init = gfx_v11_0_sw_init, 7238 .sw_fini = gfx_v11_0_sw_fini, 7239 .hw_init = gfx_v11_0_hw_init, 7240 .hw_fini = gfx_v11_0_hw_fini, 7241 .suspend = gfx_v11_0_suspend, 7242 .resume = gfx_v11_0_resume, 7243 .is_idle = gfx_v11_0_is_idle, 7244 .wait_for_idle = gfx_v11_0_wait_for_idle, 7245 .soft_reset = gfx_v11_0_soft_reset, 7246 .check_soft_reset = gfx_v11_0_check_soft_reset, 7247 .post_soft_reset = gfx_v11_0_post_soft_reset, 7248 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 7249 .set_powergating_state = gfx_v11_0_set_powergating_state, 7250 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 7251 .dump_ip_state = gfx_v11_ip_dump, 7252 .print_ip_state = gfx_v11_ip_print, 7253 }; 7254 7255 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7256 .type = AMDGPU_RING_TYPE_GFX, 7257 .align_mask = 0xff, 7258 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7259 .support_64bit_ptrs = true, 7260 .secure_submission_supported = true, 7261 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 7262 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 7263 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 7264 .emit_frame_size = /* totally 247 maximum if 16 IBs */ 7265 5 + /* update_spm_vmid */ 7266 5 + /* COND_EXEC */ 7267 22 + /* SET_Q_PREEMPTION_MODE */ 7268 7 + /* PIPELINE_SYNC */ 7269 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7270 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7271 4 + /* VM_FLUSH */ 7272 8 + /* FENCE for VM_FLUSH */ 7273 20 + /* GDS switch */ 7274 5 + /* COND_EXEC */ 7275 7 + /* HDP_flush */ 7276 4 + /* VGT_flush */ 7277 31 + /* DE_META */ 7278 3 + /* CNTX_CTRL */ 7279 5 + /* HDP_INVL */ 7280 22 + /* SET_Q_PREEMPTION_MODE */ 7281 8 + 8 + /* FENCE x2 */ 7282 8 + /* gfx_v11_0_emit_mem_sync */ 7283 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7284 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 7285 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 7286 .emit_fence = gfx_v11_0_ring_emit_fence, 7287 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7288 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7289 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7290 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7291 .test_ring = gfx_v11_0_ring_test_ring, 7292 .test_ib = gfx_v11_0_ring_test_ib, 7293 .insert_nop = gfx_v11_ring_insert_nop, 7294 .pad_ib = amdgpu_ring_generic_pad_ib, 7295 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 7296 .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, 7297 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 7298 .preempt_ib = gfx_v11_0_ring_preempt_ib, 7299 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 7300 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7301 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7302 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7303 
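/* memory sync, queue reset, cleaner-shader and isolation/profiling hooks */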
.emit_mem_sync = gfx_v11_0_emit_mem_sync, 7304 .reset = gfx_v11_0_reset_kgq, 7305 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7306 .begin_use = gfx_v11_0_ring_begin_use, 7307 .end_use = gfx_v11_0_ring_end_use, 7308 }; 7309 7310 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 7311 .type = AMDGPU_RING_TYPE_COMPUTE, 7312 .align_mask = 0xff, 7313 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7314 .support_64bit_ptrs = true, 7315 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7316 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7317 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7318 .emit_frame_size = 7319 5 + /* update_spm_vmid */ 7320 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7321 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7322 5 + /* hdp invalidate */ 7323 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7324 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7325 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7326 2 + /* gfx_v11_0_ring_emit_vm_flush */ 7327 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 7328 8 + /* gfx_v11_0_emit_mem_sync */ 7329 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7330 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7331 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7332 .emit_fence = gfx_v11_0_ring_emit_fence, 7333 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7334 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7335 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7336 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7337 .test_ring = gfx_v11_0_ring_test_ring, 7338 .test_ib = gfx_v11_0_ring_test_ib, 7339 .insert_nop = gfx_v11_ring_insert_nop, 7340 .pad_ib = amdgpu_ring_generic_pad_ib, 7341 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7342 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7343 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7344 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7345 .reset = gfx_v11_0_reset_kcq, 7346 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7347 .begin_use = gfx_v11_0_ring_begin_use, 7348 .end_use = gfx_v11_0_ring_end_use, 7349 }; 7350 7351 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7352 .type = AMDGPU_RING_TYPE_KIQ, 7353 .align_mask = 0xff, 7354 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7355 .support_64bit_ptrs = true, 7356 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7357 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7358 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7359 .emit_frame_size = 7360 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7361 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7362 5 + /*hdp invalidate */ 7363 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7364 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7365 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7366 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7367 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7368 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7369 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 7370 .test_ring = gfx_v11_0_ring_test_ring, 7371 .test_ib = gfx_v11_0_ring_test_ib, 7372 .insert_nop = amdgpu_ring_insert_nop, 7373 .pad_ib = amdgpu_ring_generic_pad_ib, 7374 .emit_rreg = gfx_v11_0_ring_emit_rreg, 7375 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7376 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7377 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7378 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7379 }; 7380 7381 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 7382 { 7383 int i; 7384 7385 adev->gfx.kiq[0].ring.funcs = 
&gfx_v11_0_ring_funcs_kiq; 7386 7387 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7388 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 7389 7390 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7391 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 7392 } 7393 7394 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 7395 .set = gfx_v11_0_set_eop_interrupt_state, 7396 .process = gfx_v11_0_eop_irq, 7397 }; 7398 7399 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 7400 .set = gfx_v11_0_set_priv_reg_fault_state, 7401 .process = gfx_v11_0_priv_reg_irq, 7402 }; 7403 7404 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { 7405 .set = gfx_v11_0_set_bad_op_fault_state, 7406 .process = gfx_v11_0_bad_op_irq, 7407 }; 7408 7409 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 7410 .set = gfx_v11_0_set_priv_inst_fault_state, 7411 .process = gfx_v11_0_priv_inst_irq, 7412 }; 7413 7414 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 7415 .process = gfx_v11_0_rlc_gc_fed_irq, 7416 }; 7417 7418 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 7419 { 7420 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7421 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 7422 7423 adev->gfx.priv_reg_irq.num_types = 1; 7424 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 7425 7426 adev->gfx.bad_op_irq.num_types = 1; 7427 adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; 7428 7429 adev->gfx.priv_inst_irq.num_types = 1; 7430 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 7431 7432 adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 7433 adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 7434 7435 } 7436 7437 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 7438 { 7439 if (adev->flags & AMD_IS_APU) 7440 adev->gfx.imu.mode = MISSION_MODE; 7441 else 7442 adev->gfx.imu.mode = DEBUG_MODE; 7443 7444 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 7445 } 7446 7447 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 7448 { 7449 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 7450 } 7451 7452 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 7453 { 7454 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 7455 adev->gfx.config.max_sh_per_se * 7456 adev->gfx.config.max_shader_engines; 7457 7458 adev->gds.gds_size = 0x1000; 7459 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 7460 adev->gds.gws_size = 64; 7461 adev->gds.oa_size = 16; 7462 } 7463 7464 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 7465 { 7466 /* set gfx eng mqd */ 7467 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 7468 sizeof(struct v11_gfx_mqd); 7469 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 7470 gfx_v11_0_gfx_mqd_init; 7471 /* set compute eng mqd */ 7472 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 7473 sizeof(struct v11_compute_mqd); 7474 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 7475 gfx_v11_0_compute_mqd_init; 7476 } 7477 7478 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 7479 u32 bitmap) 7480 { 7481 u32 data; 7482 7483 if (!bitmap) 7484 return; 7485 7486 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7487 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7488 7489 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 7490 } 7491 7492 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 7493 { 7494 u32 
data, wgp_bitmask; 7495 data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 7496 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 7497 7498 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7499 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7500 7501 wgp_bitmask = 7502 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 7503 7504 return (~data) & wgp_bitmask; 7505 } 7506 7507 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 7508 { 7509 u32 wgp_idx, wgp_active_bitmap; 7510 u32 cu_bitmap_per_wgp, cu_active_bitmap; 7511 7512 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 7513 cu_active_bitmap = 0; 7514 7515 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 7516 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 7517 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 7518 if (wgp_active_bitmap & (1 << wgp_idx)) 7519 cu_active_bitmap |= cu_bitmap_per_wgp; 7520 } 7521 7522 return cu_active_bitmap; 7523 } 7524 7525 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 7526 struct amdgpu_cu_info *cu_info) 7527 { 7528 int i, j, k, counter, active_cu_number = 0; 7529 u32 mask, bitmap; 7530 unsigned disable_masks[8 * 2]; 7531 7532 if (!adev || !cu_info) 7533 return -EINVAL; 7534 7535 amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2); 7536 7537 mutex_lock(&adev->grbm_idx_mutex); 7538 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7539 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7540 bitmap = i * adev->gfx.config.max_sh_per_se + j; 7541 if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) 7542 continue; 7543 mask = 1; 7544 counter = 0; 7545 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); 7546 if (i < 8 && j < 2) 7547 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 7548 adev, disable_masks[i * 2 + j]); 7549 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 7550 7551 /** 7552 * GFX11 could support more than 4 SEs, while the bitmap 7553 * in cu_info struct is 4x4 and ioctl interface struct 7554 * drm_amdgpu_info_device should keep stable. 7555 * So we use last two columns of bitmap to store cu mask for 7556 * SEs 4 to 7, the layout of the bitmap is as below: 7557 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 7558 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 7559 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 7560 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 7561 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 7562 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 7563 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 7564 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 7565 */ 7566 cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; 7567 7568 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7569 if (bitmap & mask) 7570 counter++; 7571 7572 mask <<= 1; 7573 } 7574 active_cu_number += counter; 7575 } 7576 } 7577 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7578 mutex_unlock(&adev->grbm_idx_mutex); 7579 7580 cu_info->number = active_cu_number; 7581 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7582 7583 return 0; 7584 } 7585 7586 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 7587 { 7588 .type = AMD_IP_BLOCK_TYPE_GFX, 7589 .major = 11, 7590 .minor = 0, 7591 .rev = 0, 7592 .funcs = &gfx_v11_0_ip_funcs, 7593 }; 7594
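
/*
 * Illustrative sketch, not driver code: the layout comment in
 * gfx_v11_0_get_cu_info() above explains how CU masks for SEs 4-7 are
 * folded into the last two columns of the fixed 4x4 cu_info bitmap. The
 * #if 0 block below only demonstrates that index math; the helper name
 * and the printk-based dump are hypothetical, not part of the driver.
 */
#if 0
static void gfx_v11_0_cu_bitmap_layout_demo(void)
{
	unsigned int se, sh;

	/* For SE se and SH sh (se < 8, sh < 2), the per-SH CU mask is
	 * stored at bitmap[0][se % 4][sh + (se / 4) * 2], so e.g.
	 * SE5/SH1 lands at bitmap[0][1][3], matching the table above.
	 */
	for (se = 0; se < 8; se++)
		for (sh = 0; sh < 2; sh++)
			printk(KERN_DEBUG "SE%u SH%u -> bitmap[0][%u][%u]\n",
			       se, sh, se % 4, sh + (se / 4) * 2);
}
#endif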