1 /* 2 * Copyright 2021 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "imu_v11_0.h" 33 #include "soc21.h" 34 #include "nvd.h" 35 36 #include "gc/gc_11_0_0_offset.h" 37 #include "gc/gc_11_0_0_sh_mask.h" 38 #include "smuio/smuio_13_0_6_offset.h" 39 #include "smuio/smuio_13_0_6_sh_mask.h" 40 #include "navi10_enum.h" 41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" 42 43 #include "soc15.h" 44 #include "clearstate_gfx11.h" 45 #include "v11_structs.h" 46 #include "gfx_v11_0.h" 47 #include "gfx_v11_0_cleaner_shader.h" 48 #include "gfx_v11_0_3.h" 49 #include "nbio_v4_3.h" 50 #include "mes_v11_0.h" 51 #include "mes_userqueue.h" 52 #include "amdgpu_userq_fence.h" 53 54 #define GFX11_NUM_GFX_RINGS 1 55 #define GFX11_MEC_HPD_SIZE 2048 56 57 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 58 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 59 60 #define regCGTT_WD_CLK_CTRL 0x5086 61 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e 63 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 64 #define regPC_CONFIG_CNTL_1 0x194d 65 #define regPC_CONFIG_CNTL_1_BASE_IDX 1 66 67 #define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0 0x0030 68 #define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0_BASE_IDX 1 69 #define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0 0x0031 70 #define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0_BASE_IDX 1 71 72 #define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 73 #define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 74 #define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 75 #define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 76 #define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 77 #define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 78 #define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 79 80 #define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 81 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 82 #define regCP_MQD_CONTROL_DEFAULT 0x00000100 83 #define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 84 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 85 #define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 86 #define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 87 #define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 88 89 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); 90 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); 91 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); 92 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); 93 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); 94 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); 95 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); 96 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); 98 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin"); 100 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin"); 104 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); 105 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); 106 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); 107 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); 108 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); 109 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); 110 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); 112 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); 113 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); 114 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); 115 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); 116 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); 118 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); 120 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); 121 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); 122 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); 123 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); 124 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); 125 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); 126 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); 128 MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin"); 129 MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin"); 130 MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin"); 131 MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin"); 132 133 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { 134 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), 135 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), 136 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), 137 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), 138 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), 139 SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), 140 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), 141 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), 142 SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), 143 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), 144 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), 145 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), 146 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), 147 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), 148 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), 149 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), 150 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), 151 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 152 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), 153 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), 154 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), 155 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), 156 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE), 157 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR), 158 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR), 159 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 160 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), 161 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 162 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 163 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 164 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), 165 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), 166 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), 167 SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), 168 SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), 169 SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), 170 SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT), 171 SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT), 172 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), 173 SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), 174 SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), 175 SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS), 176 SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), 177 SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), 178 SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), 179 SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), 180 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), 181 SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS), 182 SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), 183 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), 184 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), 185 SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR), 186 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), 187 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), 188 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), 189 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR), 190 SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), 191 /* cp header registers */ 192 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 193 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 194 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 195 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 196 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 197 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 198 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 199 SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), 200 /* SE status registers */ 201 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), 202 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), 203 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), 204 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3), 205 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4), 206 SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5) 207 }; 208 209 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = { 210 /* compute registers */ 211 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), 212 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), 213 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), 214 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), 215 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), 216 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), 217 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), 218 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), 219 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), 220 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), 221 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), 222 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), 223 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), 224 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), 225 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), 226 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), 227 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), 228 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), 229 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), 230 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), 231 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), 232 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), 233 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), 234 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), 235 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), 236 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), 237 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), 238 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), 239 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), 240 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), 241 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), 242 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), 243 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), 244 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), 245 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), 246 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), 247 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), 248 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), 249 SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS), 250 /* cp header registers */ 251 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 252 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 253 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 254 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 255 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 256 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 257 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 258 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 259 }; 260 261 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = { 262 /* gfx queue registers */ 263 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), 264 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), 265 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), 266 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), 267 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), 268 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), 269 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), 270 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), 271 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR), 272 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), 273 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), 274 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), 275 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), 276 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), 277 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), 278 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), 279 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), 280 SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), 281 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), 282 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), 283 SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), 284 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), 285 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), 286 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), 287 SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), 288 /* cp header registers */ 289 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 290 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 291 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 292 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 293 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 294 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 295 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 296 SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), 297 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 298 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 299 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 300 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 301 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 302 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 303 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 304 SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), 305 }; 306 307 static const struct soc15_reg_golden golden_settings_gc_11_0[] = { 308 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) 309 }; 310 311 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = 312 { 313 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010), 314 SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010), 315 SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), 316 SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988), 317 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007), 318 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008), 319 SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100), 320 SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), 321 SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) 322 }; 323 324 #define DEFAULT_SH_MEM_CONFIG \ 325 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 326 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 327 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 328 329 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev); 330 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev); 331 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev); 332 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev); 333 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev); 334 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev); 335 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev); 336 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 337 struct amdgpu_cu_info *cu_info); 338 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev); 339 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 340 u32 sh_num, u32 instance, int xcc_id); 341 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev); 342 343 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); 344 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 345 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 346 uint32_t val); 347 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 348 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 349 uint16_t pasid, uint32_t flush_type, 350 bool all_hub, uint8_t dst_sel); 351 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 352 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 353 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 354 bool enable); 355 356 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 357 { 358 struct amdgpu_device *adev = kiq_ring->adev; 359 u64 shader_mc_addr; 360 361 /* Cleaner shader MC address */ 362 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 363 364 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 365 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 366 PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ 367 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 368 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 369 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 370 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ 371 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ 372 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 373 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 374 } 375 376 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 377 struct amdgpu_ring *ring) 378 { 379 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 380 uint64_t wptr_addr = ring->wptr_gpu_addr; 381 uint32_t me = 0, eng_sel = 0; 382 383 switch (ring->funcs->type) { 384 case AMDGPU_RING_TYPE_COMPUTE: 385 me = 1; 386 eng_sel = 0; 387 break; 388 case AMDGPU_RING_TYPE_GFX: 389 me = 0; 390 eng_sel = 4; 391 break; 392 case AMDGPU_RING_TYPE_MES: 393 me = 2; 394 eng_sel = 5; 395 break; 396 default: 397 WARN_ON(1); 398 } 399 400 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 401 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 402 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 403 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 404 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 405 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 406 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 407 PACKET3_MAP_QUEUES_ME((me)) | 408 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 409 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 410 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 411 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 412 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 413 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 414 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 415 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 416 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 417 } 418 419 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 420 struct amdgpu_ring *ring, 421 enum amdgpu_unmap_queues_action action, 422 u64 gpu_addr, u64 seq) 423 { 424 struct amdgpu_device *adev = kiq_ring->adev; 425 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 426 427 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 428 amdgpu_mes_unmap_legacy_queue(adev, ring, action, 429 gpu_addr, seq, 0); 430 return; 431 } 432 433 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 434 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 435 PACKET3_UNMAP_QUEUES_ACTION(action) | 436 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 437 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 438 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 439 amdgpu_ring_write(kiq_ring, 440 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 441 442 if (action == PREEMPT_QUEUES_NO_UNMAP) { 443 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 444 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 445 amdgpu_ring_write(kiq_ring, seq); 446 } else { 447 amdgpu_ring_write(kiq_ring, 0); 448 amdgpu_ring_write(kiq_ring, 0); 449 amdgpu_ring_write(kiq_ring, 0); 450 } 451 } 452 453 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 454 struct amdgpu_ring *ring, 455 u64 addr, 456 u64 seq) 457 { 458 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 459 460 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 461 amdgpu_ring_write(kiq_ring, 462 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 463 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 464 PACKET3_QUERY_STATUS_COMMAND(2)); 465 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 466 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 467 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 468 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 469 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 470 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 471 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 472 } 473 474 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 475 uint16_t pasid, uint32_t flush_type, 476 bool all_hub) 477 { 478 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 479 } 480 481 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 482 .kiq_set_resources = gfx11_kiq_set_resources, 483 .kiq_map_queues = gfx11_kiq_map_queues, 484 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 485 .kiq_query_status = gfx11_kiq_query_status, 486 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 487 .set_resources_size = 8, 488 .map_queues_size = 7, 489 .unmap_queues_size = 6, 490 .query_status_size = 7, 491 .invalidate_tlbs_size = 2, 492 }; 493 494 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 495 { 496 adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; 497 } 498 499 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 500 { 501 if (amdgpu_sriov_vf(adev)) 502 return; 503 504 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 505 case IP_VERSION(11, 0, 1): 506 case IP_VERSION(11, 0, 4): 507 soc15_program_register_sequence(adev, 508 golden_settings_gc_11_0_1, 509 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 510 break; 511 default: 512 break; 513 } 514 soc15_program_register_sequence(adev, 515 golden_settings_gc_11_0, 516 (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); 517 518 } 519 520 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 521 bool wc, uint32_t reg, uint32_t val) 522 { 523 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 524 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 525 WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0)); 526 amdgpu_ring_write(ring, reg); 527 amdgpu_ring_write(ring, 0); 528 amdgpu_ring_write(ring, val); 529 } 530 531 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 532 int mem_space, int opt, uint32_t addr0, 533 uint32_t addr1, uint32_t ref, uint32_t mask, 534 uint32_t inv) 535 { 536 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 537 amdgpu_ring_write(ring, 538 /* memory (1) or register (0) */ 539 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 540 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 541 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 542 WAIT_REG_MEM_ENGINE(eng_sel))); 543 544 if (mem_space) 545 BUG_ON(addr0 & 0x3); /* Dword align */ 546 amdgpu_ring_write(ring, addr0); 547 amdgpu_ring_write(ring, addr1); 548 amdgpu_ring_write(ring, ref); 549 amdgpu_ring_write(ring, mask); 550 amdgpu_ring_write(ring, inv); /* poll interval */ 551 } 552 553 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 554 { 555 /* Header itself is a NOP packet */ 556 if (num_nop == 1) { 557 amdgpu_ring_write(ring, ring->funcs->nop); 558 return; 559 } 560 561 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 562 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 563 564 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 565 amdgpu_ring_insert_nop(ring, num_nop - 1); 566 } 567 568 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) 569 { 570 struct amdgpu_device *adev = ring->adev; 571 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 572 uint32_t tmp = 0; 573 unsigned i; 574 int r; 575 576 WREG32(scratch, 0xCAFEDEAD); 577 r = amdgpu_ring_alloc(ring, 5); 578 if (r) { 579 drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n", 580 ring->idx, r); 581 return r; 582 } 583 584 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 585 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); 586 } else { 587 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 588 amdgpu_ring_write(ring, scratch - 589 PACKET3_SET_UCONFIG_REG_START); 590 amdgpu_ring_write(ring, 0xDEADBEEF); 591 } 592 amdgpu_ring_commit(ring); 593 594 for (i = 0; i < adev->usec_timeout; i++) { 595 tmp = RREG32(scratch); 596 if (tmp == 0xDEADBEEF) 597 break; 598 if (amdgpu_emu_mode == 1) 599 msleep(1); 600 else 601 udelay(1); 602 } 603 604 if (i >= adev->usec_timeout) 605 r = -ETIMEDOUT; 606 return r; 607 } 608 609 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 610 { 611 struct amdgpu_device *adev = ring->adev; 612 struct amdgpu_ib ib; 613 struct dma_fence *f = NULL; 614 unsigned index; 615 uint64_t gpu_addr; 616 uint32_t *cpu_ptr; 617 long r; 618 619 /* MES KIQ fw hasn't indirect buffer support for now */ 620 if (adev->enable_mes_kiq && 621 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 622 return 0; 623 624 memset(&ib, 0, sizeof(ib)); 625 626 r = amdgpu_device_wb_get(adev, &index); 627 if (r) 628 return r; 629 630 gpu_addr = adev->wb.gpu_addr + (index * 4); 631 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 632 cpu_ptr = &adev->wb.wb[index]; 633 634 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 635 if (r) { 636 drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); 637 goto err1; 638 } 639 640 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 641 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 642 ib.ptr[2] = lower_32_bits(gpu_addr); 643 ib.ptr[3] = upper_32_bits(gpu_addr); 644 ib.ptr[4] = 0xDEADBEEF; 645 ib.length_dw = 5; 646 647 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 648 if (r) 649 goto err2; 650 651 r = dma_fence_wait_timeout(f, false, timeout); 652 if (r == 0) { 653 r = -ETIMEDOUT; 654 goto err2; 655 } else if (r < 0) { 656 goto err2; 657 } 658 659 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 660 r = 0; 661 else 662 r = -EINVAL; 663 err2: 664 amdgpu_ib_free(&ib, NULL); 665 dma_fence_put(f); 666 err1: 667 amdgpu_device_wb_free(adev, index); 668 return r; 669 } 670 671 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 672 { 673 amdgpu_ucode_release(&adev->gfx.pfp_fw); 674 amdgpu_ucode_release(&adev->gfx.me_fw); 675 amdgpu_ucode_release(&adev->gfx.rlc_fw); 676 amdgpu_ucode_release(&adev->gfx.mec_fw); 677 678 kfree(adev->gfx.rlc.register_list_format); 679 } 680 681 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 682 { 683 const struct psp_firmware_header_v1_0 *toc_hdr; 684 int err = 0; 685 686 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 687 AMDGPU_UCODE_REQUIRED, 688 "amdgpu/%s_toc.bin", ucode_prefix); 689 if (err) 690 goto out; 691 692 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 693 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 694 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 695 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 696 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 697 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 698 return 0; 699 out: 700 amdgpu_ucode_release(&adev->psp.toc_fw); 701 return err; 702 } 703 704 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 705 { 706 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 707 case IP_VERSION(11, 0, 0): 708 case IP_VERSION(11, 0, 2): 709 case IP_VERSION(11, 0, 3): 710 if ((adev->gfx.me_fw_version >= 1505) && 711 (adev->gfx.pfp_fw_version >= 1600) && 712 (adev->gfx.mec_fw_version >= 512)) { 713 if (amdgpu_sriov_vf(adev)) 714 adev->gfx.cp_gfx_shadow = true; 715 else 716 adev->gfx.cp_gfx_shadow = false; 717 } 718 break; 719 default: 720 adev->gfx.cp_gfx_shadow = false; 721 break; 722 } 723 } 724 725 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 726 { 727 char ucode_prefix[25]; 728 int err; 729 const struct rlc_firmware_header_v2_0 *rlc_hdr; 730 uint16_t version_major; 731 uint16_t version_minor; 732 733 DRM_DEBUG("\n"); 734 735 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 736 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 737 AMDGPU_UCODE_REQUIRED, 738 "amdgpu/%s_pfp.bin", ucode_prefix); 739 if (err) 740 goto out; 741 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 742 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 743 (union amdgpu_firmware_header *) 744 adev->gfx.pfp_fw->data, 2, 0); 745 if (adev->gfx.rs64_enable) { 746 dev_info(adev->dev, "CP RS64 enable\n"); 747 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 748 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 749 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 750 } else { 751 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 752 } 753 754 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 755 AMDGPU_UCODE_REQUIRED, 756 "amdgpu/%s_me.bin", ucode_prefix); 757 if (err) 758 goto out; 759 if (adev->gfx.rs64_enable) { 760 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 761 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 762 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 763 } else { 764 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 765 } 766 767 if (!amdgpu_sriov_vf(adev)) { 768 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 769 adev->pdev->revision == 0xCE) 770 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 771 AMDGPU_UCODE_REQUIRED, 772 "amdgpu/gc_11_0_0_rlc_1.bin"); 773 else if (amdgpu_is_kicker_fw(adev)) 774 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 775 AMDGPU_UCODE_REQUIRED, 776 "amdgpu/%s_rlc_kicker.bin", ucode_prefix); 777 else 778 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 779 AMDGPU_UCODE_REQUIRED, 780 "amdgpu/%s_rlc.bin", ucode_prefix); 781 if (err) 782 goto out; 783 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 784 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 785 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 786 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 787 if (err) 788 goto out; 789 } 790 791 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 792 AMDGPU_UCODE_REQUIRED, 793 "amdgpu/%s_mec.bin", ucode_prefix); 794 if (err) 795 goto out; 796 if (adev->gfx.rs64_enable) { 797 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 798 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 799 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 800 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 801 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 802 } else { 803 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 804 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 805 } 806 807 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 808 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 809 810 /* only one MEC for gfx 11.0.0. */ 811 adev->gfx.mec2_fw = NULL; 812 813 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 814 815 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 816 err = adev->gfx.imu.funcs->init_microcode(adev); 817 if (err) 818 DRM_ERROR("Failed to init imu firmware!\n"); 819 return err; 820 } 821 822 out: 823 if (err) { 824 amdgpu_ucode_release(&adev->gfx.pfp_fw); 825 amdgpu_ucode_release(&adev->gfx.me_fw); 826 amdgpu_ucode_release(&adev->gfx.rlc_fw); 827 amdgpu_ucode_release(&adev->gfx.mec_fw); 828 } 829 830 return err; 831 } 832 833 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 834 { 835 u32 count = 0; 836 const struct cs_section_def *sect = NULL; 837 const struct cs_extent_def *ext = NULL; 838 839 /* begin clear state */ 840 count += 2; 841 /* context control state */ 842 count += 3; 843 844 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 845 for (ext = sect->section; ext->extent != NULL; ++ext) { 846 if (sect->id == SECT_CONTEXT) 847 count += 2 + ext->reg_count; 848 else 849 return 0; 850 } 851 } 852 853 /* set PA_SC_TILE_STEERING_OVERRIDE */ 854 count += 3; 855 /* end clear state */ 856 count += 2; 857 /* clear state */ 858 count += 2; 859 860 return count; 861 } 862 863 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) 864 { 865 u32 count = 0; 866 int ctx_reg_offset; 867 868 if (adev->gfx.rlc.cs_data == NULL) 869 return; 870 if (buffer == NULL) 871 return; 872 873 count = amdgpu_gfx_csb_preamble_start(buffer); 874 count = amdgpu_gfx_csb_data_parser(adev, buffer, count); 875 876 ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 877 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 878 buffer[count++] = cpu_to_le32(ctx_reg_offset); 879 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 880 881 amdgpu_gfx_csb_preamble_end(buffer, count); 882 } 883 884 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 885 { 886 /* clear state block */ 887 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 888 &adev->gfx.rlc.clear_state_gpu_addr, 889 (void **)&adev->gfx.rlc.cs_ptr); 890 891 /* jump table block */ 892 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 893 &adev->gfx.rlc.cp_table_gpu_addr, 894 (void **)&adev->gfx.rlc.cp_table_ptr); 895 } 896 897 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 898 { 899 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 900 901 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 902 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 903 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 904 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 905 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 906 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 907 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 908 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 909 adev->gfx.rlc.rlcg_reg_access_supported = true; 910 } 911 912 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 913 { 914 const struct cs_section_def *cs_data; 915 int r; 916 917 adev->gfx.rlc.cs_data = gfx11_cs_data; 918 919 cs_data = adev->gfx.rlc.cs_data; 920 921 if (cs_data) { 922 /* init clear state block */ 923 r = amdgpu_gfx_rlc_init_csb(adev); 924 if (r) 925 return r; 926 } 927 928 /* init spm vmid with 0xf */ 929 if (adev->gfx.rlc.funcs->update_spm_vmid) 930 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); 931 932 return 0; 933 } 934 935 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 936 { 937 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 938 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 939 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 940 } 941 942 static void gfx_v11_0_me_init(struct amdgpu_device *adev) 943 { 944 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 945 946 amdgpu_gfx_graphics_queue_acquire(adev); 947 } 948 949 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 950 { 951 int r; 952 u32 *hpd; 953 size_t mec_hpd_size; 954 955 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 956 957 /* take ownership of the relevant compute queues */ 958 amdgpu_gfx_compute_queue_acquire(adev); 959 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 960 961 if (mec_hpd_size) { 962 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 963 AMDGPU_GEM_DOMAIN_GTT, 964 &adev->gfx.mec.hpd_eop_obj, 965 &adev->gfx.mec.hpd_eop_gpu_addr, 966 (void **)&hpd); 967 if (r) { 968 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 969 gfx_v11_0_mec_fini(adev); 970 return r; 971 } 972 973 memset(hpd, 0, mec_hpd_size); 974 975 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 976 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 977 } 978 979 return 0; 980 } 981 982 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 983 { 984 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 985 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 986 (address << SQ_IND_INDEX__INDEX__SHIFT)); 987 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 988 } 989 990 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 991 uint32_t thread, uint32_t regno, 992 uint32_t num, uint32_t *out) 993 { 994 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 995 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 996 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 997 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 998 (SQ_IND_INDEX__AUTO_INCR_MASK)); 999 while (num--) 1000 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 1001 } 1002 1003 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1004 { 1005 /* in gfx11 the SIMD_ID is specified as part of the INSTANCE 1006 * field when performing a select_se_sh so it should be 1007 * zero here */ 1008 WARN_ON(simd != 0); 1009 1010 /* type 3 wave data */ 1011 dst[(*no_fields)++] = 3; 1012 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1013 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1014 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1015 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1016 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1017 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1018 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1019 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1020 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1021 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1022 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1023 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1024 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 1025 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1026 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1027 } 1028 1029 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1030 uint32_t wave, uint32_t start, 1031 uint32_t size, uint32_t *dst) 1032 { 1033 WARN_ON(simd != 0); 1034 1035 wave_read_regs( 1036 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1037 dst); 1038 } 1039 1040 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1041 uint32_t wave, uint32_t thread, 1042 uint32_t start, uint32_t size, 1043 uint32_t *dst) 1044 { 1045 wave_read_regs( 1046 adev, wave, thread, 1047 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1048 } 1049 1050 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1051 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 1052 { 1053 soc21_grbm_select(adev, me, pipe, q, vm); 1054 } 1055 1056 /* all sizes are in bytes */ 1057 #define MQD_SHADOW_BASE_SIZE 73728 1058 #define MQD_SHADOW_BASE_ALIGNMENT 256 1059 #define MQD_FWWORKAREA_SIZE 484 1060 #define MQD_FWWORKAREA_ALIGNMENT 256 1061 1062 static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, 1063 struct amdgpu_gfx_shadow_info *shadow_info) 1064 { 1065 /* for gfx */ 1066 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1067 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1068 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1069 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1070 /* for compute */ 1071 shadow_info->eop_size = GFX11_MEC_HPD_SIZE; 1072 shadow_info->eop_alignment = 256; 1073 } 1074 1075 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1076 struct amdgpu_gfx_shadow_info *shadow_info, 1077 bool skip_check) 1078 { 1079 if (adev->gfx.cp_gfx_shadow || skip_check) { 1080 gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); 1081 return 0; 1082 } else { 1083 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1084 return -ENOTSUPP; 1085 } 1086 } 1087 1088 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1089 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1090 .select_se_sh = &gfx_v11_0_select_se_sh, 1091 .read_wave_data = &gfx_v11_0_read_wave_data, 1092 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1093 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1094 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1095 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1096 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1097 .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, 1098 }; 1099 1100 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1101 { 1102 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1103 case IP_VERSION(11, 0, 0): 1104 case IP_VERSION(11, 0, 2): 1105 adev->gfx.config.max_hw_contexts = 8; 1106 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1107 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1108 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1109 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1110 break; 1111 case IP_VERSION(11, 0, 3): 1112 adev->gfx.ras = &gfx_v11_0_3_ras; 1113 adev->gfx.config.max_hw_contexts = 8; 1114 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1115 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1116 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1117 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1118 break; 1119 case IP_VERSION(11, 0, 1): 1120 case IP_VERSION(11, 0, 4): 1121 case IP_VERSION(11, 5, 0): 1122 case IP_VERSION(11, 5, 1): 1123 case IP_VERSION(11, 5, 2): 1124 case IP_VERSION(11, 5, 3): 1125 case IP_VERSION(11, 5, 4): 1126 adev->gfx.config.max_hw_contexts = 8; 1127 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1128 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1129 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1130 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1131 break; 1132 default: 1133 BUG(); 1134 break; 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1141 int me, int pipe, int queue) 1142 { 1143 struct amdgpu_ring *ring; 1144 unsigned int irq_type; 1145 unsigned int hw_prio; 1146 1147 ring = &adev->gfx.gfx_ring[ring_id]; 1148 1149 ring->me = me; 1150 ring->pipe = pipe; 1151 ring->queue = queue; 1152 1153 ring->ring_obj = NULL; 1154 ring->use_doorbell = true; 1155 if (adev->gfx.disable_kq) { 1156 ring->no_scheduler = true; 1157 ring->no_user_submission = true; 1158 } 1159 1160 if (!ring_id) 1161 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1162 else 1163 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1164 ring->vm_hub = AMDGPU_GFXHUB(0); 1165 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1166 1167 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1168 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1169 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1170 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1171 hw_prio, NULL); 1172 } 1173 1174 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1175 int mec, int pipe, int queue) 1176 { 1177 int r; 1178 unsigned irq_type; 1179 struct amdgpu_ring *ring; 1180 unsigned int hw_prio; 1181 1182 ring = &adev->gfx.compute_ring[ring_id]; 1183 1184 /* mec0 is me1 */ 1185 ring->me = mec + 1; 1186 ring->pipe = pipe; 1187 ring->queue = queue; 1188 1189 ring->ring_obj = NULL; 1190 ring->use_doorbell = true; 1191 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1192 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1193 + (ring_id * GFX11_MEC_HPD_SIZE); 1194 ring->vm_hub = AMDGPU_GFXHUB(0); 1195 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1196 1197 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1198 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1199 + ring->pipe; 1200 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 1201 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1202 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1203 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1204 hw_prio, NULL); 1205 if (r) 1206 return r; 1207 1208 return 0; 1209 } 1210 1211 static struct { 1212 SOC21_FIRMWARE_ID id; 1213 unsigned int offset; 1214 unsigned int size; 1215 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1216 1217 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1218 { 1219 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1220 1221 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1222 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1223 rlc_autoload_info[ucode->id].id = ucode->id; 1224 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1225 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1226 1227 ucode++; 1228 } 1229 } 1230 1231 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1232 { 1233 uint32_t total_size = 0; 1234 SOC21_FIRMWARE_ID id; 1235 1236 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1237 1238 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1239 total_size += rlc_autoload_info[id].size; 1240 1241 /* In case the offset in rlc toc ucode is aligned */ 1242 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1243 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1244 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1245 1246 return total_size; 1247 } 1248 1249 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1250 { 1251 int r; 1252 uint32_t total_size; 1253 1254 total_size = gfx_v11_0_calc_toc_total_size(adev); 1255 1256 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1257 AMDGPU_GEM_DOMAIN_VRAM | 1258 AMDGPU_GEM_DOMAIN_GTT, 1259 &adev->gfx.rlc.rlc_autoload_bo, 1260 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1261 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1262 1263 if (r) { 1264 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1265 return r; 1266 } 1267 1268 return 0; 1269 } 1270 1271 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1272 SOC21_FIRMWARE_ID id, 1273 const void *fw_data, 1274 uint32_t fw_size, 1275 uint32_t *fw_autoload_mask) 1276 { 1277 uint32_t toc_offset; 1278 uint32_t toc_fw_size; 1279 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1280 1281 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1282 return; 1283 1284 toc_offset = rlc_autoload_info[id].offset; 1285 toc_fw_size = rlc_autoload_info[id].size; 1286 1287 if (fw_size == 0) 1288 fw_size = toc_fw_size; 1289 1290 if (fw_size > toc_fw_size) 1291 fw_size = toc_fw_size; 1292 1293 memcpy(ptr + toc_offset, fw_data, fw_size); 1294 1295 if (fw_size < toc_fw_size) 1296 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1297 1298 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1299 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1300 } 1301 1302 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1303 uint32_t *fw_autoload_mask) 1304 { 1305 void *data; 1306 uint32_t size; 1307 uint64_t *toc_ptr; 1308 1309 *(uint64_t *)fw_autoload_mask |= 0x1; 1310 1311 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1312 1313 data = adev->psp.toc.start_addr; 1314 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1315 1316 toc_ptr = (uint64_t *)data + size / 8 - 1; 1317 *toc_ptr = *(uint64_t *)fw_autoload_mask; 1318 1319 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1320 data, size, fw_autoload_mask); 1321 } 1322 1323 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1324 uint32_t *fw_autoload_mask) 1325 { 1326 const __le32 *fw_data; 1327 uint32_t fw_size; 1328 const struct gfx_firmware_header_v1_0 *cp_hdr; 1329 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 1330 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1331 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1332 uint16_t version_major, version_minor; 1333 1334 if (adev->gfx.rs64_enable) { 1335 /* pfp ucode */ 1336 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1337 adev->gfx.pfp_fw->data; 1338 /* instruction */ 1339 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1340 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1341 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1342 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1343 fw_data, fw_size, fw_autoload_mask); 1344 /* data */ 1345 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1346 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1347 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1348 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1349 fw_data, fw_size, fw_autoload_mask); 1350 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1351 fw_data, fw_size, fw_autoload_mask); 1352 /* me ucode */ 1353 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1354 adev->gfx.me_fw->data; 1355 /* instruction */ 1356 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1357 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1358 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1359 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1360 fw_data, fw_size, fw_autoload_mask); 1361 /* data */ 1362 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1363 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1364 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1365 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1366 fw_data, fw_size, fw_autoload_mask); 1367 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1368 fw_data, fw_size, fw_autoload_mask); 1369 /* mec ucode */ 1370 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1371 adev->gfx.mec_fw->data; 1372 /* instruction */ 1373 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1374 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1375 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1376 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1377 fw_data, fw_size, fw_autoload_mask); 1378 /* data */ 1379 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1380 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1381 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1382 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1383 fw_data, fw_size, fw_autoload_mask); 1384 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1385 fw_data, fw_size, fw_autoload_mask); 1386 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1387 fw_data, fw_size, fw_autoload_mask); 1388 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1389 fw_data, fw_size, fw_autoload_mask); 1390 } else { 1391 /* pfp ucode */ 1392 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1393 adev->gfx.pfp_fw->data; 1394 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1395 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1396 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1397 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1398 fw_data, fw_size, fw_autoload_mask); 1399 1400 /* me ucode */ 1401 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1402 adev->gfx.me_fw->data; 1403 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1404 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1405 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1406 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1407 fw_data, fw_size, fw_autoload_mask); 1408 1409 /* mec ucode */ 1410 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1411 adev->gfx.mec_fw->data; 1412 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1413 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1414 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1415 cp_hdr->jt_size * 4; 1416 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1417 fw_data, fw_size, fw_autoload_mask); 1418 } 1419 1420 /* rlc ucode */ 1421 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1422 adev->gfx.rlc_fw->data; 1423 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1424 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1425 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1426 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1427 fw_data, fw_size, fw_autoload_mask); 1428 1429 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1430 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1431 if (version_major == 2) { 1432 if (version_minor >= 2) { 1433 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1434 1435 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1436 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1437 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1438 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1439 fw_data, fw_size, fw_autoload_mask); 1440 1441 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1442 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1443 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1444 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1445 fw_data, fw_size, fw_autoload_mask); 1446 } 1447 } 1448 } 1449 1450 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1451 uint32_t *fw_autoload_mask) 1452 { 1453 const __le32 *fw_data; 1454 uint32_t fw_size; 1455 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1456 1457 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 1458 adev->sdma.instance[0].fw->data; 1459 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1460 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1461 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1462 1463 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1464 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1465 1466 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1467 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1468 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1469 1470 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1471 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask); 1472 } 1473 1474 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1475 uint32_t *fw_autoload_mask) 1476 { 1477 const __le32 *fw_data; 1478 unsigned fw_size; 1479 const struct mes_firmware_header_v1_0 *mes_hdr; 1480 int pipe, ucode_id, data_id; 1481 1482 for (pipe = 0; pipe < 2; pipe++) { 1483 if (pipe==0) { 1484 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1485 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1486 } else { 1487 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1488 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1489 } 1490 1491 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1492 adev->mes.fw[pipe]->data; 1493 1494 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1495 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1496 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1497 1498 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1499 ucode_id, fw_data, fw_size, fw_autoload_mask); 1500 1501 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1502 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1503 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1504 1505 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1506 data_id, fw_data, fw_size, fw_autoload_mask); 1507 } 1508 } 1509 1510 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1511 { 1512 uint32_t rlc_g_offset, rlc_g_size; 1513 uint64_t gpu_addr; 1514 uint32_t autoload_fw_id[2]; 1515 1516 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1517 1518 /* RLC autoload sequence 2: copy ucode */ 1519 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1520 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1521 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1522 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1523 1524 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1525 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1526 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1527 1528 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1529 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1530 1531 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1532 1533 /* RLC autoload sequence 3: load IMU fw */ 1534 if (adev->gfx.imu.funcs->load_microcode) 1535 adev->gfx.imu.funcs->load_microcode(adev); 1536 /* RLC autoload sequence 4 init IMU fw */ 1537 if (adev->gfx.imu.funcs->setup_imu) 1538 adev->gfx.imu.funcs->setup_imu(adev); 1539 if (adev->gfx.imu.funcs->start_imu) 1540 adev->gfx.imu.funcs->start_imu(adev); 1541 1542 /* RLC autoload sequence 5 disable gpa mode */ 1543 gfx_v11_0_disable_gpa_mode(adev); 1544 1545 return 0; 1546 } 1547 1548 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1549 { 1550 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1551 uint32_t *ptr; 1552 uint32_t inst; 1553 1554 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1555 if (!ptr) { 1556 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1557 adev->gfx.ip_dump_core = NULL; 1558 } else { 1559 adev->gfx.ip_dump_core = ptr; 1560 } 1561 1562 /* Allocate memory for compute queue registers for all the instances */ 1563 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1564 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1565 adev->gfx.mec.num_queue_per_pipe; 1566 1567 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1568 if (!ptr) { 1569 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1570 adev->gfx.ip_dump_compute_queues = NULL; 1571 } else { 1572 adev->gfx.ip_dump_compute_queues = ptr; 1573 } 1574 1575 /* Allocate memory for gfx queue registers for all the instances */ 1576 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1577 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1578 adev->gfx.me.num_queue_per_pipe; 1579 1580 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1581 if (!ptr) { 1582 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1583 adev->gfx.ip_dump_gfx_queues = NULL; 1584 } else { 1585 adev->gfx.ip_dump_gfx_queues = ptr; 1586 } 1587 } 1588 1589 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1590 { 1591 int i, j, k, r, ring_id; 1592 int xcc_id = 0; 1593 struct amdgpu_device *adev = ip_block->adev; 1594 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1595 1596 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1597 1598 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1599 case IP_VERSION(11, 0, 0): 1600 case IP_VERSION(11, 0, 1): 1601 case IP_VERSION(11, 0, 2): 1602 case IP_VERSION(11, 0, 3): 1603 case IP_VERSION(11, 0, 4): 1604 case IP_VERSION(11, 5, 0): 1605 case IP_VERSION(11, 5, 1): 1606 case IP_VERSION(11, 5, 2): 1607 case IP_VERSION(11, 5, 3): 1608 case IP_VERSION(11, 5, 4): 1609 adev->gfx.me.num_me = 1; 1610 adev->gfx.me.num_pipe_per_me = 1; 1611 adev->gfx.me.num_queue_per_pipe = 2; 1612 adev->gfx.mec.num_mec = 1; 1613 adev->gfx.mec.num_pipe_per_mec = 4; 1614 adev->gfx.mec.num_queue_per_pipe = 4; 1615 break; 1616 default: 1617 adev->gfx.me.num_me = 1; 1618 adev->gfx.me.num_pipe_per_me = 1; 1619 adev->gfx.me.num_queue_per_pipe = 1; 1620 adev->gfx.mec.num_mec = 1; 1621 adev->gfx.mec.num_pipe_per_mec = 4; 1622 adev->gfx.mec.num_queue_per_pipe = 8; 1623 break; 1624 } 1625 1626 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1627 case IP_VERSION(11, 0, 0): 1628 case IP_VERSION(11, 0, 2): 1629 case IP_VERSION(11, 0, 3): 1630 if (!adev->gfx.disable_uq && 1631 adev->gfx.me_fw_version >= 2420 && 1632 adev->gfx.pfp_fw_version >= 2580 && 1633 adev->gfx.mec_fw_version >= 2650 && 1634 adev->mes.fw_version[0] >= 120) { 1635 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1636 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1637 } 1638 break; 1639 case IP_VERSION(11, 0, 1): 1640 case IP_VERSION(11, 0, 4): 1641 case IP_VERSION(11, 5, 0): 1642 case IP_VERSION(11, 5, 1): 1643 case IP_VERSION(11, 5, 2): 1644 case IP_VERSION(11, 5, 3): 1645 /* add firmware version checks here */ 1646 if (0 && !adev->gfx.disable_uq) { 1647 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1648 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1649 } 1650 break; 1651 default: 1652 break; 1653 } 1654 1655 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1656 case IP_VERSION(11, 0, 0): 1657 case IP_VERSION(11, 0, 2): 1658 case IP_VERSION(11, 0, 3): 1659 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 if (adev->gfx.me_fw_version >= 2280 && 1662 adev->gfx.pfp_fw_version >= 2370 && 1663 adev->gfx.mec_fw_version >= 2450 && 1664 adev->mes.fw_version[0] >= 99) { 1665 adev->gfx.enable_cleaner_shader = true; 1666 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1667 if (r) { 1668 adev->gfx.enable_cleaner_shader = false; 1669 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1670 } 1671 } 1672 break; 1673 case IP_VERSION(11, 0, 1): 1674 case IP_VERSION(11, 0, 4): 1675 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1676 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1677 if (adev->gfx.pfp_fw_version >= 102 && 1678 adev->gfx.mec_fw_version >= 66 && 1679 adev->mes.fw_version[0] >= 128) { 1680 adev->gfx.enable_cleaner_shader = true; 1681 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1682 if (r) { 1683 adev->gfx.enable_cleaner_shader = false; 1684 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1685 } 1686 } 1687 break; 1688 case IP_VERSION(11, 5, 0): 1689 case IP_VERSION(11, 5, 1): 1690 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1691 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1692 if (adev->gfx.mec_fw_version >= 26 && 1693 adev->mes.fw_version[0] >= 114) { 1694 adev->gfx.enable_cleaner_shader = true; 1695 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1696 if (r) { 1697 adev->gfx.enable_cleaner_shader = false; 1698 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1699 } 1700 } 1701 break; 1702 case IP_VERSION(11, 5, 2): 1703 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1704 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1705 if (adev->gfx.me_fw_version >= 12 && 1706 adev->gfx.pfp_fw_version >= 15 && 1707 adev->gfx.mec_fw_version >= 15) { 1708 adev->gfx.enable_cleaner_shader = true; 1709 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1710 if (r) { 1711 adev->gfx.enable_cleaner_shader = false; 1712 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1713 } 1714 } 1715 break; 1716 case IP_VERSION(11, 5, 3): 1717 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1718 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1719 if (adev->gfx.me_fw_version >= 7 && 1720 adev->gfx.pfp_fw_version >= 8 && 1721 adev->gfx.mec_fw_version >= 8) { 1722 adev->gfx.enable_cleaner_shader = true; 1723 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1724 if (r) { 1725 adev->gfx.enable_cleaner_shader = false; 1726 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1727 } 1728 } 1729 break; 1730 case IP_VERSION(11, 5, 4): 1731 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1732 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1733 if (adev->gfx.me_fw_version >= 4 && 1734 adev->gfx.pfp_fw_version >= 7 && 1735 adev->gfx.mec_fw_version >= 5) { 1736 adev->gfx.enable_cleaner_shader = true; 1737 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1738 if (r) { 1739 adev->gfx.enable_cleaner_shader = false; 1740 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1741 } 1742 } 1743 break; 1744 default: 1745 adev->gfx.enable_cleaner_shader = false; 1746 break; 1747 } 1748 1749 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1750 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1751 amdgpu_sriov_is_pp_one_vf(adev)) 1752 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1753 1754 /* EOP Event */ 1755 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1756 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1757 &adev->gfx.eop_irq); 1758 if (r) 1759 return r; 1760 1761 /* Bad opcode Event */ 1762 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1763 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1764 &adev->gfx.bad_op_irq); 1765 if (r) 1766 return r; 1767 1768 /* Privileged reg */ 1769 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1770 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1771 &adev->gfx.priv_reg_irq); 1772 if (r) 1773 return r; 1774 1775 /* Privileged inst */ 1776 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1777 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1778 &adev->gfx.priv_inst_irq); 1779 if (r) 1780 return r; 1781 1782 /* FED error */ 1783 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1784 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1785 &adev->gfx.rlc_gc_fed_irq); 1786 if (r) 1787 return r; 1788 1789 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1790 1791 gfx_v11_0_me_init(adev); 1792 1793 r = gfx_v11_0_rlc_init(adev); 1794 if (r) { 1795 DRM_ERROR("Failed to init rlc BOs!\n"); 1796 return r; 1797 } 1798 1799 r = gfx_v11_0_mec_init(adev); 1800 if (r) { 1801 DRM_ERROR("Failed to init MEC BOs!\n"); 1802 return r; 1803 } 1804 1805 if (adev->gfx.num_gfx_rings) { 1806 ring_id = 0; 1807 /* set up the gfx ring */ 1808 for (i = 0; i < adev->gfx.me.num_me; i++) { 1809 for (j = 0; j < num_queue_per_pipe; j++) { 1810 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1811 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1812 continue; 1813 1814 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1815 i, k, j); 1816 if (r) 1817 return r; 1818 ring_id++; 1819 } 1820 } 1821 } 1822 } 1823 1824 if (adev->gfx.num_compute_rings) { 1825 ring_id = 0; 1826 /* set up the compute queues - allocate horizontally across pipes */ 1827 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1828 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1829 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1830 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1831 k, j)) 1832 continue; 1833 1834 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1835 i, k, j); 1836 if (r) 1837 return r; 1838 1839 ring_id++; 1840 } 1841 } 1842 } 1843 } 1844 1845 adev->gfx.gfx_supported_reset = 1846 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1847 adev->gfx.compute_supported_reset = 1848 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1849 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1850 case IP_VERSION(11, 0, 0): 1851 case IP_VERSION(11, 0, 2): 1852 case IP_VERSION(11, 0, 3): 1853 if ((adev->gfx.me_fw_version >= 2280) && 1854 (adev->gfx.mec_fw_version >= 2410) && 1855 !amdgpu_sriov_vf(adev) && 1856 !adev->debug_disable_gpu_ring_reset) { 1857 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1858 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1859 } 1860 break; 1861 default: 1862 if (!amdgpu_sriov_vf(adev) && 1863 !adev->debug_disable_gpu_ring_reset) { 1864 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1865 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1866 } 1867 break; 1868 } 1869 1870 if (!adev->enable_mes_kiq) { 1871 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1872 if (r) { 1873 DRM_ERROR("Failed to init KIQ BOs!\n"); 1874 return r; 1875 } 1876 1877 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1878 if (r) 1879 return r; 1880 } 1881 1882 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1883 if (r) 1884 return r; 1885 1886 /* allocate visible FB for rlc auto-loading fw */ 1887 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1888 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1889 if (r) 1890 return r; 1891 } 1892 1893 r = gfx_v11_0_gpu_early_init(adev); 1894 if (r) 1895 return r; 1896 1897 if (amdgpu_gfx_ras_sw_init(adev)) { 1898 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1899 return -EINVAL; 1900 } 1901 1902 gfx_v11_0_alloc_ip_dump(adev); 1903 1904 r = amdgpu_gfx_sysfs_init(adev); 1905 if (r) 1906 return r; 1907 1908 return 0; 1909 } 1910 1911 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1912 { 1913 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1914 &adev->gfx.pfp.pfp_fw_gpu_addr, 1915 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1916 1917 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1918 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1919 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1920 } 1921 1922 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1923 { 1924 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1925 &adev->gfx.me.me_fw_gpu_addr, 1926 (void **)&adev->gfx.me.me_fw_ptr); 1927 1928 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1929 &adev->gfx.me.me_fw_data_gpu_addr, 1930 (void **)&adev->gfx.me.me_fw_data_ptr); 1931 } 1932 1933 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1934 { 1935 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1936 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1937 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1938 } 1939 1940 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1941 { 1942 int i; 1943 struct amdgpu_device *adev = ip_block->adev; 1944 1945 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1946 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1947 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1948 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1949 1950 amdgpu_gfx_mqd_sw_fini(adev, 0); 1951 1952 if (!adev->enable_mes_kiq) { 1953 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1954 amdgpu_gfx_kiq_fini(adev, 0); 1955 } 1956 1957 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1958 1959 gfx_v11_0_pfp_fini(adev); 1960 gfx_v11_0_me_fini(adev); 1961 gfx_v11_0_rlc_fini(adev); 1962 gfx_v11_0_mec_fini(adev); 1963 1964 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1965 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1966 1967 gfx_v11_0_free_microcode(adev); 1968 1969 amdgpu_gfx_sysfs_fini(adev); 1970 1971 kfree(adev->gfx.ip_dump_core); 1972 kfree(adev->gfx.ip_dump_compute_queues); 1973 kfree(adev->gfx.ip_dump_gfx_queues); 1974 1975 return 0; 1976 } 1977 1978 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1979 u32 sh_num, u32 instance, int xcc_id) 1980 { 1981 u32 data; 1982 1983 if (instance == 0xffffffff) 1984 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1985 INSTANCE_BROADCAST_WRITES, 1); 1986 else 1987 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1988 instance); 1989 1990 if (se_num == 0xffffffff) 1991 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1992 1); 1993 else 1994 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1995 1996 if (sh_num == 0xffffffff) 1997 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1998 1); 1999 else 2000 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 2001 2002 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 2003 } 2004 2005 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 2006 { 2007 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 2008 2009 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 2010 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 2011 CC_GC_SA_UNIT_DISABLE, 2012 SA_DISABLE); 2013 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 2014 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 2015 GC_USER_SA_UNIT_DISABLE, 2016 SA_DISABLE); 2017 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 2018 adev->gfx.config.max_shader_engines); 2019 2020 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 2021 } 2022 2023 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2024 { 2025 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 2026 u32 rb_mask; 2027 2028 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 2029 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 2030 CC_RB_BACKEND_DISABLE, 2031 BACKEND_DISABLE); 2032 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2033 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2034 GC_USER_RB_BACKEND_DISABLE, 2035 BACKEND_DISABLE); 2036 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2037 adev->gfx.config.max_shader_engines); 2038 2039 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2040 } 2041 2042 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2043 { 2044 u32 rb_bitmap_per_sa; 2045 u32 rb_bitmap_width_per_sa; 2046 u32 max_sa; 2047 u32 active_sa_bitmap; 2048 u32 global_active_rb_bitmap; 2049 u32 active_rb_bitmap = 0; 2050 u32 i; 2051 2052 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2053 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2054 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2055 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2056 2057 /* generate active rb bitmap according to active sa bitmap */ 2058 max_sa = adev->gfx.config.max_shader_engines * 2059 adev->gfx.config.max_sh_per_se; 2060 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2061 adev->gfx.config.max_sh_per_se; 2062 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2063 2064 for (i = 0; i < max_sa; i++) { 2065 if (active_sa_bitmap & (1 << i)) 2066 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2067 } 2068 2069 active_rb_bitmap &= global_active_rb_bitmap; 2070 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2071 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2072 } 2073 2074 #define DEFAULT_SH_MEM_BASES (0x6000) 2075 #define LDS_APP_BASE 0x1 2076 #define SCRATCH_APP_BASE 0x2 2077 2078 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2079 { 2080 int i; 2081 uint32_t sh_mem_bases; 2082 uint32_t data; 2083 2084 /* 2085 * Configure apertures: 2086 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2087 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2088 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2089 */ 2090 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2091 SCRATCH_APP_BASE; 2092 2093 mutex_lock(&adev->srbm_mutex); 2094 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2095 soc21_grbm_select(adev, 0, 0, 0, i); 2096 /* CP and shaders */ 2097 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2098 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2099 2100 /* Enable trap for each kfd vmid. */ 2101 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2102 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2103 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2104 } 2105 soc21_grbm_select(adev, 0, 0, 0, 0); 2106 mutex_unlock(&adev->srbm_mutex); 2107 2108 /* 2109 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2110 * access. These should be enabled by FW for target VMIDs. 2111 */ 2112 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2113 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2114 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2115 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2116 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2117 } 2118 } 2119 2120 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2121 { 2122 int vmid; 2123 2124 /* 2125 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2126 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2127 * the driver can enable them for graphics. VMID0 should maintain 2128 * access so that HWS firmware can save/restore entries. 2129 */ 2130 for (vmid = 1; vmid < 16; vmid++) { 2131 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2132 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2133 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2134 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2135 } 2136 } 2137 2138 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2139 { 2140 /* TODO: harvest feature to be added later. */ 2141 } 2142 2143 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2144 { 2145 /* TCCs are global (not instanced). */ 2146 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2147 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2148 2149 adev->gfx.config.tcc_disabled_mask = 2150 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2151 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2152 } 2153 2154 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2155 { 2156 u32 tmp; 2157 int i; 2158 2159 if (!amdgpu_sriov_vf(adev)) 2160 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2161 2162 gfx_v11_0_setup_rb(adev); 2163 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2164 gfx_v11_0_get_tcc_info(adev); 2165 adev->gfx.config.pa_sc_tile_steering_override = 0; 2166 2167 /* Set whether texture coordinate truncation is conformant. */ 2168 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2169 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2170 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2171 2172 /* XXX SH_MEM regs */ 2173 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2174 mutex_lock(&adev->srbm_mutex); 2175 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2176 soc21_grbm_select(adev, 0, 0, 0, i); 2177 /* CP and shaders */ 2178 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2179 if (i != 0) { 2180 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2181 (adev->gmc.private_aperture_start >> 48)); 2182 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2183 (adev->gmc.shared_aperture_start >> 48)); 2184 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2185 } 2186 } 2187 soc21_grbm_select(adev, 0, 0, 0, 0); 2188 2189 mutex_unlock(&adev->srbm_mutex); 2190 2191 gfx_v11_0_init_compute_vmid(adev); 2192 gfx_v11_0_init_gds_vmid(adev); 2193 } 2194 2195 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2196 int me, int pipe) 2197 { 2198 if (me != 0) 2199 return 0; 2200 2201 switch (pipe) { 2202 case 0: 2203 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2204 case 1: 2205 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2206 default: 2207 return 0; 2208 } 2209 } 2210 2211 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2212 int me, int pipe) 2213 { 2214 /* 2215 * amdgpu controls only the first MEC. That's why this function only 2216 * handles the setting of interrupts for this specific MEC. All other 2217 * pipes' interrupts are set by amdkfd. 2218 */ 2219 if (me != 1) 2220 return 0; 2221 2222 switch (pipe) { 2223 case 0: 2224 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2225 case 1: 2226 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2227 case 2: 2228 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2229 case 3: 2230 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2231 default: 2232 return 0; 2233 } 2234 } 2235 2236 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2237 bool enable) 2238 { 2239 u32 tmp, cp_int_cntl_reg; 2240 int i, j; 2241 2242 if (amdgpu_sriov_vf(adev)) 2243 return; 2244 2245 for (i = 0; i < adev->gfx.me.num_me; i++) { 2246 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2247 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2248 2249 if (cp_int_cntl_reg) { 2250 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2251 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2252 enable ? 1 : 0); 2253 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2254 enable ? 1 : 0); 2255 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2256 enable ? 1 : 0); 2257 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2258 enable ? 1 : 0); 2259 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2260 } 2261 } 2262 } 2263 } 2264 2265 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2266 { 2267 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2268 2269 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2270 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2271 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2272 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2273 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2274 2275 return 0; 2276 } 2277 2278 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2279 { 2280 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2281 2282 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2283 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2284 } 2285 2286 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2287 { 2288 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2289 udelay(50); 2290 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2291 udelay(50); 2292 } 2293 2294 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2295 bool enable) 2296 { 2297 uint32_t rlc_pg_cntl; 2298 2299 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2300 2301 if (!enable) { 2302 /* RLC_PG_CNTL[23] = 0 (default) 2303 * RLC will wait for handshake acks with SMU 2304 * GFXOFF will be enabled 2305 * RLC_PG_CNTL[23] = 1 2306 * RLC will not issue any message to SMU 2307 * hence no handshake between SMU & RLC 2308 * GFXOFF will be disabled 2309 */ 2310 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2311 } else 2312 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2313 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2314 } 2315 2316 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2317 { 2318 /* TODO: enable rlc & smu handshake until smu 2319 * and gfxoff feature works as expected */ 2320 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2321 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2322 2323 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2324 udelay(50); 2325 } 2326 2327 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2328 { 2329 uint32_t tmp; 2330 2331 /* enable Save Restore Machine */ 2332 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2333 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2334 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2335 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2336 } 2337 2338 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2339 { 2340 const struct rlc_firmware_header_v2_0 *hdr; 2341 const __le32 *fw_data; 2342 unsigned i, fw_size; 2343 2344 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2345 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2346 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2347 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2348 2349 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2350 RLCG_UCODE_LOADING_START_ADDRESS); 2351 2352 for (i = 0; i < fw_size; i++) 2353 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2354 le32_to_cpup(fw_data++)); 2355 2356 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2357 } 2358 2359 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2360 { 2361 const struct rlc_firmware_header_v2_2 *hdr; 2362 const __le32 *fw_data; 2363 unsigned i, fw_size; 2364 u32 tmp; 2365 2366 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2367 2368 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2369 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2370 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2371 2372 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2373 2374 for (i = 0; i < fw_size; i++) { 2375 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2376 msleep(1); 2377 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2378 le32_to_cpup(fw_data++)); 2379 } 2380 2381 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2382 2383 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2384 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2385 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2386 2387 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2388 for (i = 0; i < fw_size; i++) { 2389 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2390 msleep(1); 2391 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2392 le32_to_cpup(fw_data++)); 2393 } 2394 2395 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2396 2397 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2398 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2399 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2400 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2401 } 2402 2403 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2404 { 2405 const struct rlc_firmware_header_v2_3 *hdr; 2406 const __le32 *fw_data; 2407 unsigned i, fw_size; 2408 u32 tmp; 2409 2410 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2411 2412 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2413 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2414 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2415 2416 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2417 2418 for (i = 0; i < fw_size; i++) { 2419 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2420 msleep(1); 2421 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2422 le32_to_cpup(fw_data++)); 2423 } 2424 2425 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2426 2427 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2428 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2429 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2430 2431 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2432 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2433 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2434 2435 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2436 2437 for (i = 0; i < fw_size; i++) { 2438 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2439 msleep(1); 2440 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2441 le32_to_cpup(fw_data++)); 2442 } 2443 2444 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2445 2446 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2447 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2448 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2449 } 2450 2451 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2452 { 2453 const struct rlc_firmware_header_v2_0 *hdr; 2454 uint16_t version_major; 2455 uint16_t version_minor; 2456 2457 if (!adev->gfx.rlc_fw) 2458 return -EINVAL; 2459 2460 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2461 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2462 2463 version_major = le16_to_cpu(hdr->header.header_version_major); 2464 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2465 2466 if (version_major == 2) { 2467 gfx_v11_0_load_rlcg_microcode(adev); 2468 if (amdgpu_dpm == 1) { 2469 if (version_minor >= 2) 2470 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2471 if (version_minor == 3) 2472 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2473 } 2474 2475 return 0; 2476 } 2477 2478 return -EINVAL; 2479 } 2480 2481 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2482 { 2483 int r; 2484 2485 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2486 gfx_v11_0_init_csb(adev); 2487 2488 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2489 gfx_v11_0_rlc_enable_srm(adev); 2490 } else { 2491 if (amdgpu_sriov_vf(adev)) { 2492 gfx_v11_0_init_csb(adev); 2493 return 0; 2494 } 2495 2496 adev->gfx.rlc.funcs->stop(adev); 2497 2498 /* disable CG */ 2499 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2500 2501 /* disable PG */ 2502 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2503 2504 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2505 /* legacy rlc firmware loading */ 2506 r = gfx_v11_0_rlc_load_microcode(adev); 2507 if (r) 2508 return r; 2509 } 2510 2511 gfx_v11_0_init_csb(adev); 2512 2513 adev->gfx.rlc.funcs->start(adev); 2514 } 2515 return 0; 2516 } 2517 2518 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2519 { 2520 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2521 uint32_t tmp; 2522 int i; 2523 2524 /* Trigger an invalidation of the L1 instruction caches */ 2525 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2526 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2527 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2528 2529 /* Wait for invalidation complete */ 2530 for (i = 0; i < usec_timeout; i++) { 2531 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2532 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2533 INVALIDATE_CACHE_COMPLETE)) 2534 break; 2535 udelay(1); 2536 } 2537 2538 if (i >= usec_timeout) { 2539 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2540 return -EINVAL; 2541 } 2542 2543 if (amdgpu_emu_mode == 1) 2544 amdgpu_device_flush_hdp(adev, NULL); 2545 2546 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2547 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2548 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2549 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2550 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2551 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2552 2553 /* Program me ucode address into intruction cache address register */ 2554 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2555 lower_32_bits(addr) & 0xFFFFF000); 2556 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2557 upper_32_bits(addr)); 2558 2559 return 0; 2560 } 2561 2562 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2563 { 2564 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2565 uint32_t tmp; 2566 int i; 2567 2568 /* Trigger an invalidation of the L1 instruction caches */ 2569 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2570 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2571 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2572 2573 /* Wait for invalidation complete */ 2574 for (i = 0; i < usec_timeout; i++) { 2575 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2576 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2577 INVALIDATE_CACHE_COMPLETE)) 2578 break; 2579 udelay(1); 2580 } 2581 2582 if (i >= usec_timeout) { 2583 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2584 return -EINVAL; 2585 } 2586 2587 if (amdgpu_emu_mode == 1) 2588 amdgpu_device_flush_hdp(adev, NULL); 2589 2590 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2591 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2592 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2593 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2594 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2595 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2596 2597 /* Program pfp ucode address into intruction cache address register */ 2598 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2599 lower_32_bits(addr) & 0xFFFFF000); 2600 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2601 upper_32_bits(addr)); 2602 2603 return 0; 2604 } 2605 2606 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2607 { 2608 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2609 uint32_t tmp; 2610 int i; 2611 2612 /* Trigger an invalidation of the L1 instruction caches */ 2613 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2614 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2615 2616 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2617 2618 /* Wait for invalidation complete */ 2619 for (i = 0; i < usec_timeout; i++) { 2620 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2621 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2622 INVALIDATE_CACHE_COMPLETE)) 2623 break; 2624 udelay(1); 2625 } 2626 2627 if (i >= usec_timeout) { 2628 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2629 return -EINVAL; 2630 } 2631 2632 if (amdgpu_emu_mode == 1) 2633 amdgpu_device_flush_hdp(adev, NULL); 2634 2635 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2636 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2637 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2638 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2639 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2640 2641 /* Program mec1 ucode address into intruction cache address register */ 2642 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2643 lower_32_bits(addr) & 0xFFFFF000); 2644 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2645 upper_32_bits(addr)); 2646 2647 return 0; 2648 } 2649 2650 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2651 { 2652 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2653 uint32_t tmp; 2654 unsigned i, pipe_id; 2655 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2656 2657 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2658 adev->gfx.pfp_fw->data; 2659 2660 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2661 lower_32_bits(addr)); 2662 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2663 upper_32_bits(addr)); 2664 2665 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2666 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2667 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2668 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2669 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2670 2671 /* 2672 * Programming any of the CP_PFP_IC_BASE registers 2673 * forces invalidation of the ME L1 I$. Wait for the 2674 * invalidation complete 2675 */ 2676 for (i = 0; i < usec_timeout; i++) { 2677 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2678 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2679 INVALIDATE_CACHE_COMPLETE)) 2680 break; 2681 udelay(1); 2682 } 2683 2684 if (i >= usec_timeout) { 2685 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2686 return -EINVAL; 2687 } 2688 2689 /* Prime the L1 instruction caches */ 2690 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2691 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2692 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2693 /* Waiting for cache primed*/ 2694 for (i = 0; i < usec_timeout; i++) { 2695 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2696 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2697 ICACHE_PRIMED)) 2698 break; 2699 udelay(1); 2700 } 2701 2702 if (i >= usec_timeout) { 2703 dev_err(adev->dev, "failed to prime instruction cache\n"); 2704 return -EINVAL; 2705 } 2706 2707 mutex_lock(&adev->srbm_mutex); 2708 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2709 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2710 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2711 (pfp_hdr->ucode_start_addr_hi << 30) | 2712 (pfp_hdr->ucode_start_addr_lo >> 2)); 2713 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2714 pfp_hdr->ucode_start_addr_hi >> 2); 2715 2716 /* 2717 * Program CP_ME_CNTL to reset given PIPE to take 2718 * effect of CP_PFP_PRGRM_CNTR_START. 2719 */ 2720 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2721 if (pipe_id == 0) 2722 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2723 PFP_PIPE0_RESET, 1); 2724 else 2725 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2726 PFP_PIPE1_RESET, 1); 2727 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2728 2729 /* Clear pfp pipe0 reset bit. */ 2730 if (pipe_id == 0) 2731 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2732 PFP_PIPE0_RESET, 0); 2733 else 2734 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2735 PFP_PIPE1_RESET, 0); 2736 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2737 2738 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2739 lower_32_bits(addr2)); 2740 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2741 upper_32_bits(addr2)); 2742 } 2743 soc21_grbm_select(adev, 0, 0, 0, 0); 2744 mutex_unlock(&adev->srbm_mutex); 2745 2746 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2747 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2748 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2749 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2750 2751 /* Invalidate the data caches */ 2752 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2753 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2754 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2755 2756 for (i = 0; i < usec_timeout; i++) { 2757 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2758 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2759 INVALIDATE_DCACHE_COMPLETE)) 2760 break; 2761 udelay(1); 2762 } 2763 2764 if (i >= usec_timeout) { 2765 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2766 return -EINVAL; 2767 } 2768 2769 return 0; 2770 } 2771 2772 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2773 { 2774 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2775 uint32_t tmp; 2776 unsigned i, pipe_id; 2777 const struct gfx_firmware_header_v2_0 *me_hdr; 2778 2779 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2780 adev->gfx.me_fw->data; 2781 2782 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2783 lower_32_bits(addr)); 2784 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2785 upper_32_bits(addr)); 2786 2787 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2788 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2789 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2790 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2791 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2792 2793 /* 2794 * Programming any of the CP_ME_IC_BASE registers 2795 * forces invalidation of the ME L1 I$. Wait for the 2796 * invalidation complete 2797 */ 2798 for (i = 0; i < usec_timeout; i++) { 2799 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2800 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2801 INVALIDATE_CACHE_COMPLETE)) 2802 break; 2803 udelay(1); 2804 } 2805 2806 if (i >= usec_timeout) { 2807 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2808 return -EINVAL; 2809 } 2810 2811 /* Prime the instruction caches */ 2812 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2813 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2814 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2815 2816 /* Waiting for instruction cache primed*/ 2817 for (i = 0; i < usec_timeout; i++) { 2818 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2819 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2820 ICACHE_PRIMED)) 2821 break; 2822 udelay(1); 2823 } 2824 2825 if (i >= usec_timeout) { 2826 dev_err(adev->dev, "failed to prime instruction cache\n"); 2827 return -EINVAL; 2828 } 2829 2830 mutex_lock(&adev->srbm_mutex); 2831 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2832 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2833 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2834 (me_hdr->ucode_start_addr_hi << 30) | 2835 (me_hdr->ucode_start_addr_lo >> 2) ); 2836 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2837 me_hdr->ucode_start_addr_hi>>2); 2838 2839 /* 2840 * Program CP_ME_CNTL to reset given PIPE to take 2841 * effect of CP_PFP_PRGRM_CNTR_START. 2842 */ 2843 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2844 if (pipe_id == 0) 2845 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2846 ME_PIPE0_RESET, 1); 2847 else 2848 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2849 ME_PIPE1_RESET, 1); 2850 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2851 2852 /* Clear pfp pipe0 reset bit. */ 2853 if (pipe_id == 0) 2854 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2855 ME_PIPE0_RESET, 0); 2856 else 2857 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2858 ME_PIPE1_RESET, 0); 2859 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2860 2861 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2862 lower_32_bits(addr2)); 2863 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2864 upper_32_bits(addr2)); 2865 } 2866 soc21_grbm_select(adev, 0, 0, 0, 0); 2867 mutex_unlock(&adev->srbm_mutex); 2868 2869 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2870 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2871 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2872 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2873 2874 /* Invalidate the data caches */ 2875 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2876 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2877 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2878 2879 for (i = 0; i < usec_timeout; i++) { 2880 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2881 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2882 INVALIDATE_DCACHE_COMPLETE)) 2883 break; 2884 udelay(1); 2885 } 2886 2887 if (i >= usec_timeout) { 2888 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2889 return -EINVAL; 2890 } 2891 2892 return 0; 2893 } 2894 2895 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2896 { 2897 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2898 uint32_t tmp; 2899 unsigned i; 2900 const struct gfx_firmware_header_v2_0 *mec_hdr; 2901 2902 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2903 adev->gfx.mec_fw->data; 2904 2905 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2906 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2907 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2908 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2909 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2910 2911 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2912 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2913 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2914 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2915 2916 mutex_lock(&adev->srbm_mutex); 2917 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2918 soc21_grbm_select(adev, 1, i, 0, 0); 2919 2920 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2921 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2922 upper_32_bits(addr2)); 2923 2924 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2925 mec_hdr->ucode_start_addr_lo >> 2 | 2926 mec_hdr->ucode_start_addr_hi << 30); 2927 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2928 mec_hdr->ucode_start_addr_hi >> 2); 2929 2930 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2931 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2932 upper_32_bits(addr)); 2933 } 2934 mutex_unlock(&adev->srbm_mutex); 2935 soc21_grbm_select(adev, 0, 0, 0, 0); 2936 2937 /* Trigger an invalidation of the L1 instruction caches */ 2938 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2939 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2940 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2941 2942 /* Wait for invalidation complete */ 2943 for (i = 0; i < usec_timeout; i++) { 2944 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2945 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2946 INVALIDATE_DCACHE_COMPLETE)) 2947 break; 2948 udelay(1); 2949 } 2950 2951 if (i >= usec_timeout) { 2952 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2953 return -EINVAL; 2954 } 2955 2956 /* Trigger an invalidation of the L1 instruction caches */ 2957 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2958 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2959 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2960 2961 /* Wait for invalidation complete */ 2962 for (i = 0; i < usec_timeout; i++) { 2963 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2964 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2965 INVALIDATE_CACHE_COMPLETE)) 2966 break; 2967 udelay(1); 2968 } 2969 2970 if (i >= usec_timeout) { 2971 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2972 return -EINVAL; 2973 } 2974 2975 return 0; 2976 } 2977 2978 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2979 { 2980 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2981 const struct gfx_firmware_header_v2_0 *me_hdr; 2982 const struct gfx_firmware_header_v2_0 *mec_hdr; 2983 uint32_t pipe_id, tmp; 2984 2985 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2986 adev->gfx.mec_fw->data; 2987 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2988 adev->gfx.me_fw->data; 2989 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2990 adev->gfx.pfp_fw->data; 2991 2992 /* config pfp program start addr */ 2993 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2994 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2995 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2996 (pfp_hdr->ucode_start_addr_hi << 30) | 2997 (pfp_hdr->ucode_start_addr_lo >> 2)); 2998 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2999 pfp_hdr->ucode_start_addr_hi >> 2); 3000 } 3001 soc21_grbm_select(adev, 0, 0, 0, 0); 3002 3003 /* reset pfp pipe */ 3004 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 3007 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3008 3009 /* clear pfp pipe reset */ 3010 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 3011 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 3012 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3013 3014 /* config me program start addr */ 3015 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 3016 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3017 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3018 (me_hdr->ucode_start_addr_hi << 30) | 3019 (me_hdr->ucode_start_addr_lo >> 2) ); 3020 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3021 me_hdr->ucode_start_addr_hi>>2); 3022 } 3023 soc21_grbm_select(adev, 0, 0, 0, 0); 3024 3025 /* reset me pipe */ 3026 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3027 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 3028 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 3029 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3030 3031 /* clear me pipe reset */ 3032 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 3033 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 3034 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3035 3036 /* config mec program start addr */ 3037 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 3038 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 3039 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3040 mec_hdr->ucode_start_addr_lo >> 2 | 3041 mec_hdr->ucode_start_addr_hi << 30); 3042 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3043 mec_hdr->ucode_start_addr_hi >> 2); 3044 } 3045 soc21_grbm_select(adev, 0, 0, 0, 0); 3046 3047 /* reset mec pipe */ 3048 tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3049 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3050 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3051 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3052 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3053 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3054 3055 /* clear mec pipe reset */ 3056 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3057 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3058 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3059 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3060 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3061 } 3062 3063 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3064 { 3065 uint32_t cp_status; 3066 uint32_t bootload_status; 3067 int i, r; 3068 uint64_t addr, addr2; 3069 3070 for (i = 0; i < adev->usec_timeout; i++) { 3071 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3072 3073 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3074 IP_VERSION(11, 0, 1) || 3075 amdgpu_ip_version(adev, GC_HWIP, 0) == 3076 IP_VERSION(11, 0, 4) || 3077 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3078 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3079 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3080 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || 3081 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) 3082 bootload_status = RREG32_SOC15(GC, 0, 3083 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3084 else 3085 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3086 3087 if ((cp_status == 0) && 3088 (REG_GET_FIELD(bootload_status, 3089 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3090 break; 3091 } 3092 udelay(1); 3093 } 3094 3095 if (i >= adev->usec_timeout) { 3096 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3097 return -ETIMEDOUT; 3098 } 3099 3100 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3101 if (adev->gfx.rs64_enable) { 3102 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3103 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3104 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3105 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3106 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3107 if (r) 3108 return r; 3109 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3110 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3111 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3112 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3113 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3114 if (r) 3115 return r; 3116 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3117 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3118 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3119 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3120 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3121 if (r) 3122 return r; 3123 } else { 3124 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3125 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3126 r = gfx_v11_0_config_me_cache(adev, addr); 3127 if (r) 3128 return r; 3129 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3130 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3131 r = gfx_v11_0_config_pfp_cache(adev, addr); 3132 if (r) 3133 return r; 3134 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3135 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3136 r = gfx_v11_0_config_mec_cache(adev, addr); 3137 if (r) 3138 return r; 3139 } 3140 } 3141 3142 return 0; 3143 } 3144 3145 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3146 { 3147 int i; 3148 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3149 3150 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3151 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3152 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3153 3154 for (i = 0; i < adev->usec_timeout; i++) { 3155 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3156 break; 3157 udelay(1); 3158 } 3159 3160 if (i >= adev->usec_timeout) 3161 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3162 3163 return 0; 3164 } 3165 3166 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3167 { 3168 int r; 3169 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3170 const __le32 *fw_data; 3171 unsigned i, fw_size; 3172 3173 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3174 adev->gfx.pfp_fw->data; 3175 3176 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3177 3178 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3179 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3180 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3181 3182 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3183 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3184 &adev->gfx.pfp.pfp_fw_obj, 3185 &adev->gfx.pfp.pfp_fw_gpu_addr, 3186 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3187 if (r) { 3188 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3189 gfx_v11_0_pfp_fini(adev); 3190 return r; 3191 } 3192 3193 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3194 3195 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3196 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3197 3198 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3199 3200 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3201 3202 for (i = 0; i < pfp_hdr->jt_size; i++) 3203 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3204 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3205 3206 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3207 3208 return 0; 3209 } 3210 3211 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3212 { 3213 int r; 3214 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3215 const __le32 *fw_ucode, *fw_data; 3216 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3217 uint32_t tmp; 3218 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3219 3220 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3221 adev->gfx.pfp_fw->data; 3222 3223 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3224 3225 /* instruction */ 3226 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3227 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3228 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3229 /* data */ 3230 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3231 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3232 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3233 3234 /* 64kb align */ 3235 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3236 64 * 1024, 3237 AMDGPU_GEM_DOMAIN_VRAM | 3238 AMDGPU_GEM_DOMAIN_GTT, 3239 &adev->gfx.pfp.pfp_fw_obj, 3240 &adev->gfx.pfp.pfp_fw_gpu_addr, 3241 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3242 if (r) { 3243 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3244 gfx_v11_0_pfp_fini(adev); 3245 return r; 3246 } 3247 3248 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3249 64 * 1024, 3250 AMDGPU_GEM_DOMAIN_VRAM | 3251 AMDGPU_GEM_DOMAIN_GTT, 3252 &adev->gfx.pfp.pfp_fw_data_obj, 3253 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3254 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3255 if (r) { 3256 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3257 gfx_v11_0_pfp_fini(adev); 3258 return r; 3259 } 3260 3261 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3262 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3263 3264 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3265 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3266 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3267 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3268 3269 if (amdgpu_emu_mode == 1) 3270 amdgpu_device_flush_hdp(adev, NULL); 3271 3272 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3273 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3274 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3275 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3276 3277 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3278 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3279 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3280 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3281 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3282 3283 /* 3284 * Programming any of the CP_PFP_IC_BASE registers 3285 * forces invalidation of the ME L1 I$. Wait for the 3286 * invalidation complete 3287 */ 3288 for (i = 0; i < usec_timeout; i++) { 3289 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3290 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3291 INVALIDATE_CACHE_COMPLETE)) 3292 break; 3293 udelay(1); 3294 } 3295 3296 if (i >= usec_timeout) { 3297 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3298 return -EINVAL; 3299 } 3300 3301 /* Prime the L1 instruction caches */ 3302 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3303 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3304 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3305 /* Waiting for cache primed*/ 3306 for (i = 0; i < usec_timeout; i++) { 3307 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3308 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3309 ICACHE_PRIMED)) 3310 break; 3311 udelay(1); 3312 } 3313 3314 if (i >= usec_timeout) { 3315 dev_err(adev->dev, "failed to prime instruction cache\n"); 3316 return -EINVAL; 3317 } 3318 3319 mutex_lock(&adev->srbm_mutex); 3320 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3321 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3322 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3323 (pfp_hdr->ucode_start_addr_hi << 30) | 3324 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3325 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3326 pfp_hdr->ucode_start_addr_hi>>2); 3327 3328 /* 3329 * Program CP_ME_CNTL to reset given PIPE to take 3330 * effect of CP_PFP_PRGRM_CNTR_START. 3331 */ 3332 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3333 if (pipe_id == 0) 3334 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3335 PFP_PIPE0_RESET, 1); 3336 else 3337 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3338 PFP_PIPE1_RESET, 1); 3339 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3340 3341 /* Clear pfp pipe0 reset bit. */ 3342 if (pipe_id == 0) 3343 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3344 PFP_PIPE0_RESET, 0); 3345 else 3346 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3347 PFP_PIPE1_RESET, 0); 3348 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3349 3350 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3351 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3352 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3353 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3354 } 3355 soc21_grbm_select(adev, 0, 0, 0, 0); 3356 mutex_unlock(&adev->srbm_mutex); 3357 3358 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3359 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3360 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3361 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3362 3363 /* Invalidate the data caches */ 3364 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3365 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3366 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3367 3368 for (i = 0; i < usec_timeout; i++) { 3369 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3370 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3371 INVALIDATE_DCACHE_COMPLETE)) 3372 break; 3373 udelay(1); 3374 } 3375 3376 if (i >= usec_timeout) { 3377 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3378 return -EINVAL; 3379 } 3380 3381 return 0; 3382 } 3383 3384 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3385 { 3386 int r; 3387 const struct gfx_firmware_header_v1_0 *me_hdr; 3388 const __le32 *fw_data; 3389 unsigned i, fw_size; 3390 3391 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3392 adev->gfx.me_fw->data; 3393 3394 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3395 3396 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3397 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3398 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3399 3400 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3401 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3402 &adev->gfx.me.me_fw_obj, 3403 &adev->gfx.me.me_fw_gpu_addr, 3404 (void **)&adev->gfx.me.me_fw_ptr); 3405 if (r) { 3406 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3407 gfx_v11_0_me_fini(adev); 3408 return r; 3409 } 3410 3411 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3412 3413 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3414 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3415 3416 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3417 3418 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3419 3420 for (i = 0; i < me_hdr->jt_size; i++) 3421 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3422 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3423 3424 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3425 3426 return 0; 3427 } 3428 3429 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3430 { 3431 int r; 3432 const struct gfx_firmware_header_v2_0 *me_hdr; 3433 const __le32 *fw_ucode, *fw_data; 3434 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3435 uint32_t tmp; 3436 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3437 3438 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3439 adev->gfx.me_fw->data; 3440 3441 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3442 3443 /* instruction */ 3444 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3445 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3446 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3447 /* data */ 3448 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3449 le32_to_cpu(me_hdr->data_offset_bytes)); 3450 fw_data_size = le32_to_cpu(me_hdr->data_size_bytes); 3451 3452 /* 64kb align*/ 3453 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3454 64 * 1024, 3455 AMDGPU_GEM_DOMAIN_VRAM | 3456 AMDGPU_GEM_DOMAIN_GTT, 3457 &adev->gfx.me.me_fw_obj, 3458 &adev->gfx.me.me_fw_gpu_addr, 3459 (void **)&adev->gfx.me.me_fw_ptr); 3460 if (r) { 3461 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 3462 gfx_v11_0_me_fini(adev); 3463 return r; 3464 } 3465 3466 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3467 64 * 1024, 3468 AMDGPU_GEM_DOMAIN_VRAM | 3469 AMDGPU_GEM_DOMAIN_GTT, 3470 &adev->gfx.me.me_fw_data_obj, 3471 &adev->gfx.me.me_fw_data_gpu_addr, 3472 (void **)&adev->gfx.me.me_fw_data_ptr); 3473 if (r) { 3474 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 3475 gfx_v11_0_pfp_fini(adev); 3476 return r; 3477 } 3478 3479 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 3480 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 3481 3482 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3483 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 3484 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3485 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3486 3487 if (amdgpu_emu_mode == 1) 3488 amdgpu_device_flush_hdp(adev, NULL); 3489 3490 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3491 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3492 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 3493 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3494 3495 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 3496 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 3497 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 3498 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 3499 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 3500 3501 /* 3502 * Programming any of the CP_ME_IC_BASE registers 3503 * forces invalidation of the ME L1 I$. Wait for the 3504 * invalidation complete 3505 */ 3506 for (i = 0; i < usec_timeout; i++) { 3507 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3508 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3509 INVALIDATE_CACHE_COMPLETE)) 3510 break; 3511 udelay(1); 3512 } 3513 3514 if (i >= usec_timeout) { 3515 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3516 return -EINVAL; 3517 } 3518 3519 /* Prime the instruction caches */ 3520 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3521 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 3522 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 3523 3524 /* Waiting for instruction cache primed*/ 3525 for (i = 0; i < usec_timeout; i++) { 3526 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3527 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3528 ICACHE_PRIMED)) 3529 break; 3530 udelay(1); 3531 } 3532 3533 if (i >= usec_timeout) { 3534 dev_err(adev->dev, "failed to prime instruction cache\n"); 3535 return -EINVAL; 3536 } 3537 3538 mutex_lock(&adev->srbm_mutex); 3539 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3540 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3541 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3542 (me_hdr->ucode_start_addr_hi << 30) | 3543 (me_hdr->ucode_start_addr_lo >> 2) ); 3544 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3545 me_hdr->ucode_start_addr_hi>>2); 3546 3547 /* 3548 * Program CP_ME_CNTL to reset given PIPE to take 3549 * effect of CP_PFP_PRGRM_CNTR_START. 3550 */ 3551 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3552 if (pipe_id == 0) 3553 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3554 ME_PIPE0_RESET, 1); 3555 else 3556 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3557 ME_PIPE1_RESET, 1); 3558 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3559 3560 /* Clear pfp pipe0 reset bit. */ 3561 if (pipe_id == 0) 3562 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3563 ME_PIPE0_RESET, 0); 3564 else 3565 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3566 ME_PIPE1_RESET, 0); 3567 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3568 3569 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3570 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3571 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3572 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3573 } 3574 soc21_grbm_select(adev, 0, 0, 0, 0); 3575 mutex_unlock(&adev->srbm_mutex); 3576 3577 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3578 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3579 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3580 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3581 3582 /* Invalidate the data caches */ 3583 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3584 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3585 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3586 3587 for (i = 0; i < usec_timeout; i++) { 3588 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3589 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3590 INVALIDATE_DCACHE_COMPLETE)) 3591 break; 3592 udelay(1); 3593 } 3594 3595 if (i >= usec_timeout) { 3596 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3597 return -EINVAL; 3598 } 3599 3600 return 0; 3601 } 3602 3603 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3604 { 3605 int r; 3606 3607 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3608 return -EINVAL; 3609 3610 gfx_v11_0_cp_gfx_enable(adev, false); 3611 3612 if (adev->gfx.rs64_enable) 3613 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3614 else 3615 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3616 if (r) { 3617 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3618 return r; 3619 } 3620 3621 if (adev->gfx.rs64_enable) 3622 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3623 else 3624 r = gfx_v11_0_cp_gfx_load_me_microcode(adev); 3625 if (r) { 3626 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3627 return r; 3628 } 3629 3630 return 0; 3631 } 3632 3633 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3634 { 3635 struct amdgpu_ring *ring; 3636 const struct cs_section_def *sect = NULL; 3637 const struct cs_extent_def *ext = NULL; 3638 int r, i; 3639 int ctx_reg_offset; 3640 3641 /* init the CP */ 3642 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3643 adev->gfx.config.max_hw_contexts - 1); 3644 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3645 3646 if (!amdgpu_async_gfx_ring) 3647 gfx_v11_0_cp_gfx_enable(adev, true); 3648 3649 ring = &adev->gfx.gfx_ring[0]; 3650 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3651 if (r) { 3652 drm_err(&adev->ddev, "cp failed to lock ring (%d).\n", r); 3653 return r; 3654 } 3655 3656 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3657 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3658 3659 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3660 amdgpu_ring_write(ring, 0x80000000); 3661 amdgpu_ring_write(ring, 0x80000000); 3662 3663 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3664 for (ext = sect->section; ext->extent != NULL; ++ext) { 3665 if (sect->id == SECT_CONTEXT) { 3666 amdgpu_ring_write(ring, 3667 PACKET3(PACKET3_SET_CONTEXT_REG, 3668 ext->reg_count)); 3669 amdgpu_ring_write(ring, ext->reg_index - 3670 PACKET3_SET_CONTEXT_REG_START); 3671 for (i = 0; i < ext->reg_count; i++) 3672 amdgpu_ring_write(ring, ext->extent[i]); 3673 } 3674 } 3675 } 3676 3677 ctx_reg_offset = 3678 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3679 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3680 amdgpu_ring_write(ring, ctx_reg_offset); 3681 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3682 3683 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3684 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3685 3686 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3687 amdgpu_ring_write(ring, 0); 3688 3689 amdgpu_ring_commit(ring); 3690 3691 /* submit cs packet to copy state 0 to next available state */ 3692 if (adev->gfx.num_gfx_rings > 1) { 3693 /* maximum supported gfx ring is 2 */ 3694 ring = &adev->gfx.gfx_ring[1]; 3695 r = amdgpu_ring_alloc(ring, 2); 3696 if (r) { 3697 drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); 3698 return r; 3699 } 3700 3701 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3702 amdgpu_ring_write(ring, 0); 3703 3704 amdgpu_ring_commit(ring); 3705 } 3706 return 0; 3707 } 3708 3709 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3710 CP_PIPE_ID pipe) 3711 { 3712 u32 tmp; 3713 3714 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3715 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3716 3717 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3718 } 3719 3720 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3721 struct amdgpu_ring *ring) 3722 { 3723 u32 tmp; 3724 3725 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3726 if (ring->use_doorbell) { 3727 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3728 DOORBELL_OFFSET, ring->doorbell_index); 3729 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3730 DOORBELL_EN, 1); 3731 } else { 3732 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3733 DOORBELL_EN, 0); 3734 } 3735 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3736 3737 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3738 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3739 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3740 3741 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3742 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3743 } 3744 3745 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3746 { 3747 struct amdgpu_ring *ring; 3748 u32 tmp; 3749 u32 rb_bufsz; 3750 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3751 3752 /* Set the write pointer delay */ 3753 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3754 3755 /* set the RB to use vmid 0 */ 3756 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3757 3758 /* Init gfx ring 0 for pipe 0 */ 3759 mutex_lock(&adev->srbm_mutex); 3760 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3761 3762 /* Set ring buffer size */ 3763 ring = &adev->gfx.gfx_ring[0]; 3764 rb_bufsz = order_base_2(ring->ring_size / 8); 3765 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3766 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3767 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3768 3769 /* Initialize the ring buffer's write pointers */ 3770 ring->wptr = 0; 3771 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3772 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3773 3774 /* set the wb address whether it's enabled or not */ 3775 rptr_addr = ring->rptr_gpu_addr; 3776 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3777 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3778 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3779 3780 wptr_gpu_addr = ring->wptr_gpu_addr; 3781 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3782 lower_32_bits(wptr_gpu_addr)); 3783 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3784 upper_32_bits(wptr_gpu_addr)); 3785 3786 mdelay(1); 3787 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3788 3789 rb_addr = ring->gpu_addr >> 8; 3790 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3791 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3792 3793 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3794 3795 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3796 mutex_unlock(&adev->srbm_mutex); 3797 3798 /* Init gfx ring 1 for pipe 1 */ 3799 if (adev->gfx.num_gfx_rings > 1) { 3800 mutex_lock(&adev->srbm_mutex); 3801 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3802 /* maximum supported gfx ring is 2 */ 3803 ring = &adev->gfx.gfx_ring[1]; 3804 rb_bufsz = order_base_2(ring->ring_size / 8); 3805 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3806 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3807 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3808 /* Initialize the ring buffer's write pointers */ 3809 ring->wptr = 0; 3810 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3811 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3812 /* Set the wb address whether it's enabled or not */ 3813 rptr_addr = ring->rptr_gpu_addr; 3814 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3815 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3816 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3817 wptr_gpu_addr = ring->wptr_gpu_addr; 3818 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3819 lower_32_bits(wptr_gpu_addr)); 3820 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3821 upper_32_bits(wptr_gpu_addr)); 3822 3823 mdelay(1); 3824 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3825 3826 rb_addr = ring->gpu_addr >> 8; 3827 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3828 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3829 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3830 3831 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3832 mutex_unlock(&adev->srbm_mutex); 3833 } 3834 /* Switch to pipe 0 */ 3835 mutex_lock(&adev->srbm_mutex); 3836 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3837 mutex_unlock(&adev->srbm_mutex); 3838 3839 /* start the ring */ 3840 gfx_v11_0_cp_gfx_start(adev); 3841 3842 return 0; 3843 } 3844 3845 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3846 { 3847 u32 data; 3848 3849 if (adev->gfx.rs64_enable) { 3850 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3851 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3852 enable ? 0 : 1); 3853 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3854 enable ? 0 : 1); 3855 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3856 enable ? 0 : 1); 3857 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3858 enable ? 0 : 1); 3859 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3860 enable ? 0 : 1); 3861 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3862 enable ? 1 : 0); 3863 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3864 enable ? 1 : 0); 3865 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3866 enable ? 1 : 0); 3867 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3868 enable ? 1 : 0); 3869 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3870 enable ? 0 : 1); 3871 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3872 } else { 3873 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3874 3875 if (enable) { 3876 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3877 if (!adev->enable_mes_kiq) 3878 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3879 MEC_ME2_HALT, 0); 3880 } else { 3881 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3882 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3883 } 3884 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3885 } 3886 3887 udelay(50); 3888 } 3889 3890 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3891 { 3892 const struct gfx_firmware_header_v1_0 *mec_hdr; 3893 const __le32 *fw_data; 3894 unsigned i, fw_size; 3895 u32 *fw = NULL; 3896 int r; 3897 3898 if (!adev->gfx.mec_fw) 3899 return -EINVAL; 3900 3901 gfx_v11_0_cp_compute_enable(adev, false); 3902 3903 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3904 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3905 3906 fw_data = (const __le32 *) 3907 (adev->gfx.mec_fw->data + 3908 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3909 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3910 3911 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3912 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3913 &adev->gfx.mec.mec_fw_obj, 3914 &adev->gfx.mec.mec_fw_gpu_addr, 3915 (void **)&fw); 3916 if (r) { 3917 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3918 gfx_v11_0_mec_fini(adev); 3919 return r; 3920 } 3921 3922 memcpy(fw, fw_data, fw_size); 3923 3924 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3925 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3926 3927 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3928 3929 /* MEC1 */ 3930 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3931 3932 for (i = 0; i < mec_hdr->jt_size; i++) 3933 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3934 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3935 3936 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3937 3938 return 0; 3939 } 3940 3941 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3942 { 3943 const struct gfx_firmware_header_v2_0 *mec_hdr; 3944 const __le32 *fw_ucode, *fw_data; 3945 u32 tmp, fw_ucode_size, fw_data_size; 3946 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3947 u32 *fw_ucode_ptr, *fw_data_ptr; 3948 int r; 3949 3950 if (!adev->gfx.mec_fw) 3951 return -EINVAL; 3952 3953 gfx_v11_0_cp_compute_enable(adev, false); 3954 3955 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3956 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3957 3958 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3959 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3960 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3961 3962 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3963 le32_to_cpu(mec_hdr->data_offset_bytes)); 3964 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3965 3966 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3967 64 * 1024, 3968 AMDGPU_GEM_DOMAIN_VRAM | 3969 AMDGPU_GEM_DOMAIN_GTT, 3970 &adev->gfx.mec.mec_fw_obj, 3971 &adev->gfx.mec.mec_fw_gpu_addr, 3972 (void **)&fw_ucode_ptr); 3973 if (r) { 3974 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3975 gfx_v11_0_mec_fini(adev); 3976 return r; 3977 } 3978 3979 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3980 64 * 1024, 3981 AMDGPU_GEM_DOMAIN_VRAM | 3982 AMDGPU_GEM_DOMAIN_GTT, 3983 &adev->gfx.mec.mec_fw_data_obj, 3984 &adev->gfx.mec.mec_fw_data_gpu_addr, 3985 (void **)&fw_data_ptr); 3986 if (r) { 3987 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3988 gfx_v11_0_mec_fini(adev); 3989 return r; 3990 } 3991 3992 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 3993 memcpy(fw_data_ptr, fw_data, fw_data_size); 3994 3995 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3996 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 3997 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3998 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 3999 4000 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 4001 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 4002 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 4003 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 4004 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 4005 4006 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 4007 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 4008 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 4009 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 4010 4011 mutex_lock(&adev->srbm_mutex); 4012 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 4013 soc21_grbm_select(adev, 1, i, 0, 0); 4014 4015 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr); 4016 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 4017 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr)); 4018 4019 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 4020 mec_hdr->ucode_start_addr_lo >> 2 | 4021 mec_hdr->ucode_start_addr_hi << 30); 4022 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 4023 mec_hdr->ucode_start_addr_hi >> 2); 4024 4025 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr); 4026 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 4027 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 4028 } 4029 mutex_unlock(&adev->srbm_mutex); 4030 soc21_grbm_select(adev, 0, 0, 0, 0); 4031 4032 /* Trigger an invalidation of the L1 instruction caches */ 4033 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 4034 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 4035 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 4036 4037 /* Wait for invalidation complete */ 4038 for (i = 0; i < usec_timeout; i++) { 4039 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 4040 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 4041 INVALIDATE_DCACHE_COMPLETE)) 4042 break; 4043 udelay(1); 4044 } 4045 4046 if (i >= usec_timeout) { 4047 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 4048 return -EINVAL; 4049 } 4050 4051 /* Trigger an invalidation of the L1 instruction caches */ 4052 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4053 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 4054 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 4055 4056 /* Wait for invalidation complete */ 4057 for (i = 0; i < usec_timeout; i++) { 4058 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4059 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 4060 INVALIDATE_CACHE_COMPLETE)) 4061 break; 4062 udelay(1); 4063 } 4064 4065 if (i >= usec_timeout) { 4066 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 4067 return -EINVAL; 4068 } 4069 4070 return 0; 4071 } 4072 4073 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4074 { 4075 uint32_t tmp; 4076 struct amdgpu_device *adev = ring->adev; 4077 4078 /* tell RLC which is KIQ queue */ 4079 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4080 tmp &= 0xffffff00; 4081 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4082 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4083 } 4084 4085 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4086 { 4087 /* set graphics engine doorbell range */ 4088 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4089 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4090 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4091 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4092 4093 /* set compute engine doorbell range */ 4094 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4095 (adev->doorbell_index.kiq * 2) << 2); 4096 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4097 (adev->doorbell_index.userqueue_end * 2) << 2); 4098 } 4099 4100 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4101 struct v11_gfx_mqd *mqd, 4102 struct amdgpu_mqd_prop *prop) 4103 { 4104 bool priority = 0; 4105 u32 tmp; 4106 4107 /* set up default queue priority level 4108 * 0x0 = low priority, 0x1 = high priority 4109 */ 4110 if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) 4111 priority = 1; 4112 4113 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4114 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4115 mqd->cp_gfx_hqd_queue_priority = tmp; 4116 } 4117 4118 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4119 struct amdgpu_mqd_prop *prop) 4120 { 4121 struct v11_gfx_mqd *mqd = m; 4122 uint64_t hqd_gpu_addr, wb_gpu_addr; 4123 uint32_t tmp; 4124 uint32_t rb_bufsz; 4125 4126 /* set up gfx hqd wptr */ 4127 mqd->cp_gfx_hqd_wptr = 0; 4128 mqd->cp_gfx_hqd_wptr_hi = 0; 4129 4130 /* set the pointer to the MQD */ 4131 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4132 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4133 4134 /* set up mqd control */ 4135 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4136 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4137 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4138 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4139 mqd->cp_gfx_mqd_control = tmp; 4140 4141 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4142 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4143 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4144 mqd->cp_gfx_hqd_vmid = 0; 4145 4146 /* set up gfx queue priority */ 4147 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4148 4149 /* set up time quantum */ 4150 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4151 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4152 mqd->cp_gfx_hqd_quantum = tmp; 4153 4154 /* set up gfx hqd base. this is similar as CP_RB_BASE */ 4155 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4156 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4157 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4158 4159 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4160 wb_gpu_addr = prop->rptr_gpu_addr; 4161 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4162 mqd->cp_gfx_hqd_rptr_addr_hi = 4163 upper_32_bits(wb_gpu_addr) & 0xffff; 4164 4165 /* set up rb_wptr_poll addr */ 4166 wb_gpu_addr = prop->wptr_gpu_addr; 4167 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4168 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4169 4170 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4171 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4172 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4173 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4174 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4175 #ifdef __BIG_ENDIAN 4176 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4177 #endif 4178 if (prop->tmz_queue) 4179 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); 4180 if (!prop->kernel_queue) 4181 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); 4182 mqd->cp_gfx_hqd_cntl = tmp; 4183 4184 /* set up cp_doorbell_control */ 4185 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4186 if (prop->use_doorbell) { 4187 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4188 DOORBELL_OFFSET, prop->doorbell_index); 4189 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4190 DOORBELL_EN, 1); 4191 } else 4192 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4193 DOORBELL_EN, 0); 4194 mqd->cp_rb_doorbell_control = tmp; 4195 4196 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4197 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4198 4199 /* active the queue */ 4200 mqd->cp_gfx_hqd_active = 1; 4201 4202 /* set gfx UQ items */ 4203 mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); 4204 mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); 4205 mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); 4206 mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); 4207 mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); 4208 mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); 4209 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4210 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4211 4212 return 0; 4213 } 4214 4215 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4216 { 4217 struct amdgpu_device *adev = ring->adev; 4218 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4219 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4220 4221 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4222 memset((void *)mqd, 0, sizeof(*mqd)); 4223 mutex_lock(&adev->srbm_mutex); 4224 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4225 amdgpu_ring_init_mqd(ring); 4226 soc21_grbm_select(adev, 0, 0, 0, 0); 4227 mutex_unlock(&adev->srbm_mutex); 4228 if (adev->gfx.me.mqd_backup[mqd_idx]) 4229 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4230 } else { 4231 /* restore mqd with the backup copy */ 4232 if (adev->gfx.me.mqd_backup[mqd_idx]) 4233 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4234 /* reset the ring */ 4235 ring->wptr = 0; 4236 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4237 amdgpu_ring_clear_ring(ring); 4238 } 4239 4240 return 0; 4241 } 4242 4243 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4244 { 4245 int r, i; 4246 4247 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4248 r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4249 if (r) 4250 return r; 4251 } 4252 4253 r = amdgpu_gfx_enable_kgq(adev, 0); 4254 if (r) 4255 return r; 4256 4257 return gfx_v11_0_cp_gfx_start(adev); 4258 } 4259 4260 static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev, 4261 struct v11_compute_mqd *mqd, 4262 struct amdgpu_mqd_prop *prop) 4263 { 4264 uint32_t se_mask[8] = {0}; 4265 uint32_t wa_mask; 4266 bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE | 4267 AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE); 4268 4269 if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count)) 4270 return; 4271 4272 if (has_wa_flag) { 4273 wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ? 4274 0xffff : 0xffffffff; 4275 mqd->compute_static_thread_mgmt_se0 = wa_mask; 4276 mqd->compute_static_thread_mgmt_se1 = wa_mask; 4277 mqd->compute_static_thread_mgmt_se2 = wa_mask; 4278 mqd->compute_static_thread_mgmt_se3 = wa_mask; 4279 return; 4280 } 4281 4282 amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask, 4283 prop->cu_mask_count, se_mask); 4284 4285 mqd->compute_static_thread_mgmt_se0 = se_mask[0]; 4286 mqd->compute_static_thread_mgmt_se1 = se_mask[1]; 4287 mqd->compute_static_thread_mgmt_se2 = se_mask[2]; 4288 mqd->compute_static_thread_mgmt_se3 = se_mask[3]; 4289 } 4290 4291 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4292 struct amdgpu_mqd_prop *prop) 4293 { 4294 struct v11_compute_mqd *mqd = m; 4295 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4296 uint32_t tmp; 4297 4298 mqd->header = 0xC0310800; 4299 mqd->compute_pipelinestat_enable = 0x00000001; 4300 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4301 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4302 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4303 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4304 mqd->compute_misc_reserved = 0x00000007; 4305 4306 eop_base_addr = prop->eop_gpu_addr >> 8; 4307 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4308 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4309 4310 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4311 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4312 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4313 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4314 4315 mqd->cp_hqd_eop_control = tmp; 4316 4317 /* enable doorbell? */ 4318 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4319 4320 if (prop->use_doorbell) { 4321 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4322 DOORBELL_OFFSET, prop->doorbell_index); 4323 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4324 DOORBELL_EN, 1); 4325 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4326 DOORBELL_SOURCE, 0); 4327 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4328 DOORBELL_HIT, 0); 4329 } else { 4330 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4331 DOORBELL_EN, 0); 4332 } 4333 4334 mqd->cp_hqd_pq_doorbell_control = tmp; 4335 4336 /* disable the queue if it's active */ 4337 mqd->cp_hqd_dequeue_request = 0; 4338 mqd->cp_hqd_pq_rptr = 0; 4339 mqd->cp_hqd_pq_wptr_lo = 0; 4340 mqd->cp_hqd_pq_wptr_hi = 0; 4341 4342 /* set the pointer to the MQD */ 4343 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4344 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4345 4346 /* set MQD vmid to 0 */ 4347 tmp = regCP_MQD_CONTROL_DEFAULT; 4348 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4349 mqd->cp_mqd_control = tmp; 4350 4351 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4352 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4353 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4354 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4355 4356 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4357 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4358 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4359 (order_base_2(prop->queue_size / 4) - 1)); 4360 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4361 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4362 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4363 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4364 prop->allow_tunneling); 4365 if (prop->kernel_queue) { 4366 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4367 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4368 } 4369 if (prop->tmz_queue) 4370 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4371 mqd->cp_hqd_pq_control = tmp; 4372 4373 /* set the wb address whether it's enabled or not */ 4374 wb_gpu_addr = prop->rptr_gpu_addr; 4375 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4376 mqd->cp_hqd_pq_rptr_report_addr_hi = 4377 upper_32_bits(wb_gpu_addr) & 0xffff; 4378 4379 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4380 wb_gpu_addr = prop->wptr_gpu_addr; 4381 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4382 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4383 4384 tmp = 0; 4385 /* enable the doorbell if requested */ 4386 if (prop->use_doorbell) { 4387 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4388 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4389 DOORBELL_OFFSET, prop->doorbell_index); 4390 4391 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4392 DOORBELL_EN, 1); 4393 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4394 DOORBELL_SOURCE, 0); 4395 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4396 DOORBELL_HIT, 0); 4397 } 4398 4399 mqd->cp_hqd_pq_doorbell_control = tmp; 4400 4401 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4402 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4403 4404 /* set the vmid for the queue */ 4405 mqd->cp_hqd_vmid = 0; 4406 4407 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4408 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4409 mqd->cp_hqd_persistent_state = tmp; 4410 4411 /* set MIN_IB_AVAIL_SIZE */ 4412 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4413 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4414 mqd->cp_hqd_ib_control = tmp; 4415 4416 /* set static priority for a compute queue/ring */ 4417 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4418 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4419 4420 tmp = REG_SET_FIELD(0, CP_HQD_QUANTUM, QUANTUM_EN, 1); 4421 tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_SCALE, 1); 4422 tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_DURATION, 1); 4423 mqd->cp_hqd_quantum = tmp; 4424 4425 mqd->cp_hqd_active = prop->hqd_active; 4426 4427 /* set UQ fenceaddress */ 4428 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4429 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4430 /* set CU mask */ 4431 gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop); 4432 4433 return 0; 4434 } 4435 4436 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4437 { 4438 struct amdgpu_device *adev = ring->adev; 4439 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4440 int j; 4441 4442 /* inactivate the queue */ 4443 if (amdgpu_sriov_vf(adev)) 4444 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4445 4446 /* disable wptr polling */ 4447 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4448 4449 /* write the EOP addr */ 4450 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4451 mqd->cp_hqd_eop_base_addr_lo); 4452 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4453 mqd->cp_hqd_eop_base_addr_hi); 4454 4455 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4456 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4457 mqd->cp_hqd_eop_control); 4458 4459 /* enable doorbell? */ 4460 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4461 mqd->cp_hqd_pq_doorbell_control); 4462 4463 /* disable the queue if it's active */ 4464 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4465 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4466 for (j = 0; j < adev->usec_timeout; j++) { 4467 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4468 break; 4469 udelay(1); 4470 } 4471 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4472 mqd->cp_hqd_dequeue_request); 4473 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4474 mqd->cp_hqd_pq_rptr); 4475 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4476 mqd->cp_hqd_pq_wptr_lo); 4477 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4478 mqd->cp_hqd_pq_wptr_hi); 4479 } 4480 4481 /* set the pointer to the MQD */ 4482 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4483 mqd->cp_mqd_base_addr_lo); 4484 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4485 mqd->cp_mqd_base_addr_hi); 4486 4487 /* set MQD vmid to 0 */ 4488 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4489 mqd->cp_mqd_control); 4490 4491 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4492 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4493 mqd->cp_hqd_pq_base_lo); 4494 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4495 mqd->cp_hqd_pq_base_hi); 4496 4497 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4498 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4499 mqd->cp_hqd_pq_control); 4500 4501 /* set the wb address whether it's enabled or not */ 4502 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4503 mqd->cp_hqd_pq_rptr_report_addr_lo); 4504 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4505 mqd->cp_hqd_pq_rptr_report_addr_hi); 4506 4507 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4508 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4509 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4510 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4511 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4512 4513 /* enable the doorbell if requested */ 4514 if (ring->use_doorbell) { 4515 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4516 (adev->doorbell_index.kiq * 2) << 2); 4517 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4518 (adev->doorbell_index.userqueue_end * 2) << 2); 4519 } 4520 4521 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4522 mqd->cp_hqd_pq_doorbell_control); 4523 4524 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4525 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4526 mqd->cp_hqd_pq_wptr_lo); 4527 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4528 mqd->cp_hqd_pq_wptr_hi); 4529 4530 /* set the vmid for the queue */ 4531 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4532 4533 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4534 mqd->cp_hqd_persistent_state); 4535 4536 /* activate the queue */ 4537 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4538 mqd->cp_hqd_active); 4539 4540 if (ring->use_doorbell) 4541 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4542 4543 return 0; 4544 } 4545 4546 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4547 { 4548 struct amdgpu_device *adev = ring->adev; 4549 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4550 4551 gfx_v11_0_kiq_setting(ring); 4552 4553 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4554 /* reset MQD to a clean status */ 4555 if (adev->gfx.kiq[0].mqd_backup) 4556 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4557 4558 /* reset ring buffer */ 4559 ring->wptr = 0; 4560 amdgpu_ring_clear_ring(ring); 4561 4562 mutex_lock(&adev->srbm_mutex); 4563 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4564 gfx_v11_0_kiq_init_register(ring); 4565 soc21_grbm_select(adev, 0, 0, 0, 0); 4566 mutex_unlock(&adev->srbm_mutex); 4567 } else { 4568 memset((void *)mqd, 0, sizeof(*mqd)); 4569 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4570 amdgpu_ring_clear_ring(ring); 4571 mutex_lock(&adev->srbm_mutex); 4572 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4573 amdgpu_ring_init_mqd(ring); 4574 gfx_v11_0_kiq_init_register(ring); 4575 soc21_grbm_select(adev, 0, 0, 0, 0); 4576 mutex_unlock(&adev->srbm_mutex); 4577 4578 if (adev->gfx.kiq[0].mqd_backup) 4579 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4580 } 4581 4582 return 0; 4583 } 4584 4585 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4586 { 4587 struct amdgpu_device *adev = ring->adev; 4588 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4589 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4590 4591 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4592 memset((void *)mqd, 0, sizeof(*mqd)); 4593 mutex_lock(&adev->srbm_mutex); 4594 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4595 amdgpu_ring_init_mqd(ring); 4596 soc21_grbm_select(adev, 0, 0, 0, 0); 4597 mutex_unlock(&adev->srbm_mutex); 4598 4599 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4600 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4601 } else { 4602 /* restore MQD to a clean status */ 4603 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4604 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4605 /* reset ring buffer */ 4606 ring->wptr = 0; 4607 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4608 amdgpu_ring_clear_ring(ring); 4609 } 4610 4611 return 0; 4612 } 4613 4614 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4615 { 4616 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4617 return 0; 4618 } 4619 4620 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4621 { 4622 int i, r; 4623 4624 if (!amdgpu_async_gfx_ring) 4625 gfx_v11_0_cp_compute_enable(adev, true); 4626 4627 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4628 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4629 if (r) 4630 return r; 4631 } 4632 4633 return amdgpu_gfx_enable_kcq(adev, 0); 4634 } 4635 4636 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4637 { 4638 int r, i; 4639 struct amdgpu_ring *ring; 4640 4641 if (!(adev->flags & AMD_IS_APU)) 4642 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4643 4644 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4645 /* legacy firmware loading */ 4646 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4647 if (r) 4648 return r; 4649 4650 if (adev->gfx.rs64_enable) 4651 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4652 else 4653 r = gfx_v11_0_cp_compute_load_microcode(adev); 4654 if (r) 4655 return r; 4656 } 4657 4658 gfx_v11_0_cp_set_doorbell_range(adev); 4659 4660 if (amdgpu_async_gfx_ring) { 4661 gfx_v11_0_cp_compute_enable(adev, true); 4662 gfx_v11_0_cp_gfx_enable(adev, true); 4663 } 4664 4665 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4666 r = amdgpu_mes_kiq_hw_init(adev, 0); 4667 else 4668 r = gfx_v11_0_kiq_resume(adev); 4669 if (r) 4670 return r; 4671 4672 r = gfx_v11_0_kcq_resume(adev); 4673 if (r) 4674 return r; 4675 4676 if (!amdgpu_async_gfx_ring) { 4677 r = gfx_v11_0_cp_gfx_resume(adev); 4678 if (r) 4679 return r; 4680 } else { 4681 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4682 if (r) 4683 return r; 4684 } 4685 4686 if (adev->gfx.disable_kq) { 4687 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4688 ring = &adev->gfx.gfx_ring[i]; 4689 /* we don't want to set ring->ready */ 4690 r = amdgpu_ring_test_ring(ring); 4691 if (r) 4692 return r; 4693 } 4694 if (amdgpu_async_gfx_ring) 4695 amdgpu_gfx_disable_kgq(adev, 0); 4696 } else { 4697 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4698 ring = &adev->gfx.gfx_ring[i]; 4699 r = amdgpu_ring_test_helper(ring); 4700 if (r) 4701 return r; 4702 } 4703 } 4704 4705 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4706 ring = &adev->gfx.compute_ring[i]; 4707 r = amdgpu_ring_test_helper(ring); 4708 if (r) 4709 return r; 4710 } 4711 4712 return 0; 4713 } 4714 4715 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4716 { 4717 gfx_v11_0_cp_gfx_enable(adev, enable); 4718 gfx_v11_0_cp_compute_enable(adev, enable); 4719 } 4720 4721 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4722 { 4723 int r; 4724 bool value; 4725 4726 r = adev->gfxhub.funcs->gart_enable(adev); 4727 if (r) 4728 return r; 4729 4730 amdgpu_device_flush_hdp(adev, NULL); 4731 4732 value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; 4733 4734 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4735 /* TODO investigate why this and the hdp flush above is needed, 4736 * are we missing a flush somewhere else? */ 4737 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4738 4739 return 0; 4740 } 4741 4742 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4743 { 4744 u32 tmp; 4745 4746 /* select RS64 */ 4747 if (adev->gfx.rs64_enable) { 4748 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4749 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4750 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4751 4752 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4753 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4754 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4755 } 4756 4757 if (amdgpu_emu_mode == 1) 4758 msleep(100); 4759 } 4760 4761 static int get_gb_addr_config(struct amdgpu_device * adev) 4762 { 4763 u32 gb_addr_config; 4764 4765 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4766 if (gb_addr_config == 0) 4767 return -EINVAL; 4768 4769 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4770 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4771 4772 adev->gfx.config.gb_addr_config = gb_addr_config; 4773 4774 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4775 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4776 GB_ADDR_CONFIG, NUM_PIPES); 4777 4778 adev->gfx.config.max_tile_pipes = 4779 adev->gfx.config.gb_addr_config_fields.num_pipes; 4780 4781 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4782 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4783 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4784 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4785 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4786 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4787 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4788 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4789 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4790 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4791 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4792 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4793 4794 return 0; 4795 } 4796 4797 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4798 { 4799 uint32_t data; 4800 4801 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4802 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4803 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4804 4805 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4806 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4807 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4808 } 4809 4810 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4811 { 4812 int r; 4813 struct amdgpu_device *adev = ip_block->adev; 4814 4815 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4816 adev->gfx.cleaner_shader_ptr); 4817 4818 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4819 if (adev->gfx.imu.funcs) { 4820 /* RLC autoload sequence 1: Program rlc ram */ 4821 if (adev->gfx.imu.funcs->program_rlc_ram) 4822 adev->gfx.imu.funcs->program_rlc_ram(adev); 4823 /* rlc autoload firmware */ 4824 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4825 if (r) 4826 return r; 4827 } 4828 } else { 4829 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4830 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4831 if (adev->gfx.imu.funcs->load_microcode) 4832 adev->gfx.imu.funcs->load_microcode(adev); 4833 if (adev->gfx.imu.funcs->setup_imu) 4834 adev->gfx.imu.funcs->setup_imu(adev); 4835 if (adev->gfx.imu.funcs->start_imu) 4836 adev->gfx.imu.funcs->start_imu(adev); 4837 } 4838 4839 /* disable gpa mode in backdoor loading */ 4840 gfx_v11_0_disable_gpa_mode(adev); 4841 } 4842 } 4843 4844 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4845 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4846 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4847 if (r) { 4848 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 4849 return r; 4850 } 4851 } 4852 4853 adev->gfx.is_poweron = true; 4854 4855 if(get_gb_addr_config(adev)) 4856 drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n"); 4857 4858 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4859 adev->gfx.rs64_enable) 4860 gfx_v11_0_config_gfx_rs64(adev); 4861 4862 r = gfx_v11_0_gfxhub_enable(adev); 4863 if (r) 4864 return r; 4865 4866 if (!amdgpu_emu_mode) 4867 gfx_v11_0_init_golden_registers(adev); 4868 4869 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4870 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4871 /** 4872 * For gfx 11, rlc firmware loading relies on smu firmware is 4873 * loaded firstly, so in direct type, it has to load smc ucode 4874 * here before rlc. 4875 */ 4876 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4877 if (r) 4878 return r; 4879 } 4880 4881 gfx_v11_0_constants_init(adev); 4882 4883 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4884 gfx_v11_0_select_cp_fw_arch(adev); 4885 4886 if (adev->nbio.funcs->gc_doorbell_init) 4887 adev->nbio.funcs->gc_doorbell_init(adev); 4888 4889 r = gfx_v11_0_rlc_resume(adev); 4890 if (r) 4891 return r; 4892 4893 /* 4894 * init golden registers and rlc resume may override some registers, 4895 * reconfig them here 4896 */ 4897 gfx_v11_0_tcp_harvest(adev); 4898 4899 r = gfx_v11_0_cp_resume(adev); 4900 if (r) 4901 return r; 4902 4903 /* get IMU version from HW if it's not set */ 4904 if (!adev->gfx.imu_fw_version) 4905 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4906 4907 return r; 4908 } 4909 4910 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, 4911 bool enable) 4912 { 4913 unsigned int irq_type; 4914 int m, p, r; 4915 4916 if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { 4917 for (m = 0; m < adev->gfx.me.num_me; m++) { 4918 for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { 4919 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4920 if (enable) 4921 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4922 irq_type); 4923 else 4924 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4925 irq_type); 4926 if (r) 4927 return r; 4928 } 4929 } 4930 } 4931 4932 if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { 4933 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 4934 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 4935 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4936 + (m * adev->gfx.mec.num_pipe_per_mec) 4937 + p; 4938 if (enable) 4939 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 4940 irq_type); 4941 else 4942 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 4943 irq_type); 4944 if (r) 4945 return r; 4946 } 4947 } 4948 } 4949 4950 return 0; 4951 } 4952 4953 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 4954 { 4955 struct amdgpu_device *adev = ip_block->adev; 4956 4957 cancel_delayed_work_sync(&adev->gfx.idle_work); 4958 4959 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4960 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4961 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 4962 gfx_v11_0_set_userq_eop_interrupts(adev, false); 4963 4964 if (!adev->no_hw_access) { 4965 if (amdgpu_async_gfx_ring && 4966 !adev->gfx.disable_kq) { 4967 if (amdgpu_gfx_disable_kgq(adev, 0)) 4968 DRM_ERROR("KGQ disable failed\n"); 4969 } 4970 4971 if (amdgpu_gfx_disable_kcq(adev, 0)) 4972 DRM_ERROR("KCQ disable failed\n"); 4973 4974 amdgpu_mes_kiq_hw_fini(adev, 0); 4975 } 4976 4977 if (amdgpu_sriov_vf(adev)) 4978 /* Remove the steps disabling CPG and clearing KIQ position, 4979 * so that CP could perform IDLE-SAVE during switch. Those 4980 * steps are necessary to avoid a DMAR error in gfx9 but it is 4981 * not reproduced on gfx11. 4982 */ 4983 return 0; 4984 4985 gfx_v11_0_cp_enable(adev, false); 4986 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4987 4988 adev->gfxhub.funcs->gart_disable(adev); 4989 4990 adev->gfx.is_poweron = false; 4991 4992 return 0; 4993 } 4994 4995 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 4996 { 4997 return gfx_v11_0_hw_fini(ip_block); 4998 } 4999 5000 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 5001 { 5002 return gfx_v11_0_hw_init(ip_block); 5003 } 5004 5005 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 5006 { 5007 struct amdgpu_device *adev = ip_block->adev; 5008 5009 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 5010 GRBM_STATUS, GUI_ACTIVE)) 5011 return false; 5012 else 5013 return true; 5014 } 5015 5016 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 5017 { 5018 unsigned i; 5019 u32 tmp; 5020 struct amdgpu_device *adev = ip_block->adev; 5021 5022 for (i = 0; i < adev->usec_timeout; i++) { 5023 /* read MC_STATUS */ 5024 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 5025 GRBM_STATUS__GUI_ACTIVE_MASK; 5026 5027 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 5028 return 0; 5029 udelay(1); 5030 } 5031 return -ETIMEDOUT; 5032 } 5033 5034 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 5035 bool req) 5036 { 5037 u32 i, tmp, val; 5038 5039 for (i = 0; i < adev->usec_timeout; i++) { 5040 /* Request with MeId=2, PipeId=0 */ 5041 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 5042 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); 5043 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 5044 5045 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 5046 if (req) { 5047 if (val == tmp) 5048 break; 5049 } else { 5050 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 5051 REQUEST, 1); 5052 5053 /* unlocked or locked by firmware */ 5054 if (val != tmp) 5055 break; 5056 } 5057 udelay(1); 5058 } 5059 5060 if (i >= adev->usec_timeout) 5061 return -EINVAL; 5062 5063 return 0; 5064 } 5065 5066 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 5067 { 5068 u32 grbm_soft_reset = 0; 5069 u32 tmp; 5070 int r, i, j, k; 5071 struct amdgpu_device *adev = ip_block->adev; 5072 5073 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5074 5075 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5076 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 5077 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 5078 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 5079 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 5080 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5081 5082 mutex_lock(&adev->srbm_mutex); 5083 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 5084 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 5085 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 5086 soc21_grbm_select(adev, i, k, j, 0); 5087 5088 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 5089 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 5090 } 5091 } 5092 } 5093 for (i = 0; i < adev->gfx.me.num_me; ++i) { 5094 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 5095 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 5096 soc21_grbm_select(adev, i, k, j, 0); 5097 5098 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5099 } 5100 } 5101 } 5102 soc21_grbm_select(adev, 0, 0, 0, 0); 5103 mutex_unlock(&adev->srbm_mutex); 5104 5105 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5106 mutex_lock(&adev->gfx.reset_sem_mutex); 5107 r = gfx_v11_0_request_gfx_index_mutex(adev, true); 5108 if (r) { 5109 mutex_unlock(&adev->gfx.reset_sem_mutex); 5110 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 5111 return r; 5112 } 5113 5114 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 5115 5116 // Read CP_VMID_RESET register three times. 5117 // to get sufficient time for GFX_HQD_ACTIVE reach 0 5118 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5119 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5120 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5121 5122 /* release the gfx mutex */ 5123 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 5124 mutex_unlock(&adev->gfx.reset_sem_mutex); 5125 if (r) { 5126 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 5127 return r; 5128 } 5129 5130 for (i = 0; i < adev->usec_timeout; i++) { 5131 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 5132 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 5133 break; 5134 udelay(1); 5135 } 5136 if (i >= adev->usec_timeout) { 5137 printk("Failed to wait all pipes clean\n"); 5138 return -EINVAL; 5139 } 5140 5141 /********** trigger soft reset ***********/ 5142 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5143 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5144 SOFT_RESET_CP, 1); 5145 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5146 SOFT_RESET_GFX, 1); 5147 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5148 SOFT_RESET_CPF, 1); 5149 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5150 SOFT_RESET_CPC, 1); 5151 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5152 SOFT_RESET_CPG, 1); 5153 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5154 /********** exit soft reset ***********/ 5155 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5156 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5157 SOFT_RESET_CP, 0); 5158 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5159 SOFT_RESET_GFX, 0); 5160 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5161 SOFT_RESET_CPF, 0); 5162 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5163 SOFT_RESET_CPC, 0); 5164 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5165 SOFT_RESET_CPG, 0); 5166 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5167 5168 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 5169 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 5170 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 5171 5172 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 5173 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 5174 5175 for (i = 0; i < adev->usec_timeout; i++) { 5176 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 5177 break; 5178 udelay(1); 5179 } 5180 if (i >= adev->usec_timeout) { 5181 printk("Failed to wait CP_VMID_RESET to 0\n"); 5182 return -EINVAL; 5183 } 5184 5185 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5186 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5187 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5188 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5189 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5190 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5191 5192 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5193 5194 return gfx_v11_0_cp_resume(adev); 5195 } 5196 5197 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 5198 { 5199 int i, r; 5200 struct amdgpu_device *adev = ip_block->adev; 5201 struct amdgpu_ring *ring; 5202 long tmo = msecs_to_jiffies(1000); 5203 5204 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5205 ring = &adev->gfx.gfx_ring[i]; 5206 r = amdgpu_ring_test_ib(ring, tmo); 5207 if (r) 5208 return true; 5209 } 5210 5211 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5212 ring = &adev->gfx.compute_ring[i]; 5213 r = amdgpu_ring_test_ib(ring, tmo); 5214 if (r) 5215 return true; 5216 } 5217 5218 return false; 5219 } 5220 5221 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5222 { 5223 struct amdgpu_device *adev = ip_block->adev; 5224 /** 5225 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5226 */ 5227 return amdgpu_mes_resume(adev, 0); 5228 } 5229 5230 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5231 { 5232 uint64_t clock; 5233 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5234 5235 if (amdgpu_sriov_vf(adev)) { 5236 amdgpu_gfx_off_ctrl(adev, false); 5237 mutex_lock(&adev->gfx.gpu_clock_mutex); 5238 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5239 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5240 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5241 if (clock_counter_hi_pre != clock_counter_hi_after) 5242 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5243 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5244 amdgpu_gfx_off_ctrl(adev, true); 5245 } else { 5246 preempt_disable(); 5247 if (amdgpu_ip_version(adev, SMUIO_HWIP, 0) < IP_VERSION(15, 0, 0)) { 5248 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, 5249 regGOLDEN_TSC_COUNT_UPPER); 5250 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5251 regGOLDEN_TSC_COUNT_LOWER); 5252 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, 5253 regGOLDEN_TSC_COUNT_UPPER); 5254 if (clock_counter_hi_pre != clock_counter_hi_after) 5255 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5256 regGOLDEN_TSC_COUNT_LOWER); 5257 } else { 5258 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, 5259 regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); 5260 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5261 regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); 5262 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, 5263 regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); 5264 if (clock_counter_hi_pre != clock_counter_hi_after) 5265 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5266 regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); 5267 } 5268 preempt_enable(); 5269 } 5270 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5271 5272 return clock; 5273 } 5274 5275 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5276 uint32_t vmid, 5277 uint32_t gds_base, uint32_t gds_size, 5278 uint32_t gws_base, uint32_t gws_size, 5279 uint32_t oa_base, uint32_t oa_size) 5280 { 5281 struct amdgpu_device *adev = ring->adev; 5282 5283 /* GDS Base */ 5284 gfx_v11_0_write_data_to_reg(ring, 0, false, 5285 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5286 gds_base); 5287 5288 /* GDS Size */ 5289 gfx_v11_0_write_data_to_reg(ring, 0, false, 5290 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5291 gds_size); 5292 5293 /* GWS */ 5294 gfx_v11_0_write_data_to_reg(ring, 0, false, 5295 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5296 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5297 5298 /* OA */ 5299 gfx_v11_0_write_data_to_reg(ring, 0, false, 5300 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5301 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5302 } 5303 5304 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5305 { 5306 struct amdgpu_device *adev = ip_block->adev; 5307 5308 switch (amdgpu_user_queue) { 5309 case -1: 5310 case 0: 5311 default: 5312 adev->gfx.disable_kq = false; 5313 adev->gfx.disable_uq = true; 5314 break; 5315 case 1: 5316 adev->gfx.disable_kq = false; 5317 adev->gfx.disable_uq = false; 5318 break; 5319 case 2: 5320 adev->gfx.disable_kq = true; 5321 adev->gfx.disable_uq = false; 5322 break; 5323 } 5324 5325 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5326 5327 if (adev->gfx.disable_kq) { 5328 /* We need one GFX ring temporarily to set up 5329 * the clear state. 5330 */ 5331 adev->gfx.num_gfx_rings = 1; 5332 adev->gfx.num_compute_rings = 0; 5333 } else { 5334 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5335 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5336 AMDGPU_MAX_COMPUTE_RINGS); 5337 } 5338 5339 gfx_v11_0_set_kiq_pm4_funcs(adev); 5340 gfx_v11_0_set_ring_funcs(adev); 5341 gfx_v11_0_set_irq_funcs(adev); 5342 gfx_v11_0_set_gds_init(adev); 5343 gfx_v11_0_set_rlc_funcs(adev); 5344 gfx_v11_0_set_mqd_funcs(adev); 5345 gfx_v11_0_set_imu_funcs(adev); 5346 5347 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5348 5349 return gfx_v11_0_init_microcode(adev); 5350 } 5351 5352 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5353 { 5354 struct amdgpu_device *adev = ip_block->adev; 5355 int r; 5356 5357 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5358 if (r) 5359 return r; 5360 5361 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5362 if (r) 5363 return r; 5364 5365 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5366 if (r) 5367 return r; 5368 5369 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5370 if (r) 5371 return r; 5372 5373 return 0; 5374 } 5375 5376 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5377 { 5378 uint32_t rlc_cntl; 5379 5380 /* if RLC is not enabled, do nothing */ 5381 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5382 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5383 } 5384 5385 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5386 { 5387 uint32_t data; 5388 unsigned i; 5389 5390 data = RLC_SAFE_MODE__CMD_MASK; 5391 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5392 5393 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5394 5395 /* wait for RLC_SAFE_MODE */ 5396 for (i = 0; i < adev->usec_timeout; i++) { 5397 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5398 RLC_SAFE_MODE, CMD)) 5399 break; 5400 udelay(1); 5401 } 5402 } 5403 5404 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5405 { 5406 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5407 } 5408 5409 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5410 bool enable) 5411 { 5412 uint32_t def, data; 5413 5414 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5415 return; 5416 5417 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5418 5419 if (enable) 5420 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5421 else 5422 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5423 5424 if (def != data) 5425 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5426 } 5427 5428 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5429 bool enable) 5430 { 5431 uint32_t def, data; 5432 5433 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5434 return; 5435 5436 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5437 5438 if (enable) 5439 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5440 else 5441 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5442 5443 if (def != data) 5444 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5445 } 5446 5447 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5448 bool enable) 5449 { 5450 uint32_t def, data; 5451 5452 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5453 return; 5454 5455 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5456 5457 if (enable) 5458 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5459 else 5460 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5461 5462 if (def != data) 5463 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5464 } 5465 5466 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5467 bool enable) 5468 { 5469 uint32_t data, def; 5470 5471 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5472 return; 5473 5474 /* It is disabled by HW by default */ 5475 if (enable) { 5476 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5477 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5478 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5479 5480 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5481 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5482 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5483 5484 if (def != data) 5485 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5486 } 5487 } else { 5488 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5489 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5490 5491 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5492 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5493 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5494 5495 if (def != data) 5496 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5497 } 5498 } 5499 } 5500 5501 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5502 bool enable) 5503 { 5504 uint32_t def, data; 5505 5506 if (!(adev->cg_flags & 5507 (AMD_CG_SUPPORT_GFX_CGCG | 5508 AMD_CG_SUPPORT_GFX_CGLS | 5509 AMD_CG_SUPPORT_GFX_3D_CGCG | 5510 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5511 return; 5512 5513 if (enable) { 5514 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5515 5516 /* unset CGCG override */ 5517 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5518 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5519 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5520 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5521 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5522 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5523 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5524 5525 /* update CGCG override bits */ 5526 if (def != data) 5527 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5528 5529 /* enable cgcg FSM(0x0000363F) */ 5530 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5531 5532 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5533 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5534 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5535 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5536 } 5537 5538 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5539 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5540 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5541 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5542 } 5543 5544 if (def != data) 5545 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5546 5547 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5548 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5549 5550 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5551 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5552 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5553 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5554 } 5555 5556 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5557 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5558 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5559 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5560 } 5561 5562 if (def != data) 5563 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5564 5565 /* set IDLE_POLL_COUNT(0x00900100) */ 5566 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5567 5568 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5569 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5570 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5571 5572 if (def != data) 5573 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5574 5575 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5576 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5577 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5578 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5579 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5580 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5581 5582 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5583 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5584 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5585 5586 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5587 if (adev->sdma.num_instances > 1) { 5588 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5589 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5590 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5591 } 5592 } else { 5593 /* Program RLC_CGCG_CGLS_CTRL */ 5594 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5595 5596 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5597 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5598 5599 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5600 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5601 5602 if (def != data) 5603 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5604 5605 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5606 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5607 5608 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5609 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5610 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5611 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5612 5613 if (def != data) 5614 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5615 5616 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5617 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5618 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5619 5620 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5621 if (adev->sdma.num_instances > 1) { 5622 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5623 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5624 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5625 } 5626 } 5627 } 5628 5629 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5630 bool enable) 5631 { 5632 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5633 5634 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5635 5636 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5637 5638 gfx_v11_0_update_repeater_fgcg(adev, enable); 5639 5640 gfx_v11_0_update_sram_fgcg(adev, enable); 5641 5642 gfx_v11_0_update_perf_clk(adev, enable); 5643 5644 if (adev->cg_flags & 5645 (AMD_CG_SUPPORT_GFX_MGCG | 5646 AMD_CG_SUPPORT_GFX_CGLS | 5647 AMD_CG_SUPPORT_GFX_CGCG | 5648 AMD_CG_SUPPORT_GFX_3D_CGCG | 5649 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5650 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5651 5652 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5653 5654 return 0; 5655 } 5656 5657 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, 5658 struct amdgpu_ring *ring, unsigned vmid) 5659 { 5660 u32 reg, pre_data, data; 5661 5662 amdgpu_gfx_off_ctrl(adev, false); 5663 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5664 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5665 pre_data = RREG32_NO_KIQ(reg); 5666 else 5667 pre_data = RREG32(reg); 5668 5669 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5670 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5671 5672 if (pre_data != data) { 5673 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5674 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5675 } else 5676 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5677 } 5678 amdgpu_gfx_off_ctrl(adev, true); 5679 5680 if (ring 5681 && amdgpu_sriov_is_pp_one_vf(adev) 5682 && (pre_data != data) 5683 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5684 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5685 amdgpu_ring_emit_wreg(ring, reg, data); 5686 } 5687 } 5688 5689 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5690 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5691 .set_safe_mode = gfx_v11_0_set_safe_mode, 5692 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5693 .init = gfx_v11_0_rlc_init, 5694 .get_csb_size = gfx_v11_0_get_csb_size, 5695 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5696 .resume = gfx_v11_0_rlc_resume, 5697 .stop = gfx_v11_0_rlc_stop, 5698 .reset = gfx_v11_0_rlc_reset, 5699 .start = gfx_v11_0_rlc_start, 5700 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5701 }; 5702 5703 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5704 { 5705 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5706 5707 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5708 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5709 else 5710 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5711 5712 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5713 5714 // Program RLC_PG_DELAY3 for CGPG hysteresis 5715 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5716 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5717 case IP_VERSION(11, 0, 1): 5718 case IP_VERSION(11, 0, 4): 5719 case IP_VERSION(11, 5, 0): 5720 case IP_VERSION(11, 5, 1): 5721 case IP_VERSION(11, 5, 2): 5722 case IP_VERSION(11, 5, 3): 5723 case IP_VERSION(11, 5, 4): 5724 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5725 break; 5726 default: 5727 break; 5728 } 5729 } 5730 } 5731 5732 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5733 { 5734 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5735 5736 gfx_v11_cntl_power_gating(adev, enable); 5737 5738 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5739 } 5740 5741 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5742 enum amd_powergating_state state) 5743 { 5744 struct amdgpu_device *adev = ip_block->adev; 5745 bool enable = (state == AMD_PG_STATE_GATE); 5746 5747 if (amdgpu_sriov_vf(adev)) 5748 return 0; 5749 5750 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5751 case IP_VERSION(11, 0, 0): 5752 case IP_VERSION(11, 0, 2): 5753 case IP_VERSION(11, 0, 3): 5754 amdgpu_gfx_off_ctrl(adev, enable); 5755 break; 5756 case IP_VERSION(11, 0, 1): 5757 case IP_VERSION(11, 0, 4): 5758 case IP_VERSION(11, 5, 0): 5759 case IP_VERSION(11, 5, 1): 5760 case IP_VERSION(11, 5, 2): 5761 case IP_VERSION(11, 5, 3): 5762 case IP_VERSION(11, 5, 4): 5763 if (!enable) 5764 amdgpu_gfx_off_ctrl(adev, false); 5765 5766 gfx_v11_cntl_pg(adev, enable); 5767 5768 if (enable) 5769 amdgpu_gfx_off_ctrl(adev, true); 5770 5771 break; 5772 default: 5773 break; 5774 } 5775 5776 return 0; 5777 } 5778 5779 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5780 enum amd_clockgating_state state) 5781 { 5782 struct amdgpu_device *adev = ip_block->adev; 5783 5784 if (amdgpu_sriov_vf(adev)) 5785 return 0; 5786 5787 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5788 case IP_VERSION(11, 0, 0): 5789 case IP_VERSION(11, 0, 1): 5790 case IP_VERSION(11, 0, 2): 5791 case IP_VERSION(11, 0, 3): 5792 case IP_VERSION(11, 0, 4): 5793 case IP_VERSION(11, 5, 0): 5794 case IP_VERSION(11, 5, 1): 5795 case IP_VERSION(11, 5, 2): 5796 case IP_VERSION(11, 5, 3): 5797 case IP_VERSION(11, 5, 4): 5798 gfx_v11_0_update_gfx_clock_gating(adev, 5799 state == AMD_CG_STATE_GATE); 5800 break; 5801 default: 5802 break; 5803 } 5804 5805 return 0; 5806 } 5807 5808 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5809 { 5810 struct amdgpu_device *adev = ip_block->adev; 5811 int data; 5812 5813 /* AMD_CG_SUPPORT_GFX_MGCG */ 5814 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5815 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5816 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5817 5818 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5819 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5820 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5821 5822 /* AMD_CG_SUPPORT_GFX_FGCG */ 5823 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5824 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5825 5826 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5827 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5828 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5829 5830 /* AMD_CG_SUPPORT_GFX_CGCG */ 5831 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5832 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5833 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5834 5835 /* AMD_CG_SUPPORT_GFX_CGLS */ 5836 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5837 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5838 5839 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5840 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5841 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5842 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5843 5844 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5845 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5846 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5847 } 5848 5849 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5850 { 5851 /* gfx11 is 32bit rptr*/ 5852 return *(uint32_t *)ring->rptr_cpu_addr; 5853 } 5854 5855 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5856 { 5857 struct amdgpu_device *adev = ring->adev; 5858 u64 wptr; 5859 5860 /* XXX check if swapping is necessary on BE */ 5861 if (ring->use_doorbell) { 5862 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5863 } else { 5864 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5865 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5866 } 5867 5868 return wptr; 5869 } 5870 5871 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5872 { 5873 struct amdgpu_device *adev = ring->adev; 5874 5875 if (ring->use_doorbell) { 5876 /* XXX check if swapping is necessary on BE */ 5877 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5878 ring->wptr); 5879 WDOORBELL64(ring->doorbell_index, ring->wptr); 5880 } else { 5881 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5882 lower_32_bits(ring->wptr)); 5883 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5884 upper_32_bits(ring->wptr)); 5885 } 5886 } 5887 5888 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5889 { 5890 /* gfx11 hardware is 32bit rptr */ 5891 return *(uint32_t *)ring->rptr_cpu_addr; 5892 } 5893 5894 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5895 { 5896 u64 wptr; 5897 5898 /* XXX check if swapping is necessary on BE */ 5899 if (ring->use_doorbell) 5900 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5901 else 5902 BUG(); 5903 return wptr; 5904 } 5905 5906 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5907 { 5908 struct amdgpu_device *adev = ring->adev; 5909 5910 /* XXX check if swapping is necessary on BE */ 5911 if (ring->use_doorbell) { 5912 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5913 ring->wptr); 5914 WDOORBELL64(ring->doorbell_index, ring->wptr); 5915 } else { 5916 BUG(); /* only DOORBELL method supported on gfx11 now */ 5917 } 5918 } 5919 5920 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5921 { 5922 struct amdgpu_device *adev = ring->adev; 5923 u32 ref_and_mask, reg_mem_engine; 5924 5925 if (!adev->gfx.funcs->get_hdp_flush_mask) { 5926 dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); 5927 return; 5928 } 5929 5930 adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); 5931 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5932 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5933 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5934 ref_and_mask, ref_and_mask, 0x20); 5935 } 5936 5937 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5938 struct amdgpu_job *job, 5939 struct amdgpu_ib *ib, 5940 uint32_t flags) 5941 { 5942 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5943 u32 header, control = 0; 5944 5945 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5946 5947 control |= ib->length_dw | (vmid << 24); 5948 5949 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5950 control |= INDIRECT_BUFFER_PRE_ENB(1); 5951 5952 if (flags & AMDGPU_IB_PREEMPTED) 5953 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5954 5955 if (vmid && !ring->adev->gfx.rs64_enable) 5956 gfx_v11_0_ring_emit_de_meta(ring, 5957 !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED)); 5958 } 5959 5960 amdgpu_ring_write(ring, header); 5961 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5962 amdgpu_ring_write(ring, 5963 #ifdef __BIG_ENDIAN 5964 (2 << 0) | 5965 #endif 5966 lower_32_bits(ib->gpu_addr)); 5967 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5968 amdgpu_ring_write(ring, control); 5969 } 5970 5971 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5972 struct amdgpu_job *job, 5973 struct amdgpu_ib *ib, 5974 uint32_t flags) 5975 { 5976 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5977 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5978 5979 /* Currently, there is a high possibility to get wave ID mismatch 5980 * between ME and GDS, leading to a hw deadlock, because ME generates 5981 * different wave IDs than the GDS expects. This situation happens 5982 * randomly when at least 5 compute pipes use GDS ordered append. 5983 * The wave IDs generated by ME are also wrong after suspend/resume. 5984 * Those are probably bugs somewhere else in the kernel driver. 5985 * 5986 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5987 * GDS to 0 for this ring (me/pipe). 5988 */ 5989 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5990 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5991 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5992 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5993 } 5994 5995 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5996 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5997 amdgpu_ring_write(ring, 5998 #ifdef __BIG_ENDIAN 5999 (2 << 0) | 6000 #endif 6001 lower_32_bits(ib->gpu_addr)); 6002 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 6003 amdgpu_ring_write(ring, control); 6004 } 6005 6006 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 6007 u64 seq, unsigned flags) 6008 { 6009 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6010 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6011 6012 /* RELEASE_MEM - flush caches, send int */ 6013 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 6014 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 6015 PACKET3_RELEASE_MEM_GCR_GL2_WB | 6016 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 6017 PACKET3_RELEASE_MEM_GCR_GLM_WB | 6018 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 6019 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6020 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 6021 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 6022 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 6023 6024 /* 6025 * the address should be Qword aligned if 64bit write, Dword 6026 * aligned if only send 32bit data low (discard data high) 6027 */ 6028 if (write64bit) 6029 BUG_ON(addr & 0x7); 6030 else 6031 BUG_ON(addr & 0x3); 6032 amdgpu_ring_write(ring, lower_32_bits(addr)); 6033 amdgpu_ring_write(ring, upper_32_bits(addr)); 6034 amdgpu_ring_write(ring, lower_32_bits(seq)); 6035 amdgpu_ring_write(ring, upper_32_bits(seq)); 6036 amdgpu_ring_write(ring, 0); 6037 } 6038 6039 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6040 { 6041 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6042 uint32_t seq = ring->fence_drv.sync_seq; 6043 uint64_t addr = ring->fence_drv.gpu_addr; 6044 6045 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 6046 upper_32_bits(addr), seq, 0xffffffff, 4); 6047 } 6048 6049 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 6050 uint16_t pasid, uint32_t flush_type, 6051 bool all_hub, uint8_t dst_sel) 6052 { 6053 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 6054 amdgpu_ring_write(ring, 6055 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 6056 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 6057 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 6058 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 6059 } 6060 6061 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6062 unsigned vmid, uint64_t pd_addr) 6063 { 6064 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6065 6066 /* compute doesn't have PFP */ 6067 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 6068 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6069 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6070 amdgpu_ring_write(ring, 0x0); 6071 } 6072 6073 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 6074 * changed in any way. 6075 */ 6076 ring->set_q_mode_offs = 0; 6077 ring->set_q_mode_ptr = NULL; 6078 } 6079 6080 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6081 u64 seq, unsigned int flags) 6082 { 6083 struct amdgpu_device *adev = ring->adev; 6084 6085 /* we only allocate 32bit for each seq wb address */ 6086 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6087 6088 /* write fence seq to the "addr" */ 6089 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6090 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6091 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6092 amdgpu_ring_write(ring, lower_32_bits(addr)); 6093 amdgpu_ring_write(ring, upper_32_bits(addr)); 6094 amdgpu_ring_write(ring, lower_32_bits(seq)); 6095 6096 if (flags & AMDGPU_FENCE_FLAG_INT) { 6097 /* set register to trigger INT */ 6098 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6099 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6100 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6101 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 6102 amdgpu_ring_write(ring, 0); 6103 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6104 } 6105 } 6106 6107 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 6108 uint32_t flags) 6109 { 6110 uint32_t dw2 = 0; 6111 6112 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6113 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6114 /* set load_global_config & load_global_uconfig */ 6115 dw2 |= 0x8001; 6116 /* set load_cs_sh_regs */ 6117 dw2 |= 0x01000000; 6118 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6119 dw2 |= 0x10002; 6120 } 6121 6122 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6123 amdgpu_ring_write(ring, dw2); 6124 amdgpu_ring_write(ring, 0); 6125 } 6126 6127 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 6128 uint64_t addr) 6129 { 6130 unsigned ret; 6131 6132 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6133 amdgpu_ring_write(ring, lower_32_bits(addr)); 6134 amdgpu_ring_write(ring, upper_32_bits(addr)); 6135 /* discard following DWs if *cond_exec_gpu_addr==0 */ 6136 amdgpu_ring_write(ring, 0); 6137 ret = ring->wptr & ring->buf_mask; 6138 /* patch dummy value later */ 6139 amdgpu_ring_write(ring, 0); 6140 6141 return ret; 6142 } 6143 6144 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, 6145 u64 shadow_va, u64 csa_va, 6146 u64 gds_va, bool init_shadow, 6147 int vmid) 6148 { 6149 struct amdgpu_device *adev = ring->adev; 6150 unsigned int offs, end; 6151 6152 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) 6153 return; 6154 6155 /* 6156 * The logic here isn't easy to understand because we need to keep state 6157 * accross multiple executions of the function as well as between the 6158 * CPU and GPU. The general idea is that the newly written GPU command 6159 * has a condition on the previous one and only executed if really 6160 * necessary. 6161 */ 6162 6163 /* 6164 * The dw in the NOP controls if the next SET_Q_MODE packet should be 6165 * executed or not. Reserve 64bits just to be on the save side. 6166 */ 6167 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); 6168 offs = ring->wptr & ring->buf_mask; 6169 6170 /* 6171 * We start with skipping the prefix SET_Q_MODE and always executing 6172 * the postfix SET_Q_MODE packet. This is changed below with a 6173 * WRITE_DATA command when the postfix executed. 6174 */ 6175 amdgpu_ring_write(ring, shadow_va ? 1 : 0); 6176 amdgpu_ring_write(ring, 0); 6177 6178 if (ring->set_q_mode_offs) { 6179 uint64_t addr; 6180 6181 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6182 addr += ring->set_q_mode_offs << 2; 6183 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6184 } 6185 6186 /* 6187 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6188 * next prefix SET_Q_MODE packet executes as well. 6189 */ 6190 if (!shadow_va) { 6191 uint64_t addr; 6192 6193 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6194 addr += offs << 2; 6195 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6196 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6197 amdgpu_ring_write(ring, lower_32_bits(addr)); 6198 amdgpu_ring_write(ring, upper_32_bits(addr)); 6199 amdgpu_ring_write(ring, 0x1); 6200 } 6201 6202 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6203 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6204 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6205 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6206 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6207 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6208 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6209 amdgpu_ring_write(ring, shadow_va ? 6210 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6211 amdgpu_ring_write(ring, init_shadow ? 6212 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6213 6214 if (ring->set_q_mode_offs) 6215 amdgpu_ring_patch_cond_exec(ring, end); 6216 6217 if (shadow_va) { 6218 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6219 6220 /* 6221 * If the tokens match try to skip the last postfix SET_Q_MODE 6222 * packet to avoid saving/restoring the state all the time. 6223 */ 6224 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6225 *ring->set_q_mode_ptr = 0; 6226 6227 ring->set_q_mode_token = token; 6228 } else { 6229 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6230 } 6231 6232 ring->set_q_mode_offs = offs; 6233 } 6234 6235 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6236 { 6237 struct amdgpu_device *adev = ring->adev; 6238 struct v10_de_ib_state de_payload = {0}; 6239 uint64_t offset, gds_addr, de_payload_gpu_addr; 6240 void *de_payload_cpu_addr; 6241 int cnt; 6242 6243 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6244 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6245 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6246 6247 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6248 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6249 PAGE_SIZE); 6250 6251 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6252 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6253 6254 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6255 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6256 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6257 WRITE_DATA_DST_SEL(8) | 6258 WR_CONFIRM) | 6259 WRITE_DATA_CACHE_POLICY(0)); 6260 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6261 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6262 6263 if (resume) 6264 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6265 sizeof(de_payload) >> 2); 6266 else 6267 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6268 sizeof(de_payload) >> 2); 6269 } 6270 6271 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6272 bool secure) 6273 { 6274 uint32_t v = secure ? FRAME_TMZ : 0; 6275 6276 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6277 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6278 } 6279 6280 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6281 uint32_t reg_val_offs) 6282 { 6283 struct amdgpu_device *adev = ring->adev; 6284 6285 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6286 amdgpu_ring_write(ring, 0 | /* src: register*/ 6287 (5 << 8) | /* dst: memory */ 6288 (1 << 20)); /* write confirm */ 6289 amdgpu_ring_write(ring, reg); 6290 amdgpu_ring_write(ring, 0); 6291 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6292 reg_val_offs * 4)); 6293 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6294 reg_val_offs * 4)); 6295 } 6296 6297 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6298 uint32_t val) 6299 { 6300 uint32_t cmd = 0; 6301 6302 switch (ring->funcs->type) { 6303 case AMDGPU_RING_TYPE_GFX: 6304 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6305 break; 6306 case AMDGPU_RING_TYPE_KIQ: 6307 cmd = (1 << 16); /* no inc addr */ 6308 break; 6309 default: 6310 cmd = WR_CONFIRM; 6311 break; 6312 } 6313 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6314 amdgpu_ring_write(ring, cmd); 6315 amdgpu_ring_write(ring, reg); 6316 amdgpu_ring_write(ring, 0); 6317 amdgpu_ring_write(ring, val); 6318 } 6319 6320 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6321 uint32_t val, uint32_t mask) 6322 { 6323 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6324 } 6325 6326 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6327 uint32_t reg0, uint32_t reg1, 6328 uint32_t ref, uint32_t mask) 6329 { 6330 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6331 6332 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6333 ref, mask, 0x20); 6334 } 6335 6336 static void 6337 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6338 uint32_t me, uint32_t pipe, 6339 enum amdgpu_interrupt_state state) 6340 { 6341 uint32_t cp_int_cntl, cp_int_cntl_reg; 6342 6343 if (!me) { 6344 switch (pipe) { 6345 case 0: 6346 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6347 break; 6348 case 1: 6349 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6350 break; 6351 default: 6352 DRM_DEBUG("invalid pipe %d\n", pipe); 6353 return; 6354 } 6355 } else { 6356 DRM_DEBUG("invalid me %d\n", me); 6357 return; 6358 } 6359 6360 switch (state) { 6361 case AMDGPU_IRQ_STATE_DISABLE: 6362 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6363 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6364 TIME_STAMP_INT_ENABLE, 0); 6365 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6366 GENERIC0_INT_ENABLE, 0); 6367 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6368 break; 6369 case AMDGPU_IRQ_STATE_ENABLE: 6370 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6371 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6372 TIME_STAMP_INT_ENABLE, 1); 6373 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6374 GENERIC0_INT_ENABLE, 1); 6375 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6376 break; 6377 default: 6378 break; 6379 } 6380 } 6381 6382 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6383 int me, int pipe, 6384 enum amdgpu_interrupt_state state) 6385 { 6386 u32 mec_int_cntl, mec_int_cntl_reg; 6387 6388 /* 6389 * amdgpu controls only the first MEC. That's why this function only 6390 * handles the setting of interrupts for this specific MEC. All other 6391 * pipes' interrupts are set by amdkfd. 6392 */ 6393 6394 if (me == 1) { 6395 switch (pipe) { 6396 case 0: 6397 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6398 break; 6399 case 1: 6400 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6401 break; 6402 case 2: 6403 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6404 break; 6405 case 3: 6406 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6407 break; 6408 default: 6409 DRM_DEBUG("invalid pipe %d\n", pipe); 6410 return; 6411 } 6412 } else { 6413 DRM_DEBUG("invalid me %d\n", me); 6414 return; 6415 } 6416 6417 switch (state) { 6418 case AMDGPU_IRQ_STATE_DISABLE: 6419 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6420 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6421 TIME_STAMP_INT_ENABLE, 0); 6422 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6423 GENERIC0_INT_ENABLE, 0); 6424 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6425 break; 6426 case AMDGPU_IRQ_STATE_ENABLE: 6427 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6428 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6429 TIME_STAMP_INT_ENABLE, 1); 6430 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6431 GENERIC0_INT_ENABLE, 1); 6432 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6433 break; 6434 default: 6435 break; 6436 } 6437 } 6438 6439 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6440 struct amdgpu_irq_src *src, 6441 unsigned type, 6442 enum amdgpu_interrupt_state state) 6443 { 6444 switch (type) { 6445 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6446 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6447 break; 6448 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6449 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6450 break; 6451 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6452 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6453 break; 6454 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6455 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6456 break; 6457 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6458 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6459 break; 6460 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6461 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6462 break; 6463 default: 6464 break; 6465 } 6466 return 0; 6467 } 6468 6469 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6470 struct amdgpu_irq_src *source, 6471 struct amdgpu_iv_entry *entry) 6472 { 6473 u32 doorbell_offset = entry->src_data[0]; 6474 u8 me_id, pipe_id, queue_id; 6475 struct amdgpu_ring *ring; 6476 int i; 6477 6478 DRM_DEBUG("IH: CP EOP\n"); 6479 6480 if (adev->enable_mes && doorbell_offset) { 6481 amdgpu_userq_process_fence_irq(adev, doorbell_offset); 6482 } else { 6483 me_id = (entry->ring_id & 0x0c) >> 2; 6484 pipe_id = (entry->ring_id & 0x03) >> 0; 6485 queue_id = (entry->ring_id & 0x70) >> 4; 6486 6487 switch (me_id) { 6488 case 0: 6489 if (pipe_id == 0) 6490 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6491 else 6492 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6493 break; 6494 case 1: 6495 case 2: 6496 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6497 ring = &adev->gfx.compute_ring[i]; 6498 /* Per-queue interrupt is supported for MEC starting from VI. 6499 * The interrupt can only be enabled/disabled per pipe instead 6500 * of per queue. 6501 */ 6502 if ((ring->me == me_id) && 6503 (ring->pipe == pipe_id) && 6504 (ring->queue == queue_id)) 6505 amdgpu_fence_process(ring); 6506 } 6507 break; 6508 } 6509 } 6510 6511 return 0; 6512 } 6513 6514 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6515 struct amdgpu_irq_src *source, 6516 unsigned int type, 6517 enum amdgpu_interrupt_state state) 6518 { 6519 u32 cp_int_cntl_reg, cp_int_cntl; 6520 int i, j; 6521 6522 switch (state) { 6523 case AMDGPU_IRQ_STATE_DISABLE: 6524 case AMDGPU_IRQ_STATE_ENABLE: 6525 for (i = 0; i < adev->gfx.me.num_me; i++) { 6526 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6527 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6528 6529 if (cp_int_cntl_reg) { 6530 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6531 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6532 PRIV_REG_INT_ENABLE, 6533 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6534 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6535 } 6536 } 6537 } 6538 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6539 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6540 /* MECs start at 1 */ 6541 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6542 6543 if (cp_int_cntl_reg) { 6544 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6545 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6546 PRIV_REG_INT_ENABLE, 6547 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6548 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6549 } 6550 } 6551 } 6552 break; 6553 default: 6554 break; 6555 } 6556 6557 return 0; 6558 } 6559 6560 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6561 struct amdgpu_irq_src *source, 6562 unsigned type, 6563 enum amdgpu_interrupt_state state) 6564 { 6565 u32 cp_int_cntl_reg, cp_int_cntl; 6566 int i, j; 6567 6568 switch (state) { 6569 case AMDGPU_IRQ_STATE_DISABLE: 6570 case AMDGPU_IRQ_STATE_ENABLE: 6571 for (i = 0; i < adev->gfx.me.num_me; i++) { 6572 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6573 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6574 6575 if (cp_int_cntl_reg) { 6576 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6577 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6578 OPCODE_ERROR_INT_ENABLE, 6579 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6580 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6581 } 6582 } 6583 } 6584 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6585 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6586 /* MECs start at 1 */ 6587 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6588 6589 if (cp_int_cntl_reg) { 6590 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6591 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6592 OPCODE_ERROR_INT_ENABLE, 6593 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6594 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6595 } 6596 } 6597 } 6598 break; 6599 default: 6600 break; 6601 } 6602 return 0; 6603 } 6604 6605 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6606 struct amdgpu_irq_src *source, 6607 unsigned int type, 6608 enum amdgpu_interrupt_state state) 6609 { 6610 u32 cp_int_cntl_reg, cp_int_cntl; 6611 int i, j; 6612 6613 switch (state) { 6614 case AMDGPU_IRQ_STATE_DISABLE: 6615 case AMDGPU_IRQ_STATE_ENABLE: 6616 for (i = 0; i < adev->gfx.me.num_me; i++) { 6617 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6618 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6619 6620 if (cp_int_cntl_reg) { 6621 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6622 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6623 PRIV_INSTR_INT_ENABLE, 6624 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6625 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6626 } 6627 } 6628 } 6629 break; 6630 default: 6631 break; 6632 } 6633 6634 return 0; 6635 } 6636 6637 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6638 struct amdgpu_iv_entry *entry) 6639 { 6640 u8 me_id, pipe_id, queue_id; 6641 struct amdgpu_ring *ring; 6642 int i; 6643 6644 me_id = (entry->ring_id & 0x0c) >> 2; 6645 pipe_id = (entry->ring_id & 0x03) >> 0; 6646 queue_id = (entry->ring_id & 0x70) >> 4; 6647 6648 if (!adev->gfx.disable_kq) { 6649 switch (me_id) { 6650 case 0: 6651 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6652 ring = &adev->gfx.gfx_ring[i]; 6653 if (ring->me == me_id && ring->pipe == pipe_id && 6654 ring->queue == queue_id) 6655 drm_sched_fault(&ring->sched); 6656 } 6657 break; 6658 case 1: 6659 case 2: 6660 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6661 ring = &adev->gfx.compute_ring[i]; 6662 if (ring->me == me_id && ring->pipe == pipe_id && 6663 ring->queue == queue_id) 6664 drm_sched_fault(&ring->sched); 6665 } 6666 break; 6667 default: 6668 BUG(); 6669 break; 6670 } 6671 } 6672 } 6673 6674 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6675 struct amdgpu_irq_src *source, 6676 struct amdgpu_iv_entry *entry) 6677 { 6678 DRM_ERROR("Illegal register access in command stream\n"); 6679 gfx_v11_0_handle_priv_fault(adev, entry); 6680 return 0; 6681 } 6682 6683 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6684 struct amdgpu_irq_src *source, 6685 struct amdgpu_iv_entry *entry) 6686 { 6687 DRM_ERROR("Illegal opcode in command stream\n"); 6688 gfx_v11_0_handle_priv_fault(adev, entry); 6689 return 0; 6690 } 6691 6692 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6693 struct amdgpu_irq_src *source, 6694 struct amdgpu_iv_entry *entry) 6695 { 6696 DRM_ERROR("Illegal instruction in command stream\n"); 6697 gfx_v11_0_handle_priv_fault(adev, entry); 6698 return 0; 6699 } 6700 6701 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6702 struct amdgpu_irq_src *source, 6703 struct amdgpu_iv_entry *entry) 6704 { 6705 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6706 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6707 6708 return 0; 6709 } 6710 6711 #if 0 6712 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6713 struct amdgpu_irq_src *src, 6714 unsigned int type, 6715 enum amdgpu_interrupt_state state) 6716 { 6717 uint32_t tmp, target; 6718 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6719 6720 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6721 target += ring->pipe; 6722 6723 switch (type) { 6724 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6725 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6726 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6727 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6728 GENERIC2_INT_ENABLE, 0); 6729 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6730 6731 tmp = RREG32_SOC15_IP(GC, target); 6732 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6733 GENERIC2_INT_ENABLE, 0); 6734 WREG32_SOC15_IP(GC, target, tmp); 6735 } else { 6736 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6737 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6738 GENERIC2_INT_ENABLE, 1); 6739 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6740 6741 tmp = RREG32_SOC15_IP(GC, target); 6742 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6743 GENERIC2_INT_ENABLE, 1); 6744 WREG32_SOC15_IP(GC, target, tmp); 6745 } 6746 break; 6747 default: 6748 BUG(); /* kiq only support GENERIC2_INT now */ 6749 break; 6750 } 6751 return 0; 6752 } 6753 #endif 6754 6755 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6756 { 6757 const unsigned int gcr_cntl = 6758 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6759 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6760 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6761 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6762 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6763 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6764 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6765 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6766 6767 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6768 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6769 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6770 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6771 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6772 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6773 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6774 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6775 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6776 } 6777 6778 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6779 { 6780 /* Disable the pipe reset until the CPFW fully support it.*/ 6781 dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); 6782 return false; 6783 } 6784 6785 6786 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6787 { 6788 struct amdgpu_device *adev = ring->adev; 6789 uint32_t reset_pipe = 0, clean_pipe = 0; 6790 int r; 6791 6792 if (!gfx_v11_pipe_reset_support(adev)) 6793 return -EOPNOTSUPP; 6794 6795 gfx_v11_0_set_safe_mode(adev, 0); 6796 mutex_lock(&adev->srbm_mutex); 6797 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6798 6799 switch (ring->pipe) { 6800 case 0: 6801 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6802 PFP_PIPE0_RESET, 1); 6803 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6804 ME_PIPE0_RESET, 1); 6805 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6806 PFP_PIPE0_RESET, 0); 6807 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6808 ME_PIPE0_RESET, 0); 6809 break; 6810 case 1: 6811 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6812 PFP_PIPE1_RESET, 1); 6813 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6814 ME_PIPE1_RESET, 1); 6815 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6816 PFP_PIPE1_RESET, 0); 6817 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6818 ME_PIPE1_RESET, 0); 6819 break; 6820 default: 6821 break; 6822 } 6823 6824 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6825 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6826 6827 r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6828 RS64_FW_UC_START_ADDR_LO; 6829 soc21_grbm_select(adev, 0, 0, 0, 0); 6830 mutex_unlock(&adev->srbm_mutex); 6831 gfx_v11_0_unset_safe_mode(adev, 0); 6832 6833 dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6834 r == 0 ? "successfully" : "failed"); 6835 /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, 6836 * so the pipe reset status relies on the later gfx ring test result. 6837 */ 6838 return 0; 6839 } 6840 6841 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, 6842 unsigned int vmid, 6843 struct amdgpu_fence *timedout_fence) 6844 { 6845 struct amdgpu_device *adev = ring->adev; 6846 bool use_mmio = false; 6847 int r; 6848 6849 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 6850 6851 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0); 6852 if (r) { 6853 6854 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6855 r = gfx_v11_reset_gfx_pipe(ring); 6856 if (r) 6857 return r; 6858 } 6859 6860 if (use_mmio) { 6861 r = gfx_v11_0_kgq_init_queue(ring, true); 6862 if (r) { 6863 dev_err(adev->dev, "failed to init kgq\n"); 6864 return r; 6865 } 6866 6867 r = amdgpu_mes_map_legacy_queue(adev, ring, 0); 6868 if (r) { 6869 dev_err(adev->dev, "failed to remap kgq\n"); 6870 return r; 6871 } 6872 } 6873 6874 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 6875 } 6876 6877 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6878 { 6879 6880 struct amdgpu_device *adev = ring->adev; 6881 uint32_t reset_pipe = 0, clean_pipe = 0; 6882 int r; 6883 6884 if (!gfx_v11_pipe_reset_support(adev)) 6885 return -EOPNOTSUPP; 6886 6887 gfx_v11_0_set_safe_mode(adev, 0); 6888 mutex_lock(&adev->srbm_mutex); 6889 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6890 6891 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6892 clean_pipe = reset_pipe; 6893 6894 if (adev->gfx.rs64_enable) { 6895 6896 switch (ring->pipe) { 6897 case 0: 6898 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6899 MEC_PIPE0_RESET, 1); 6900 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6901 MEC_PIPE0_RESET, 0); 6902 break; 6903 case 1: 6904 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6905 MEC_PIPE1_RESET, 1); 6906 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6907 MEC_PIPE1_RESET, 0); 6908 break; 6909 case 2: 6910 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6911 MEC_PIPE2_RESET, 1); 6912 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6913 MEC_PIPE2_RESET, 0); 6914 break; 6915 case 3: 6916 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6917 MEC_PIPE3_RESET, 1); 6918 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6919 MEC_PIPE3_RESET, 0); 6920 break; 6921 default: 6922 break; 6923 } 6924 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6925 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6926 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6927 RS64_FW_UC_START_ADDR_LO; 6928 } else { 6929 if (ring->me == 1) { 6930 switch (ring->pipe) { 6931 case 0: 6932 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6933 MEC_ME1_PIPE0_RESET, 1); 6934 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6935 MEC_ME1_PIPE0_RESET, 0); 6936 break; 6937 case 1: 6938 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6939 MEC_ME1_PIPE1_RESET, 1); 6940 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6941 MEC_ME1_PIPE1_RESET, 0); 6942 break; 6943 case 2: 6944 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6945 MEC_ME1_PIPE2_RESET, 1); 6946 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6947 MEC_ME1_PIPE2_RESET, 0); 6948 break; 6949 case 3: 6950 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6951 MEC_ME1_PIPE3_RESET, 1); 6952 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6953 MEC_ME1_PIPE3_RESET, 0); 6954 break; 6955 default: 6956 break; 6957 } 6958 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 6959 } else { 6960 switch (ring->pipe) { 6961 case 0: 6962 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6963 MEC_ME2_PIPE0_RESET, 1); 6964 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6965 MEC_ME2_PIPE0_RESET, 0); 6966 break; 6967 case 1: 6968 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6969 MEC_ME2_PIPE1_RESET, 1); 6970 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6971 MEC_ME2_PIPE1_RESET, 0); 6972 break; 6973 case 2: 6974 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6975 MEC_ME2_PIPE2_RESET, 1); 6976 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6977 MEC_ME2_PIPE2_RESET, 0); 6978 break; 6979 case 3: 6980 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6981 MEC_ME2_PIPE3_RESET, 1); 6982 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6983 MEC_ME2_PIPE3_RESET, 0); 6984 break; 6985 default: 6986 break; 6987 } 6988 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ 6989 } 6990 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); 6991 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); 6992 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); 6993 } 6994 6995 soc21_grbm_select(adev, 0, 0, 0, 0); 6996 mutex_unlock(&adev->srbm_mutex); 6997 gfx_v11_0_unset_safe_mode(adev, 0); 6998 6999 dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, 7000 r == 0 ? "successfully" : "failed"); 7001 /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe 7002 * reset status relies on the compute ring test result. 7003 */ 7004 return 0; 7005 } 7006 7007 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, 7008 unsigned int vmid, 7009 struct amdgpu_fence *timedout_fence) 7010 { 7011 struct amdgpu_device *adev = ring->adev; 7012 int r = 0; 7013 7014 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 7015 7016 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0); 7017 if (r) { 7018 dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); 7019 r = gfx_v11_0_reset_compute_pipe(ring); 7020 if (r) 7021 return r; 7022 } 7023 7024 r = gfx_v11_0_kcq_init_queue(ring, true); 7025 if (r) { 7026 dev_err(adev->dev, "fail to init kcq\n"); 7027 return r; 7028 } 7029 r = amdgpu_mes_map_legacy_queue(adev, ring, 0); 7030 if (r) { 7031 dev_err(adev->dev, "failed to remap kcq\n"); 7032 return r; 7033 } 7034 7035 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 7036 } 7037 7038 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7039 { 7040 struct amdgpu_device *adev = ip_block->adev; 7041 uint32_t i, j, k, reg, index = 0; 7042 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7043 7044 if (!adev->gfx.ip_dump_core) 7045 return; 7046 7047 for (i = 0; i < reg_count; i++) 7048 drm_printf(p, "%-50s \t 0x%08x\n", 7049 gc_reg_list_11_0[i].reg_name, 7050 adev->gfx.ip_dump_core[i]); 7051 7052 /* print compute queue registers for all instances */ 7053 if (!adev->gfx.ip_dump_compute_queues) 7054 return; 7055 7056 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7057 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7058 adev->gfx.mec.num_mec, 7059 adev->gfx.mec.num_pipe_per_mec, 7060 adev->gfx.mec.num_queue_per_pipe); 7061 7062 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7063 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7064 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7065 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7066 for (reg = 0; reg < reg_count; reg++) { 7067 if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) 7068 drm_printf(p, "%-50s \t 0x%08x\n", 7069 "regCP_MEC_ME2_HEADER_DUMP", 7070 adev->gfx.ip_dump_compute_queues[index + reg]); 7071 else 7072 drm_printf(p, "%-50s \t 0x%08x\n", 7073 gc_cp_reg_list_11[reg].reg_name, 7074 adev->gfx.ip_dump_compute_queues[index + reg]); 7075 } 7076 index += reg_count; 7077 } 7078 } 7079 } 7080 7081 /* print gfx queue registers for all instances */ 7082 if (!adev->gfx.ip_dump_gfx_queues) 7083 return; 7084 7085 index = 0; 7086 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7087 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7088 adev->gfx.me.num_me, 7089 adev->gfx.me.num_pipe_per_me, 7090 adev->gfx.me.num_queue_per_pipe); 7091 7092 for (i = 0; i < adev->gfx.me.num_me; i++) { 7093 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7094 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7095 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7096 for (reg = 0; reg < reg_count; reg++) { 7097 drm_printf(p, "%-50s \t 0x%08x\n", 7098 gc_gfx_queue_reg_list_11[reg].reg_name, 7099 adev->gfx.ip_dump_gfx_queues[index + reg]); 7100 } 7101 index += reg_count; 7102 } 7103 } 7104 } 7105 } 7106 7107 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7108 { 7109 struct amdgpu_device *adev = ip_block->adev; 7110 uint32_t i, j, k, reg, index = 0; 7111 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7112 7113 if (!adev->gfx.ip_dump_core) 7114 return; 7115 7116 amdgpu_gfx_off_ctrl(adev, false); 7117 for (i = 0; i < reg_count; i++) 7118 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7119 amdgpu_gfx_off_ctrl(adev, true); 7120 7121 /* dump compute queue registers for all instances */ 7122 if (!adev->gfx.ip_dump_compute_queues) 7123 return; 7124 7125 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7126 amdgpu_gfx_off_ctrl(adev, false); 7127 mutex_lock(&adev->srbm_mutex); 7128 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7129 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7130 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7131 /* ME0 is for GFX so start from 1 for CP */ 7132 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7133 for (reg = 0; reg < reg_count; reg++) { 7134 if (i && 7135 gc_cp_reg_list_11[reg].reg_offset == 7136 regCP_MEC_ME1_HEADER_DUMP) 7137 adev->gfx.ip_dump_compute_queues[index + reg] = 7138 RREG32(SOC15_REG_OFFSET(GC, 0, 7139 regCP_MEC_ME2_HEADER_DUMP)); 7140 else 7141 adev->gfx.ip_dump_compute_queues[index + reg] = 7142 RREG32(SOC15_REG_ENTRY_OFFSET( 7143 gc_cp_reg_list_11[reg])); 7144 } 7145 index += reg_count; 7146 } 7147 } 7148 } 7149 soc21_grbm_select(adev, 0, 0, 0, 0); 7150 mutex_unlock(&adev->srbm_mutex); 7151 amdgpu_gfx_off_ctrl(adev, true); 7152 7153 /* dump gfx queue registers for all instances */ 7154 if (!adev->gfx.ip_dump_gfx_queues) 7155 return; 7156 7157 index = 0; 7158 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7159 amdgpu_gfx_off_ctrl(adev, false); 7160 mutex_lock(&adev->srbm_mutex); 7161 for (i = 0; i < adev->gfx.me.num_me; i++) { 7162 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7163 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7164 soc21_grbm_select(adev, i, j, k, 0); 7165 7166 for (reg = 0; reg < reg_count; reg++) { 7167 adev->gfx.ip_dump_gfx_queues[index + reg] = 7168 RREG32(SOC15_REG_ENTRY_OFFSET( 7169 gc_gfx_queue_reg_list_11[reg])); 7170 } 7171 index += reg_count; 7172 } 7173 } 7174 } 7175 soc21_grbm_select(adev, 0, 0, 0, 0); 7176 mutex_unlock(&adev->srbm_mutex); 7177 amdgpu_gfx_off_ctrl(adev, true); 7178 } 7179 7180 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7181 { 7182 /* Emit the cleaner shader */ 7183 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7184 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7185 } 7186 7187 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 7188 { 7189 amdgpu_gfx_profile_ring_begin_use(ring); 7190 7191 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7192 } 7193 7194 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 7195 { 7196 amdgpu_gfx_profile_ring_end_use(ring); 7197 7198 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7199 } 7200 7201 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 7202 .name = "gfx_v11_0", 7203 .early_init = gfx_v11_0_early_init, 7204 .late_init = gfx_v11_0_late_init, 7205 .sw_init = gfx_v11_0_sw_init, 7206 .sw_fini = gfx_v11_0_sw_fini, 7207 .hw_init = gfx_v11_0_hw_init, 7208 .hw_fini = gfx_v11_0_hw_fini, 7209 .suspend = gfx_v11_0_suspend, 7210 .resume = gfx_v11_0_resume, 7211 .is_idle = gfx_v11_0_is_idle, 7212 .wait_for_idle = gfx_v11_0_wait_for_idle, 7213 .soft_reset = gfx_v11_0_soft_reset, 7214 .check_soft_reset = gfx_v11_0_check_soft_reset, 7215 .post_soft_reset = gfx_v11_0_post_soft_reset, 7216 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 7217 .set_powergating_state = gfx_v11_0_set_powergating_state, 7218 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 7219 .dump_ip_state = gfx_v11_ip_dump, 7220 .print_ip_state = gfx_v11_ip_print, 7221 }; 7222 7223 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7224 .type = AMDGPU_RING_TYPE_GFX, 7225 .align_mask = 0xff, 7226 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7227 .support_64bit_ptrs = true, 7228 .secure_submission_supported = true, 7229 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 7230 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 7231 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 7232 .emit_frame_size = /* totally 247 maximum if 16 IBs */ 7233 5 + /* update_spm_vmid */ 7234 5 + /* COND_EXEC */ 7235 22 + /* SET_Q_PREEMPTION_MODE */ 7236 7 + /* PIPELINE_SYNC */ 7237 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7238 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7239 4 + /* VM_FLUSH */ 7240 8 + /* FENCE for VM_FLUSH */ 7241 20 + /* GDS switch */ 7242 5 + /* COND_EXEC */ 7243 7 + /* HDP_flush */ 7244 4 + /* VGT_flush */ 7245 31 + /* DE_META */ 7246 3 + /* CNTX_CTRL */ 7247 5 + /* HDP_INVL */ 7248 22 + /* SET_Q_PREEMPTION_MODE */ 7249 8 + 8 + /* FENCE x2 */ 7250 8 + /* gfx_v11_0_emit_mem_sync */ 7251 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7252 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 7253 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 7254 .emit_fence = gfx_v11_0_ring_emit_fence, 7255 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7256 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7257 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7258 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7259 .test_ring = gfx_v11_0_ring_test_ring, 7260 .test_ib = gfx_v11_0_ring_test_ib, 7261 .insert_nop = gfx_v11_ring_insert_nop, 7262 .pad_ib = amdgpu_ring_generic_pad_ib, 7263 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 7264 .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, 7265 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 7266 .preempt_ib = amdgpu_gfx_ring_preempt_ib, 7267 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 7268 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7269 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7270 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7271 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7272 .reset = gfx_v11_0_reset_kgq, 7273 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7274 .begin_use = gfx_v11_0_ring_begin_use, 7275 .end_use = gfx_v11_0_ring_end_use, 7276 }; 7277 7278 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 7279 .type = AMDGPU_RING_TYPE_COMPUTE, 7280 .align_mask = 0xff, 7281 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7282 .support_64bit_ptrs = true, 7283 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7284 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7285 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7286 .emit_frame_size = 7287 5 + /* update_spm_vmid */ 7288 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7289 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7290 5 + /* hdp invalidate */ 7291 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7292 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7293 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7294 2 + /* gfx_v11_0_ring_emit_vm_flush */ 7295 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 7296 8 + /* gfx_v11_0_emit_mem_sync */ 7297 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7298 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7299 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7300 .emit_fence = gfx_v11_0_ring_emit_fence, 7301 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7302 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7303 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7304 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7305 .test_ring = gfx_v11_0_ring_test_ring, 7306 .test_ib = gfx_v11_0_ring_test_ib, 7307 .insert_nop = gfx_v11_ring_insert_nop, 7308 .pad_ib = amdgpu_ring_generic_pad_ib, 7309 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7310 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7311 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7312 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7313 .reset = gfx_v11_0_reset_kcq, 7314 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7315 .begin_use = gfx_v11_0_ring_begin_use, 7316 .end_use = gfx_v11_0_ring_end_use, 7317 }; 7318 7319 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7320 .type = AMDGPU_RING_TYPE_KIQ, 7321 .align_mask = 0xff, 7322 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7323 .support_64bit_ptrs = true, 7324 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7325 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7326 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7327 .emit_frame_size = 7328 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7329 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7330 5 + /*hdp invalidate */ 7331 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7332 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7333 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7334 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7335 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7336 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7337 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 7338 .test_ring = gfx_v11_0_ring_test_ring, 7339 .test_ib = gfx_v11_0_ring_test_ib, 7340 .insert_nop = amdgpu_ring_insert_nop, 7341 .pad_ib = amdgpu_ring_generic_pad_ib, 7342 .emit_rreg = gfx_v11_0_ring_emit_rreg, 7343 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7344 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7345 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7346 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7347 }; 7348 7349 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 7350 { 7351 int i; 7352 7353 adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; 7354 7355 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7356 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 7357 7358 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7359 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 7360 } 7361 7362 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 7363 .set = gfx_v11_0_set_eop_interrupt_state, 7364 .process = gfx_v11_0_eop_irq, 7365 }; 7366 7367 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 7368 .set = gfx_v11_0_set_priv_reg_fault_state, 7369 .process = gfx_v11_0_priv_reg_irq, 7370 }; 7371 7372 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { 7373 .set = gfx_v11_0_set_bad_op_fault_state, 7374 .process = gfx_v11_0_bad_op_irq, 7375 }; 7376 7377 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 7378 .set = gfx_v11_0_set_priv_inst_fault_state, 7379 .process = gfx_v11_0_priv_inst_irq, 7380 }; 7381 7382 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 7383 .process = gfx_v11_0_rlc_gc_fed_irq, 7384 }; 7385 7386 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 7387 { 7388 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7389 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 7390 7391 adev->gfx.priv_reg_irq.num_types = 1; 7392 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 7393 7394 adev->gfx.bad_op_irq.num_types = 1; 7395 adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; 7396 7397 adev->gfx.priv_inst_irq.num_types = 1; 7398 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 7399 7400 adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 7401 adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 7402 7403 } 7404 7405 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 7406 { 7407 if (adev->flags & AMD_IS_APU) 7408 adev->gfx.imu.mode = MISSION_MODE; 7409 else 7410 adev->gfx.imu.mode = DEBUG_MODE; 7411 7412 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 7413 } 7414 7415 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 7416 { 7417 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 7418 } 7419 7420 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 7421 { 7422 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 7423 adev->gfx.config.max_sh_per_se * 7424 adev->gfx.config.max_shader_engines; 7425 7426 adev->gds.gds_size = 0x1000; 7427 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 7428 adev->gds.gws_size = 64; 7429 adev->gds.oa_size = 16; 7430 } 7431 7432 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 7433 { 7434 /* set gfx eng mqd */ 7435 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 7436 sizeof(struct v11_gfx_mqd); 7437 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 7438 gfx_v11_0_gfx_mqd_init; 7439 /* set compute eng mqd */ 7440 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 7441 sizeof(struct v11_compute_mqd); 7442 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 7443 gfx_v11_0_compute_mqd_init; 7444 } 7445 7446 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 7447 u32 bitmap) 7448 { 7449 u32 data; 7450 7451 if (!bitmap) 7452 return; 7453 7454 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7455 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7456 7457 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 7458 } 7459 7460 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 7461 { 7462 u32 data, wgp_bitmask; 7463 data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 7464 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 7465 7466 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7467 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7468 7469 wgp_bitmask = 7470 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 7471 7472 return (~data) & wgp_bitmask; 7473 } 7474 7475 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 7476 { 7477 u32 wgp_idx, wgp_active_bitmap; 7478 u32 cu_bitmap_per_wgp, cu_active_bitmap; 7479 7480 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 7481 cu_active_bitmap = 0; 7482 7483 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 7484 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 7485 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 7486 if (wgp_active_bitmap & (1 << wgp_idx)) 7487 cu_active_bitmap |= cu_bitmap_per_wgp; 7488 } 7489 7490 return cu_active_bitmap; 7491 } 7492 7493 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 7494 struct amdgpu_cu_info *cu_info) 7495 { 7496 int i, j, k, counter, active_cu_number = 0; 7497 u32 mask, bitmap; 7498 unsigned disable_masks[8 * 2]; 7499 7500 if (!adev || !cu_info) 7501 return -EINVAL; 7502 7503 amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2); 7504 7505 mutex_lock(&adev->grbm_idx_mutex); 7506 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7507 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7508 bitmap = i * adev->gfx.config.max_sh_per_se + j; 7509 if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) 7510 continue; 7511 mask = 1; 7512 counter = 0; 7513 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); 7514 if (i < 8 && j < 2) 7515 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 7516 adev, disable_masks[i * 2 + j]); 7517 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 7518 7519 /** 7520 * GFX11 could support more than 4 SEs, while the bitmap 7521 * in cu_info struct is 4x4 and ioctl interface struct 7522 * drm_amdgpu_info_device should keep stable. 7523 * So we use last two columns of bitmap to store cu mask for 7524 * SEs 4 to 7, the layout of the bitmap is as below: 7525 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 7526 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 7527 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 7528 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 7529 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 7530 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 7531 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 7532 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 7533 */ 7534 cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; 7535 7536 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7537 if (bitmap & mask) 7538 counter++; 7539 7540 mask <<= 1; 7541 } 7542 active_cu_number += counter; 7543 } 7544 } 7545 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7546 mutex_unlock(&adev->grbm_idx_mutex); 7547 7548 cu_info->number = active_cu_number; 7549 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7550 7551 return 0; 7552 } 7553 7554 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 7555 { 7556 .type = AMD_IP_BLOCK_TYPE_GFX, 7557 .major = 11, 7558 .minor = 0, 7559 .rev = 0, 7560 .funcs = &gfx_v11_0_ip_funcs, 7561 }; 7562