/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS	1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1		0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1		0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX	1

#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0		0x0030
#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0_BASE_IDX	1
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0		0x0031
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0_BASE_IDX	1

#define regCP_GFX_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT			0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT		0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT			0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT			0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT			0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT			0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT		0x00000000
#define regCP_MQD_CONTROL_DEFAULT			0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT			0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT			0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT		0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT			0x00300000
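
/*
 * The regCP_*_DEFAULT values above mirror the hardware reset defaults of
 * the corresponding CP registers.  A minimal sketch of the intended use,
 * assuming an MQD init path that seeds fields from the defaults instead
 * of reading the registers back (illustrative only, not the exact init
 * code):
 *
 *	mqd->cp_gfx_mqd_control = regCP_GFX_MQD_CONTROL_DEFAULT;
 *	mqd->cp_gfx_hqd_vmid = regCP_GFX_HQD_VMID_DEFAULT;
 */
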
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");

static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);
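
/*
 * KIQ (Kernel Interface Queue) PM4 helpers follow.  Note that the
 * SET_RESOURCES packet takes the cleaner shader address in 256-byte
 * units, hence the ">> 8" below; the GPU address is assumed to be
 * 256-byte aligned.
 */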
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0); /* oac mask */
	amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
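
/*
 * These helpers are not called directly; they are exercised through
 * adev->gfx.kiq[0].pmf.  A hedged sketch of a caller (the actual entry
 * points live in amdgpu_gfx.c, e.g. amdgpu_gfx_enable_kcq()):
 *
 *	kiq->pmf->kiq_map_queues(kiq_ring, ring);
 *	amdgpu_ring_commit(kiq_ring);
 */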
static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action,
					      gpu_addr, seq, 0);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				      uint16_t pasid, uint32_t flush_type,
				      bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}
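
/*
 * Golden registers are skipped under SR-IOV above because the host
 * driver is assumed to program them on behalf of the VF.
 */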
static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
					bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) | /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
			ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
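
/*
 * The IB test below writes a magic value to a writeback slot through a
 * WRITE_DATA packet; DST_SEL(5) selects memory as the destination and
 * WR_CONFIRM makes the CP wait for the write to land before continuing.
 */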
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}
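
/*
 * The ucode_prefix decoded below is derived from the GC IP version,
 * e.g. "gc_11_0_0", and selects one of the firmware sets declared with
 * MODULE_FIRMWARE() at the top of this file.
 */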
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", ucode_prefix);
	if (err)
		goto out;
	/* check pfp fw hdr version to decide if enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else if (amdgpu_is_kicker_fw(adev))
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			      &adev->gfx.rlc.cp_table_gpu_addr,
			      (void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}
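
/*
 * The clear state buffer (CSB) initialized below is built from the
 * gfx11_cs_data tables in clearstate_gfx11.h and holds the golden
 * context register values the CP loads on a clear-state request.
 */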
static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}
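
/*
 * Wave-state reads go through the auto-incrementing SQ_IND_INDEX /
 * SQ_IND_DATA pair above, a debug interface also used by tools such
 * as umr; the caller is expected to have selected the target SE/SH
 * via select_se_sh() first.
 */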
static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id,
				     uint32_t simd, uint32_t wave,
				     uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
						  struct amdgpu_gfx_shadow_info *shadow_info)
{
	/* for gfx */
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
	/* for compute */
	shadow_info->eop_size = GFX11_MEC_HPD_SIZE;
	shadow_info->eop_alignment = 256;
}

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (adev->gfx.cp_gfx_shadow || skip_check) {
		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
	.get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
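
/*
 * MQD_SHADOW_BASE_SIZE / MQD_FWWORKAREA_SIZE above describe the
 * per-queue shadow and firmware work-area buffers that user mode is
 * expected to allocate when FW-based gfx shadowing (cp_gfx_shadow)
 * is available; get_gfx_shadow_info() reports them through the gfx
 * funcs table.
 */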
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
	case IP_VERSION(11, 5, 4):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (adev->gfx.disable_kq) {
		ring->no_scheduler = true;
		ring->no_user_submission = true;
	}

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}
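
/*
 * Doorbell indices in adev->doorbell_index are in 64-bit units, so both
 * ring init helpers shift left by one to get the 32-bit doorbell offset
 * the CP expects, e.g.:
 *
 *	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
 */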
static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
					int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
	       (ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offsets in the RLC TOC are padded for alignment */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].offset +
				rlc_autoload_info[SOC21_FIRMWARE_ID_MAX - 1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC21_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size,
						       uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}
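
/*
 * The autoload mask accumulated by the copy helpers is a 64-bit bitmap
 * of SOC21_FIRMWARE_IDs; copy_toc_ucode() below stores it in the last
 * two dwords of the TOC, presumably so the RLC knows which images were
 * loaded.
 */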
static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
						   data, size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	if (adev->gfx.rs64_enable) {
		/* pfp ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.pfp_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* me ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.me_fw->data;
		/* instruction */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		/* mec ucode */
		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
			adev->gfx.mec_fw->data;
		/* instruction */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
							   fw_data, fw_size, fw_autoload_mask);
		/* data */
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cpv2_hdr->data_offset_bytes));
		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
							   fw_data, fw_size, fw_autoload_mask);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
							   fw_data, fw_size, fw_autoload_mask);
	} else {
		/* pfp ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.pfp_fw->data;
		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
							   fw_data, fw_size, fw_autoload_mask);

		/* me ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.me_fw->data;
		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
							   fw_data, fw_size, fw_autoload_mask);

		/* mec ucode */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
			adev->gfx.mec_fw->data;
		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
			le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
			cp_hdr->jt_size * 4;
		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
							   fw_data, fw_size, fw_autoload_mask);
	}

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size, fw_autoload_mask);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
								   fw_data, fw_size, fw_autoload_mask);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
								   fw_data, fw_size, fw_autoload_mask);
		}
	}
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
							    uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v2_0 *sdma_hdr;

	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
		adev->sdma.instance[0].fw->data;
	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);

	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
}

static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
							   uint32_t *fw_autoload_mask)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
			data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				ucode_id, fw_data, fw_size, fw_autoload_mask);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
				data_id, fw_data, fw_size, fw_autoload_mask);
	}
}

static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4: init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5: disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}
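
/*
 * The ip_dump buffers allocated below capture the register lists defined
 * at the top of this file; they are snapshotted on GPU hang for the
 * devcoredump, so allocation failure here is treated as non-fatal and
 * only disables the dump.
 */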
GFP_KERNEL); 1568 if (!ptr) { 1569 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1570 adev->gfx.ip_dump_compute_queues = NULL; 1571 } else { 1572 adev->gfx.ip_dump_compute_queues = ptr; 1573 } 1574 1575 /* Allocate memory for gfx queue registers for all the instances */ 1576 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1577 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1578 adev->gfx.me.num_queue_per_pipe; 1579 1580 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1581 if (!ptr) { 1582 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1583 adev->gfx.ip_dump_gfx_queues = NULL; 1584 } else { 1585 adev->gfx.ip_dump_gfx_queues = ptr; 1586 } 1587 } 1588 1589 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1590 { 1591 int i, j, k, r, ring_id; 1592 int xcc_id = 0; 1593 struct amdgpu_device *adev = ip_block->adev; 1594 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1595 1596 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1597 1598 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1599 case IP_VERSION(11, 0, 0): 1600 case IP_VERSION(11, 0, 1): 1601 case IP_VERSION(11, 0, 2): 1602 case IP_VERSION(11, 0, 3): 1603 case IP_VERSION(11, 0, 4): 1604 case IP_VERSION(11, 5, 0): 1605 case IP_VERSION(11, 5, 1): 1606 case IP_VERSION(11, 5, 2): 1607 case IP_VERSION(11, 5, 3): 1608 case IP_VERSION(11, 5, 4): 1609 adev->gfx.me.num_me = 1; 1610 adev->gfx.me.num_pipe_per_me = 1; 1611 adev->gfx.me.num_queue_per_pipe = 2; 1612 adev->gfx.mec.num_mec = 1; 1613 adev->gfx.mec.num_pipe_per_mec = 4; 1614 adev->gfx.mec.num_queue_per_pipe = 4; 1615 break; 1616 default: 1617 adev->gfx.me.num_me = 1; 1618 adev->gfx.me.num_pipe_per_me = 1; 1619 adev->gfx.me.num_queue_per_pipe = 1; 1620 adev->gfx.mec.num_mec = 1; 1621 adev->gfx.mec.num_pipe_per_mec = 4; 1622 adev->gfx.mec.num_queue_per_pipe = 8; 1623 break; 1624 } 1625 1626 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1627 case IP_VERSION(11, 0, 0): 1628 case IP_VERSION(11, 0, 2): 1629 case IP_VERSION(11, 0, 3): 1630 if (!adev->gfx.disable_uq && 1631 adev->gfx.me_fw_version >= 2420 && 1632 adev->gfx.pfp_fw_version >= 2580 && 1633 adev->gfx.mec_fw_version >= 2650 && 1634 adev->mes.fw_version[0] >= 120) { 1635 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1636 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1637 } 1638 break; 1639 case IP_VERSION(11, 0, 1): 1640 case IP_VERSION(11, 0, 4): 1641 case IP_VERSION(11, 5, 0): 1642 case IP_VERSION(11, 5, 1): 1643 case IP_VERSION(11, 5, 2): 1644 case IP_VERSION(11, 5, 3): 1645 /* add firmware version checks here */ 1646 if (0 && !adev->gfx.disable_uq) { 1647 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1648 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1649 } 1650 break; 1651 default: 1652 break; 1653 } 1654 1655 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1656 case IP_VERSION(11, 0, 0): 1657 case IP_VERSION(11, 0, 2): 1658 case IP_VERSION(11, 0, 3): 1659 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1660 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1661 if (adev->gfx.me_fw_version >= 2280 && 1662 adev->gfx.pfp_fw_version >= 2370 && 1663 adev->gfx.mec_fw_version >= 2450 && 1664 adev->mes.fw_version[0] >= 99) { 1665 adev->gfx.enable_cleaner_shader = true; 1666 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1667 if (r) { 1668 adev->gfx.enable_cleaner_shader = false; 1669 dev_err(adev->dev, 
"Failed to initialize cleaner shader\n"); 1670 } 1671 } 1672 break; 1673 case IP_VERSION(11, 0, 1): 1674 case IP_VERSION(11, 0, 4): 1675 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1676 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1677 if (adev->gfx.pfp_fw_version >= 102 && 1678 adev->gfx.mec_fw_version >= 66 && 1679 adev->mes.fw_version[0] >= 128) { 1680 adev->gfx.enable_cleaner_shader = true; 1681 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1682 if (r) { 1683 adev->gfx.enable_cleaner_shader = false; 1684 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1685 } 1686 } 1687 break; 1688 case IP_VERSION(11, 5, 0): 1689 case IP_VERSION(11, 5, 1): 1690 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1691 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1692 if (adev->gfx.mec_fw_version >= 26 && 1693 adev->mes.fw_version[0] >= 114) { 1694 adev->gfx.enable_cleaner_shader = true; 1695 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1696 if (r) { 1697 adev->gfx.enable_cleaner_shader = false; 1698 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1699 } 1700 } 1701 break; 1702 case IP_VERSION(11, 5, 2): 1703 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1704 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1705 if (adev->gfx.me_fw_version >= 12 && 1706 adev->gfx.pfp_fw_version >= 15 && 1707 adev->gfx.mec_fw_version >= 15) { 1708 adev->gfx.enable_cleaner_shader = true; 1709 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1710 if (r) { 1711 adev->gfx.enable_cleaner_shader = false; 1712 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1713 } 1714 } 1715 break; 1716 case IP_VERSION(11, 5, 3): 1717 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1718 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1719 if (adev->gfx.me_fw_version >= 7 && 1720 adev->gfx.pfp_fw_version >= 8 && 1721 adev->gfx.mec_fw_version >= 8) { 1722 adev->gfx.enable_cleaner_shader = true; 1723 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1724 if (r) { 1725 adev->gfx.enable_cleaner_shader = false; 1726 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1727 } 1728 } 1729 break; 1730 case IP_VERSION(11, 5, 4): 1731 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1732 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1733 if (adev->gfx.me_fw_version >= 4 && 1734 adev->gfx.pfp_fw_version >= 7 && 1735 adev->gfx.mec_fw_version >= 5) { 1736 adev->gfx.enable_cleaner_shader = true; 1737 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1738 if (r) { 1739 adev->gfx.enable_cleaner_shader = false; 1740 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1741 } 1742 } 1743 break; 1744 default: 1745 adev->gfx.enable_cleaner_shader = false; 1746 break; 1747 } 1748 1749 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1750 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1751 amdgpu_sriov_is_pp_one_vf(adev)) 1752 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1753 1754 /* EOP Event */ 1755 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1756 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1757 &adev->gfx.eop_irq); 1758 if (r) 1759 return r; 1760 1761 /* Bad opcode Event */ 1762 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1763 
GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1764 &adev->gfx.bad_op_irq); 1765 if (r) 1766 return r; 1767 1768 /* Privileged reg */ 1769 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1770 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1771 &adev->gfx.priv_reg_irq); 1772 if (r) 1773 return r; 1774 1775 /* Privileged inst */ 1776 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1777 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1778 &adev->gfx.priv_inst_irq); 1779 if (r) 1780 return r; 1781 1782 /* FED error */ 1783 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1784 GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1785 &adev->gfx.rlc_gc_fed_irq); 1786 if (r) 1787 return r; 1788 1789 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1790 1791 gfx_v11_0_me_init(adev); 1792 1793 r = gfx_v11_0_rlc_init(adev); 1794 if (r) { 1795 DRM_ERROR("Failed to init rlc BOs!\n"); 1796 return r; 1797 } 1798 1799 r = gfx_v11_0_mec_init(adev); 1800 if (r) { 1801 DRM_ERROR("Failed to init MEC BOs!\n"); 1802 return r; 1803 } 1804 1805 if (adev->gfx.num_gfx_rings) { 1806 ring_id = 0; 1807 /* set up the gfx ring */ 1808 for (i = 0; i < adev->gfx.me.num_me; i++) { 1809 for (j = 0; j < num_queue_per_pipe; j++) { 1810 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1811 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1812 continue; 1813 1814 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1815 i, k, j); 1816 if (r) 1817 return r; 1818 ring_id++; 1819 } 1820 } 1821 } 1822 } 1823 1824 if (adev->gfx.num_compute_rings) { 1825 ring_id = 0; 1826 /* set up the compute queues - allocate horizontally across pipes */ 1827 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1828 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1829 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1830 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1831 k, j)) 1832 continue; 1833 1834 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1835 i, k, j); 1836 if (r) 1837 return r; 1838 1839 ring_id++; 1840 } 1841 } 1842 } 1843 } 1844 1845 adev->gfx.gfx_supported_reset = 1846 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1847 adev->gfx.compute_supported_reset = 1848 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1849 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1850 case IP_VERSION(11, 0, 0): 1851 case IP_VERSION(11, 0, 2): 1852 case IP_VERSION(11, 0, 3): 1853 if ((adev->gfx.me_fw_version >= 2280) && 1854 (adev->gfx.mec_fw_version >= 2410) && 1855 !amdgpu_sriov_vf(adev) && 1856 !adev->debug_disable_gpu_ring_reset) { 1857 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1858 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1859 } 1860 break; 1861 default: 1862 if (!amdgpu_sriov_vf(adev) && 1863 !adev->debug_disable_gpu_ring_reset) { 1864 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1865 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1866 } 1867 break; 1868 } 1869 1870 if (!adev->enable_mes_kiq) { 1871 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1872 if (r) { 1873 DRM_ERROR("Failed to init KIQ BOs!\n"); 1874 return r; 1875 } 1876 1877 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1878 if (r) 1879 return r; 1880 } 1881 1882 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1883 if (r) 1884 return r; 1885 1886 /* allocate visible FB for rlc auto-loading fw */ 1887 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1888 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1889 if (r) 1890 return r; 1891 } 1892 1893 r = 
gfx_v11_0_gpu_early_init(adev); 1894 if (r) 1895 return r; 1896 1897 if (amdgpu_gfx_ras_sw_init(adev)) { 1898 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1899 return -EINVAL; 1900 } 1901 1902 gfx_v11_0_alloc_ip_dump(adev); 1903 1904 r = amdgpu_gfx_sysfs_init(adev); 1905 if (r) 1906 return r; 1907 1908 return 0; 1909 } 1910 1911 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1912 { 1913 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1914 &adev->gfx.pfp.pfp_fw_gpu_addr, 1915 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1916 1917 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1918 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1919 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1920 } 1921 1922 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1923 { 1924 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1925 &adev->gfx.me.me_fw_gpu_addr, 1926 (void **)&adev->gfx.me.me_fw_ptr); 1927 1928 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1929 &adev->gfx.me.me_fw_data_gpu_addr, 1930 (void **)&adev->gfx.me.me_fw_data_ptr); 1931 } 1932 1933 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1934 { 1935 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1936 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1937 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1938 } 1939 1940 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1941 { 1942 int i; 1943 struct amdgpu_device *adev = ip_block->adev; 1944 1945 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1946 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1947 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1948 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1949 1950 amdgpu_gfx_mqd_sw_fini(adev, 0); 1951 1952 if (!adev->enable_mes_kiq) { 1953 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1954 amdgpu_gfx_kiq_fini(adev, 0); 1955 } 1956 1957 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1958 1959 gfx_v11_0_pfp_fini(adev); 1960 gfx_v11_0_me_fini(adev); 1961 gfx_v11_0_rlc_fini(adev); 1962 gfx_v11_0_mec_fini(adev); 1963 1964 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1965 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1966 1967 gfx_v11_0_free_microcode(adev); 1968 1969 amdgpu_gfx_sysfs_fini(adev); 1970 1971 kfree(adev->gfx.ip_dump_core); 1972 kfree(adev->gfx.ip_dump_compute_queues); 1973 kfree(adev->gfx.ip_dump_gfx_queues); 1974 1975 return 0; 1976 } 1977 1978 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1979 u32 sh_num, u32 instance, int xcc_id) 1980 { 1981 u32 data; 1982 1983 if (instance == 0xffffffff) 1984 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1985 INSTANCE_BROADCAST_WRITES, 1); 1986 else 1987 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1988 instance); 1989 1990 if (se_num == 0xffffffff) 1991 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1992 1); 1993 else 1994 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1995 1996 if (sh_num == 0xffffffff) 1997 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1998 1); 1999 else 2000 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 2001 2002 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 2003 } 2004 2005 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 2006 { 2007 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 2008 2009 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 2010 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 2011 CC_GC_SA_UNIT_DISABLE, 2012 SA_DISABLE); 2013 gc_user_disabled_sa_mask = 
RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 2014 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 2015 GC_USER_SA_UNIT_DISABLE, 2016 SA_DISABLE); 2017 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 2018 adev->gfx.config.max_shader_engines); 2019 2020 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 2021 } 2022 2023 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2024 { 2025 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 2026 u32 rb_mask; 2027 2028 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 2029 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 2030 CC_RB_BACKEND_DISABLE, 2031 BACKEND_DISABLE); 2032 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2033 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2034 GC_USER_RB_BACKEND_DISABLE, 2035 BACKEND_DISABLE); 2036 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2037 adev->gfx.config.max_shader_engines); 2038 2039 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2040 } 2041 2042 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2043 { 2044 u32 rb_bitmap_per_sa; 2045 u32 rb_bitmap_width_per_sa; 2046 u32 max_sa; 2047 u32 active_sa_bitmap; 2048 u32 global_active_rb_bitmap; 2049 u32 active_rb_bitmap = 0; 2050 u32 i; 2051 2052 /* query sa bitmap from SA_UNIT_DISABLE registers */ 2053 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2054 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2055 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2056 2057 /* generate active rb bitmap according to active sa bitmap */ 2058 max_sa = adev->gfx.config.max_shader_engines * 2059 adev->gfx.config.max_sh_per_se; 2060 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2061 adev->gfx.config.max_sh_per_se; 2062 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2063 2064 for (i = 0; i < max_sa; i++) { 2065 if (active_sa_bitmap & (1 << i)) 2066 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2067 } 2068 2069 active_rb_bitmap &= global_active_rb_bitmap; 2070 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2071 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2072 } 2073 2074 #define DEFAULT_SH_MEM_BASES (0x6000) 2075 #define LDS_APP_BASE 0x1 2076 #define SCRATCH_APP_BASE 0x2 2077 2078 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2079 { 2080 int i; 2081 uint32_t sh_mem_bases; 2082 uint32_t data; 2083 2084 /* 2085 * Configure apertures: 2086 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2087 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2088 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2089 */ 2090 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2091 SCRATCH_APP_BASE; 2092 2093 mutex_lock(&adev->srbm_mutex); 2094 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2095 soc21_grbm_select(adev, 0, 0, 0, i); 2096 /* CP and shaders */ 2097 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2098 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2099 2100 /* Enable trap for each kfd vmid. 
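* (a descriptive note, added for clarity: TRAP_EN only arms per-VMID trap handling here; the trap handler itself is expected to be installed separately by the KFD when a debugger or trap consumer needs it.)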
*/ 2101 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2102 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2103 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2104 } 2105 soc21_grbm_select(adev, 0, 0, 0, 0); 2106 mutex_unlock(&adev->srbm_mutex); 2107 2108 /* 2109 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2110 * access. These should be enabled by FW for target VMIDs. 2111 */ 2112 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2113 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2114 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2115 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2116 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2117 } 2118 } 2119 2120 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2121 { 2122 int vmid; 2123 2124 /* 2125 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2126 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2127 * the driver can enable them for graphics. VMID0 should maintain 2128 * access so that HWS firmware can save/restore entries. 2129 */ 2130 for (vmid = 1; vmid < 16; vmid++) { 2131 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2132 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2133 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2134 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2135 } 2136 } 2137 2138 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2139 { 2140 /* TODO: harvest feature to be added later. */ 2141 } 2142 2143 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2144 { 2145 /* TCCs are global (not instanced). */ 2146 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2147 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2148 2149 adev->gfx.config.tcc_disabled_mask = 2150 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2151 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2152 } 2153 2154 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2155 { 2156 u32 tmp; 2157 int i; 2158 2159 if (!amdgpu_sriov_vf(adev)) 2160 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2161 2162 gfx_v11_0_setup_rb(adev); 2163 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2164 gfx_v11_0_get_tcc_info(adev); 2165 adev->gfx.config.pa_sc_tile_steering_override = 0; 2166 2167 /* Set whether texture coordinate truncation is conformant. 
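* (note: the TA_CNTL2 mode is only read back and cached in adev->gfx.config below; the register itself is not modified.)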
*/ 2168 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2169 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2170 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2171 2172 /* XXX SH_MEM regs */ 2173 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2174 mutex_lock(&adev->srbm_mutex); 2175 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2176 soc21_grbm_select(adev, 0, 0, 0, i); 2177 /* CP and shaders */ 2178 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2179 if (i != 0) { 2180 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2181 (adev->gmc.private_aperture_start >> 48)); 2182 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2183 (adev->gmc.shared_aperture_start >> 48)); 2184 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2185 } 2186 } 2187 soc21_grbm_select(adev, 0, 0, 0, 0); 2188 2189 mutex_unlock(&adev->srbm_mutex); 2190 2191 gfx_v11_0_init_compute_vmid(adev); 2192 gfx_v11_0_init_gds_vmid(adev); 2193 } 2194 2195 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2196 int me, int pipe) 2197 { 2198 if (me != 0) 2199 return 0; 2200 2201 switch (pipe) { 2202 case 0: 2203 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2204 case 1: 2205 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2206 default: 2207 return 0; 2208 } 2209 } 2210 2211 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2212 int me, int pipe) 2213 { 2214 /* 2215 * amdgpu controls only the first MEC. That's why this function only 2216 * handles the setting of interrupts for this specific MEC. All other 2217 * pipes' interrupts are set by amdkfd. 2218 */ 2219 if (me != 1) 2220 return 0; 2221 2222 switch (pipe) { 2223 case 0: 2224 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2225 case 1: 2226 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2227 case 2: 2228 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2229 case 3: 2230 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2231 default: 2232 return 0; 2233 } 2234 } 2235 2236 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2237 bool enable) 2238 { 2239 u32 tmp, cp_int_cntl_reg; 2240 int i, j; 2241 2242 if (amdgpu_sriov_vf(adev)) 2243 return; 2244 2245 for (i = 0; i < adev->gfx.me.num_me; i++) { 2246 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2247 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2248 2249 if (cp_int_cntl_reg) { 2250 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2251 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2252 enable ? 1 : 0); 2253 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2254 enable ? 1 : 0); 2255 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2256 enable ? 1 : 0); 2257 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2258 enable ? 
1 : 0); 2259 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2260 } 2261 } 2262 } 2263 } 2264 2265 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2266 { 2267 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2268 2269 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2270 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2271 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2272 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2273 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2274 2275 return 0; 2276 } 2277 2278 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2279 { 2280 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2281 2282 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2283 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2284 } 2285 2286 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2287 { 2288 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2289 udelay(50); 2290 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2291 udelay(50); 2292 } 2293 2294 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2295 bool enable) 2296 { 2297 uint32_t rlc_pg_cntl; 2298 2299 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2300 2301 if (!enable) { 2302 /* RLC_PG_CNTL[23] = 0 (default) 2303 * RLC will wait for handshake acks with SMU 2304 * GFXOFF will be enabled 2305 * RLC_PG_CNTL[23] = 1 2306 * RLC will not issue any message to SMU 2307 * hence no handshake between SMU & RLC 2308 * GFXOFF will be disabled 2309 */ 2310 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2311 } else 2312 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2313 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2314 } 2315 2316 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2317 { 2318 /* TODO: re-enable the rlc & smu handshake once smu 2319 * and the gfxoff feature work as expected */ 2320 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2321 gfx_v11_0_rlc_smu_handshake_cntl(adev, false); 2322 2323 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2324 udelay(50); 2325 } 2326 2327 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2328 { 2329 uint32_t tmp; 2330 2331 /* enable Save Restore Machine */ 2332 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2333 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2334 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2335 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2336 } 2337 2338 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2339 { 2340 const struct rlc_firmware_header_v2_0 *hdr; 2341 const __le32 *fw_data; 2342 unsigned i, fw_size; 2343 2344 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2345 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2346 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2347 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2348 2349 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2350 RLCG_UCODE_LOADING_START_ADDRESS); 2351 2352 for (i = 0; i < fw_size; i++) 2353 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2354 le32_to_cpup(fw_data++)); 2355 2356 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2357 } 2358 2359 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2360 { 2361 const struct rlc_firmware_header_v2_2 *hdr; 2362 const __le32 *fw_data; 2363 unsigned i, fw_size; 2364 u32 tmp; 2365 2366 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2367 2368 fw_data = (const __le32
*)(adev->gfx.rlc_fw->data + 2369 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2370 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2371 2372 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2373 2374 for (i = 0; i < fw_size; i++) { 2375 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2376 msleep(1); 2377 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2378 le32_to_cpup(fw_data++)); 2379 } 2380 2381 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2382 2383 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2384 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2385 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2386 2387 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2388 for (i = 0; i < fw_size; i++) { 2389 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2390 msleep(1); 2391 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2392 le32_to_cpup(fw_data++)); 2393 } 2394 2395 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2396 2397 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2398 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2399 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2400 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2401 } 2402 2403 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2404 { 2405 const struct rlc_firmware_header_v2_3 *hdr; 2406 const __le32 *fw_data; 2407 unsigned i, fw_size; 2408 u32 tmp; 2409 2410 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2411 2412 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2413 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2414 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2415 2416 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2417 2418 for (i = 0; i < fw_size; i++) { 2419 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2420 msleep(1); 2421 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2422 le32_to_cpup(fw_data++)); 2423 } 2424 2425 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2426 2427 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2428 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2429 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2430 2431 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2432 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2433 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2434 2435 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2436 2437 for (i = 0; i < fw_size; i++) { 2438 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2439 msleep(1); 2440 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2441 le32_to_cpup(fw_data++)); 2442 } 2443 2444 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2445 2446 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2447 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2448 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2449 } 2450 2451 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2452 { 2453 const struct rlc_firmware_header_v2_0 *hdr; 2454 uint16_t version_major; 2455 uint16_t version_minor; 2456 2457 if (!adev->gfx.rlc_fw) 2458 return -EINVAL; 2459 2460 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2461 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2462 2463 version_major = le16_to_cpu(hdr->header.header_version_major); 2464 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2465 2466 if (version_major == 2) { 2467 gfx_v11_0_load_rlcg_microcode(adev); 2468 if (amdgpu_dpm == 1) { 2469 if (version_minor >= 2) 
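/* v2.2 headers carry the separate LX6 IRAM/DRAM images loaded below; v2.3 additionally carries the RLCP/RLCV images */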
2470 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2471 if (version_minor == 3) 2472 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2473 } 2474 2475 return 0; 2476 } 2477 2478 return -EINVAL; 2479 } 2480 2481 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2482 { 2483 int r; 2484 2485 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2486 gfx_v11_0_init_csb(adev); 2487 2488 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2489 gfx_v11_0_rlc_enable_srm(adev); 2490 } else { 2491 if (amdgpu_sriov_vf(adev)) { 2492 gfx_v11_0_init_csb(adev); 2493 return 0; 2494 } 2495 2496 adev->gfx.rlc.funcs->stop(adev); 2497 2498 /* disable CG */ 2499 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2500 2501 /* disable PG */ 2502 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2503 2504 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2505 /* legacy rlc firmware loading */ 2506 r = gfx_v11_0_rlc_load_microcode(adev); 2507 if (r) 2508 return r; 2509 } 2510 2511 gfx_v11_0_init_csb(adev); 2512 2513 adev->gfx.rlc.funcs->start(adev); 2514 } 2515 return 0; 2516 } 2517 2518 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2519 { 2520 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2521 uint32_t tmp; 2522 int i; 2523 2524 /* Trigger an invalidation of the L1 instruction caches */ 2525 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2526 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2527 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2528 2529 /* Wait for invalidation complete */ 2530 for (i = 0; i < usec_timeout; i++) { 2531 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2532 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2533 INVALIDATE_CACHE_COMPLETE)) 2534 break; 2535 udelay(1); 2536 } 2537 2538 if (i >= usec_timeout) { 2539 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2540 return -EINVAL; 2541 } 2542 2543 if (amdgpu_emu_mode == 1) 2544 amdgpu_device_flush_hdp(adev, NULL); 2545 2546 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2547 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2548 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2549 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2550 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2551 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2552 2553 /* Program me ucode address into instruction cache address register */ 2554 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2555 lower_32_bits(addr) & 0xFFFFF000); 2556 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2557 upper_32_bits(addr)); 2558 2559 return 0; 2560 } 2561 2562 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2563 { 2564 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2565 uint32_t tmp; 2566 int i; 2567 2568 /* Trigger an invalidation of the L1 instruction caches */ 2569 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2570 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2571 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2572 2573 /* Wait for invalidation complete */ 2574 for (i = 0; i < usec_timeout; i++) { 2575 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2576 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2577 INVALIDATE_CACHE_COMPLETE)) 2578 break; 2579 udelay(1); 2580 } 2581 2582 if (i >= usec_timeout) { 2583 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2584 return -EINVAL; 2585 } 2586 2587 if (amdgpu_emu_mode == 1) 2588 amdgpu_device_flush_hdp(adev, NULL); 2589 2590 tmp = RREG32_SOC15(GC, 0,
regCP_PFP_IC_BASE_CNTL); 2591 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2592 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2593 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2594 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2595 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2596 2597 /* Program pfp ucode address into instruction cache address register */ 2598 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2599 lower_32_bits(addr) & 0xFFFFF000); 2600 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2601 upper_32_bits(addr)); 2602 2603 return 0; 2604 } 2605 2606 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2607 { 2608 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2609 uint32_t tmp; 2610 int i; 2611 2612 /* Trigger an invalidation of the L1 instruction caches */ 2613 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2614 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2615 2616 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2617 2618 /* Wait for invalidation complete */ 2619 for (i = 0; i < usec_timeout; i++) { 2620 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2621 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2622 INVALIDATE_CACHE_COMPLETE)) 2623 break; 2624 udelay(1); 2625 } 2626 2627 if (i >= usec_timeout) { 2628 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2629 return -EINVAL; 2630 } 2631 2632 if (amdgpu_emu_mode == 1) 2633 amdgpu_device_flush_hdp(adev, NULL); 2634 2635 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2636 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2637 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2638 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2639 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2640 2641 /* Program mec1 ucode address into instruction cache address register */ 2642 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2643 lower_32_bits(addr) & 0xFFFFF000); 2644 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2645 upper_32_bits(addr)); 2646 2647 return 0; 2648 } 2649 2650 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2651 { 2652 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2653 uint32_t tmp; 2654 unsigned i, pipe_id; 2655 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2656 2657 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2658 adev->gfx.pfp_fw->data; 2659 2660 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2661 lower_32_bits(addr)); 2662 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2663 upper_32_bits(addr)); 2664 2665 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2666 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2667 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2668 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2669 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2670 2671 /* 2672 * Programming any of the CP_PFP_IC_BASE registers 2673 * forces invalidation of the ME L1 I$.
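* (unlike gfx_v11_0_config_pfp_cache() above, no explicit INVALIDATE_CACHE strobe is required in this path.)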
Wait for the 2674 * invalidation complete 2675 */ 2676 for (i = 0; i < usec_timeout; i++) { 2677 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2678 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2679 INVALIDATE_CACHE_COMPLETE)) 2680 break; 2681 udelay(1); 2682 } 2683 2684 if (i >= usec_timeout) { 2685 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2686 return -EINVAL; 2687 } 2688 2689 /* Prime the L1 instruction caches */ 2690 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2691 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2692 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2693 /* Waiting for cache primed */ 2694 for (i = 0; i < usec_timeout; i++) { 2695 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2696 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2697 ICACHE_PRIMED)) 2698 break; 2699 udelay(1); 2700 } 2701 2702 if (i >= usec_timeout) { 2703 dev_err(adev->dev, "failed to prime instruction cache\n"); 2704 return -EINVAL; 2705 } 2706 2707 mutex_lock(&adev->srbm_mutex); 2708 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2709 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2710 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2711 (pfp_hdr->ucode_start_addr_hi << 30) | 2712 (pfp_hdr->ucode_start_addr_lo >> 2)); 2713 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2714 pfp_hdr->ucode_start_addr_hi >> 2); 2715 2716 /* 2717 * Program CP_ME_CNTL to reset given PIPE to take 2718 * effect of CP_PFP_PRGRM_CNTR_START. 2719 */ 2720 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2721 if (pipe_id == 0) 2722 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2723 PFP_PIPE0_RESET, 1); 2724 else 2725 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2726 PFP_PIPE1_RESET, 1); 2727 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2728 2729 /* Clear pfp pipe reset bit.
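(releasing the reset lets the pipe fetch from the CP_PFP_PRGRM_CNTR_START value programmed above)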
*/ 2730 if (pipe_id == 0) 2731 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2732 PFP_PIPE0_RESET, 0); 2733 else 2734 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2735 PFP_PIPE1_RESET, 0); 2736 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2737 2738 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2739 lower_32_bits(addr2)); 2740 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2741 upper_32_bits(addr2)); 2742 } 2743 soc21_grbm_select(adev, 0, 0, 0, 0); 2744 mutex_unlock(&adev->srbm_mutex); 2745 2746 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2747 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2748 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2749 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2750 2751 /* Invalidate the data caches */ 2752 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2753 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2754 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2755 2756 for (i = 0; i < usec_timeout; i++) { 2757 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2758 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2759 INVALIDATE_DCACHE_COMPLETE)) 2760 break; 2761 udelay(1); 2762 } 2763 2764 if (i >= usec_timeout) { 2765 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2766 return -EINVAL; 2767 } 2768 2769 return 0; 2770 } 2771 2772 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2773 { 2774 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2775 uint32_t tmp; 2776 unsigned i, pipe_id; 2777 const struct gfx_firmware_header_v2_0 *me_hdr; 2778 2779 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2780 adev->gfx.me_fw->data; 2781 2782 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2783 lower_32_bits(addr)); 2784 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2785 upper_32_bits(addr)); 2786 2787 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2788 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2789 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2790 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2791 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2792 2793 /* 2794 * Programming any of the CP_ME_IC_BASE registers 2795 * forces invalidation of the ME L1 I$. 
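* (as with the CP_PFP_IC_BASE registers above, the base-register write itself performs the invalidation.)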
Wait for the 2796 * invalidation complete 2797 */ 2798 for (i = 0; i < usec_timeout; i++) { 2799 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2800 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2801 INVALIDATE_CACHE_COMPLETE)) 2802 break; 2803 udelay(1); 2804 } 2805 2806 if (i >= usec_timeout) { 2807 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2808 return -EINVAL; 2809 } 2810 2811 /* Prime the instruction caches */ 2812 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2813 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2814 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2815 2816 /* Waiting for instruction cache primed */ 2817 for (i = 0; i < usec_timeout; i++) { 2818 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2819 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2820 ICACHE_PRIMED)) 2821 break; 2822 udelay(1); 2823 } 2824 2825 if (i >= usec_timeout) { 2826 dev_err(adev->dev, "failed to prime instruction cache\n"); 2827 return -EINVAL; 2828 } 2829 2830 mutex_lock(&adev->srbm_mutex); 2831 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2832 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2833 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2834 (me_hdr->ucode_start_addr_hi << 30) | 2835 (me_hdr->ucode_start_addr_lo >> 2)); 2836 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2837 me_hdr->ucode_start_addr_hi >> 2); 2838 2839 /* 2840 * Program CP_ME_CNTL to reset given PIPE to take 2841 * effect of CP_ME_PRGRM_CNTR_START. 2842 */ 2843 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2844 if (pipe_id == 0) 2845 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2846 ME_PIPE0_RESET, 1); 2847 else 2848 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2849 ME_PIPE1_RESET, 1); 2850 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2851 2852 /* Clear me pipe reset bit.
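(note the CP_ME_CNTL value read above is reused, with only the reset field flipped back)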
*/ 2853 if (pipe_id == 0) 2854 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2855 ME_PIPE0_RESET, 0); 2856 else 2857 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2858 ME_PIPE1_RESET, 0); 2859 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2860 2861 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2862 lower_32_bits(addr2)); 2863 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2864 upper_32_bits(addr2)); 2865 } 2866 soc21_grbm_select(adev, 0, 0, 0, 0); 2867 mutex_unlock(&adev->srbm_mutex); 2868 2869 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2870 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2871 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2872 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2873 2874 /* Invalidate the data caches */ 2875 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2876 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2877 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2878 2879 for (i = 0; i < usec_timeout; i++) { 2880 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2881 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2882 INVALIDATE_DCACHE_COMPLETE)) 2883 break; 2884 udelay(1); 2885 } 2886 2887 if (i >= usec_timeout) { 2888 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2889 return -EINVAL; 2890 } 2891 2892 return 0; 2893 } 2894 2895 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2896 { 2897 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2898 uint32_t tmp; 2899 unsigned i; 2900 const struct gfx_firmware_header_v2_0 *mec_hdr; 2901 2902 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2903 adev->gfx.mec_fw->data; 2904 2905 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2906 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2907 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2908 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2909 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2910 2911 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2912 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2913 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2914 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2915 2916 mutex_lock(&adev->srbm_mutex); 2917 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2918 soc21_grbm_select(adev, 1, i, 0, 0); 2919 2920 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2921 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2922 upper_32_bits(addr2)); 2923 2924 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2925 mec_hdr->ucode_start_addr_lo >> 2 | 2926 mec_hdr->ucode_start_addr_hi << 30); 2927 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2928 mec_hdr->ucode_start_addr_hi >> 2); 2929 2930 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2931 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2932 upper_32_bits(addr)); 2933 } 2934 mutex_unlock(&adev->srbm_mutex); 2935 soc21_grbm_select(adev, 0, 0, 0, 0); 2936 2937 /* Trigger an invalidation of the L1 data cache */ 2938 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2939 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2940 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2941 2942 /* Wait for invalidation complete */ 2943 for (i = 0; i < usec_timeout; i++) { 2944 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2945 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2946 INVALIDATE_DCACHE_COMPLETE)) 2947 break; 2948 udelay(1); 2949 } 2950 2951 if (i >=
usec_timeout) { 2952 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2953 return -EINVAL; 2954 } 2955 2956 /* Trigger an invalidation of the L1 instruction caches */ 2957 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2958 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2959 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2960 2961 /* Wait for invalidation complete */ 2962 for (i = 0; i < usec_timeout; i++) { 2963 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2964 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2965 INVALIDATE_CACHE_COMPLETE)) 2966 break; 2967 udelay(1); 2968 } 2969 2970 if (i >= usec_timeout) { 2971 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2972 return -EINVAL; 2973 } 2974 2975 return 0; 2976 } 2977 2978 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2979 { 2980 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2981 const struct gfx_firmware_header_v2_0 *me_hdr; 2982 const struct gfx_firmware_header_v2_0 *mec_hdr; 2983 uint32_t pipe_id, tmp; 2984 2985 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2986 adev->gfx.mec_fw->data; 2987 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2988 adev->gfx.me_fw->data; 2989 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2990 adev->gfx.pfp_fw->data; 2991 2992 /* config pfp program start addr */ 2993 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 2994 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2995 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2996 (pfp_hdr->ucode_start_addr_hi << 30) | 2997 (pfp_hdr->ucode_start_addr_lo >> 2)); 2998 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2999 pfp_hdr->ucode_start_addr_hi >> 2); 3000 } 3001 soc21_grbm_select(adev, 0, 0, 0, 0); 3002 3003 /* reset pfp pipe */ 3004 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3005 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 3006 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 3007 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3008 3009 /* clear pfp pipe reset */ 3010 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 3011 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0); 3012 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3013 3014 /* config me program start addr */ 3015 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 3016 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3017 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3018 (me_hdr->ucode_start_addr_hi << 30) | 3019 (me_hdr->ucode_start_addr_lo >> 2)); 3020 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3021 me_hdr->ucode_start_addr_hi >> 2); 3022 } 3023 soc21_grbm_select(adev, 0, 0, 0, 0); 3024 3025 /* reset me pipe */ 3026 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3027 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 3028 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 3029 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3030 3031 /* clear me pipe reset */ 3032 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 3033 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 3034 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3035 3036 /* config mec program start addr */ 3037 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 3038 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 3039 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3040 mec_hdr->ucode_start_addr_lo >> 2 | 3041 mec_hdr->ucode_start_addr_hi << 30); 3042 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3043 mec_hdr->ucode_start_addr_hi >> 2); 3044 } 3045 soc21_grbm_select(adev, 0, 0, 0, 0); 3046 3047 /* reset mec pipe */ 3048 tmp =
RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3049 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3050 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3051 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3052 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3053 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3054 3055 /* clear mec pipe reset */ 3056 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3057 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3058 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3059 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3060 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3061 } 3062 3063 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3064 { 3065 uint32_t cp_status; 3066 uint32_t bootload_status; 3067 int i, r; 3068 uint64_t addr, addr2; 3069 3070 for (i = 0; i < adev->usec_timeout; i++) { 3071 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3072 3073 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3074 IP_VERSION(11, 0, 1) || 3075 amdgpu_ip_version(adev, GC_HWIP, 0) == 3076 IP_VERSION(11, 0, 4) || 3077 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3078 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3079 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3080 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || 3081 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4)) 3082 bootload_status = RREG32_SOC15(GC, 0, 3083 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3084 else 3085 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3086 3087 if ((cp_status == 0) && 3088 (REG_GET_FIELD(bootload_status, 3089 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3090 break; 3091 } 3092 udelay(1); 3093 } 3094 3095 if (i >= adev->usec_timeout) { 3096 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3097 return -ETIMEDOUT; 3098 } 3099 3100 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3101 if (adev->gfx.rs64_enable) { 3102 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3103 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3104 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3105 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3106 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3107 if (r) 3108 return r; 3109 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3110 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3111 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3112 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3113 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3114 if (r) 3115 return r; 3116 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3117 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3118 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3119 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3120 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3121 if (r) 3122 return r; 3123 } else { 3124 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3125 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3126 r = gfx_v11_0_config_me_cache(adev, addr); 3127 if (r) 3128 return r; 3129 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3130 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3131 r = gfx_v11_0_config_pfp_cache(adev, addr); 3132 if (r) 3133 return r; 3134 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3135 
rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3136 r = gfx_v11_0_config_mec_cache(adev, addr); 3137 if (r) 3138 return r; 3139 } 3140 } 3141 3142 return 0; 3143 } 3144 3145 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3146 { 3147 int i; 3148 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3149 3150 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3151 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3152 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3153 3154 for (i = 0; i < adev->usec_timeout; i++) { 3155 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3156 break; 3157 udelay(1); 3158 } 3159 3160 if (i >= adev->usec_timeout) 3161 DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); 3162 3163 return 0; 3164 } 3165 3166 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3167 { 3168 int r; 3169 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3170 const __le32 *fw_data; 3171 unsigned i, fw_size; 3172 3173 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3174 adev->gfx.pfp_fw->data; 3175 3176 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3177 3178 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3179 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3180 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3181 3182 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3183 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3184 &adev->gfx.pfp.pfp_fw_obj, 3185 &adev->gfx.pfp.pfp_fw_gpu_addr, 3186 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3187 if (r) { 3188 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3189 gfx_v11_0_pfp_fini(adev); 3190 return r; 3191 } 3192 3193 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3194 3195 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3196 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3197 3198 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3199 3200 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3201 3202 for (i = 0; i < pfp_hdr->jt_size; i++) 3203 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3204 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3205 3206 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3207 3208 return 0; 3209 } 3210 3211 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3212 { 3213 int r; 3214 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3215 const __le32 *fw_ucode, *fw_data; 3216 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3217 uint32_t tmp; 3218 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3219 3220 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3221 adev->gfx.pfp_fw->data; 3222 3223 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3224 3225 /* instruction */ 3226 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3227 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3228 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3229 /* data */ 3230 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3231 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3232 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3233 3234 /* 64kb align */ 3235 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3236 64 * 1024, 3237 AMDGPU_GEM_DOMAIN_VRAM | 3238 AMDGPU_GEM_DOMAIN_GTT, 3239 &adev->gfx.pfp.pfp_fw_obj, 3240 &adev->gfx.pfp.pfp_fw_gpu_addr, 3241 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3242 if (r) { 3243 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3244 gfx_v11_0_pfp_fini(adev); 3245 return r; 3246 } 3247 3248 r = 
amdgpu_bo_create_reserved(adev, fw_data_size, 3249 64 * 1024, 3250 AMDGPU_GEM_DOMAIN_VRAM | 3251 AMDGPU_GEM_DOMAIN_GTT, 3252 &adev->gfx.pfp.pfp_fw_data_obj, 3253 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3254 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3255 if (r) { 3256 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3257 gfx_v11_0_pfp_fini(adev); 3258 return r; 3259 } 3260 3261 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3262 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3263 3264 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3265 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3266 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3267 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3268 3269 if (amdgpu_emu_mode == 1) 3270 amdgpu_device_flush_hdp(adev, NULL); 3271 3272 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3273 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3274 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3275 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3276 3277 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3278 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3279 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 3280 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3281 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3282 3283 /* 3284 * Programming any of the CP_PFP_IC_BASE registers 3285 * forces invalidation of the ME L1 I$. Wait for the 3286 * invalidation complete 3287 */ 3288 for (i = 0; i < usec_timeout; i++) { 3289 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3290 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3291 INVALIDATE_CACHE_COMPLETE)) 3292 break; 3293 udelay(1); 3294 } 3295 3296 if (i >= usec_timeout) { 3297 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3298 return -EINVAL; 3299 } 3300 3301 /* Prime the L1 instruction caches */ 3302 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3303 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3304 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3305 /* Waiting for cache primed */ 3306 for (i = 0; i < usec_timeout; i++) { 3307 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3308 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3309 ICACHE_PRIMED)) 3310 break; 3311 udelay(1); 3312 } 3313 3314 if (i >= usec_timeout) { 3315 dev_err(adev->dev, "failed to prime instruction cache\n"); 3316 return -EINVAL; 3317 } 3318 3319 mutex_lock(&adev->srbm_mutex); 3320 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3321 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3322 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3323 (pfp_hdr->ucode_start_addr_hi << 30) | 3324 (pfp_hdr->ucode_start_addr_lo >> 2)); 3325 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3326 pfp_hdr->ucode_start_addr_hi >> 2); 3327 3328 /* 3329 * Program CP_ME_CNTL to reset given PIPE to take 3330 * effect of CP_PFP_PRGRM_CNTR_START. 3331 */ 3332 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3333 if (pipe_id == 0) 3334 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3335 PFP_PIPE0_RESET, 1); 3336 else 3337 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3338 PFP_PIPE1_RESET, 1); 3339 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3340 3341 /* Clear pfp pipe reset bit.
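(once released, the pipe refetches from the new start address; the RS64 DC_BASE0 registers below are then pointed at the pfp data BO)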
*/ 3342 if (pipe_id == 0) 3343 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3344 PFP_PIPE0_RESET, 0); 3345 else 3346 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3347 PFP_PIPE1_RESET, 0); 3348 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3349 3350 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3351 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3352 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3353 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3354 } 3355 soc21_grbm_select(adev, 0, 0, 0, 0); 3356 mutex_unlock(&adev->srbm_mutex); 3357 3358 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3359 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3360 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3361 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3362 3363 /* Invalidate the data caches */ 3364 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3365 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3366 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3367 3368 for (i = 0; i < usec_timeout; i++) { 3369 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3370 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3371 INVALIDATE_DCACHE_COMPLETE)) 3372 break; 3373 udelay(1); 3374 } 3375 3376 if (i >= usec_timeout) { 3377 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3378 return -EINVAL; 3379 } 3380 3381 return 0; 3382 } 3383 3384 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3385 { 3386 int r; 3387 const struct gfx_firmware_header_v1_0 *me_hdr; 3388 const __le32 *fw_data; 3389 unsigned i, fw_size; 3390 3391 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3392 adev->gfx.me_fw->data; 3393 3394 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3395 3396 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3397 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3398 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3399 3400 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3401 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3402 &adev->gfx.me.me_fw_obj, 3403 &adev->gfx.me.me_fw_gpu_addr, 3404 (void **)&adev->gfx.me.me_fw_ptr); 3405 if (r) { 3406 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3407 gfx_v11_0_me_fini(adev); 3408 return r; 3409 } 3410 3411 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3412 3413 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3414 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3415 3416 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3417 3418 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3419 3420 for (i = 0; i < me_hdr->jt_size; i++) 3421 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3422 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3423 3424 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3425 3426 return 0; 3427 } 3428 3429 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3430 { 3431 int r; 3432 const struct gfx_firmware_header_v2_0 *me_hdr; 3433 const __le32 *fw_ucode, *fw_data; 3434 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3435 uint32_t tmp; 3436 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3437 3438 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3439 adev->gfx.me_fw->data; 3440 3441 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3442 3443 /* instruction */ 3444 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3445 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3446 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3447 /* data */ 3448 
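	/*
	 * RS64 (v2.0) firmware images carry separate instruction and data
	 * segments, each described by its own offset/size pair in the
	 * header; the two segments are copied into separate BOs below.
	 */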
	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
			le32_to_cpu(me_hdr->data_offset_bytes));
	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);

	/* 64kb align */
	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_obj,
				      &adev->gfx.me.me_fw_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.me.me_fw_data_obj,
				      &adev->gfx.me.me_fw_data_gpu_addr,
				      (void **)&adev->gfx.me.me_fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
		return r;
	}

	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);

	if (amdgpu_emu_mode == 1)
		amdgpu_device_flush_hdp(adev, NULL);

	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
		     lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
		     upper_32_bits(adev->gfx.me.me_fw_gpu_addr));

	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);

	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
		return -EINVAL;
	}

	/* Prime the instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);

	/* Wait for the instruction cache to be primed */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
				       ICACHE_PRIMED))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to prime instruction cache\n");
		return -EINVAL;
	}

	mutex_lock(&adev->srbm_mutex);
	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			     (me_hdr->ucode_start_addr_hi << 30) |
			     (me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			     me_hdr->ucode_start_addr_hi >> 2);

		/*
		 * Program CP_ME_CNTL to reset the given PIPE so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 1);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 1);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		/* Clear the me pipe reset bit. */
		if (pipe_id == 0)
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE0_RESET, 0);
		else
			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
					    ME_PIPE1_RESET, 0);
		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);

		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
			     lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
			     upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);

	/* Invalidate the data caches */
	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);

	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
		return -EINVAL;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
		return -EINVAL;

	gfx_v11_0_cp_gfx_enable(adev, false);

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
		return r;
	}

	if (adev->gfx.rs64_enable)
		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
	else
		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
	if (r) {
		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
		return r;
	}

	return 0;
}

static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;
	int ctx_reg_offset;

	/* init the CP */
	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
		     adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);

	if (!amdgpu_async_gfx_ring)
		gfx_v11_0_cp_gfx_enable(adev, true);

	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
	if (r) {
		drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext =
sect->section; ext->extent != NULL; ++ext) { 3665 if (sect->id == SECT_CONTEXT) { 3666 amdgpu_ring_write(ring, 3667 PACKET3(PACKET3_SET_CONTEXT_REG, 3668 ext->reg_count)); 3669 amdgpu_ring_write(ring, ext->reg_index - 3670 PACKET3_SET_CONTEXT_REG_START); 3671 for (i = 0; i < ext->reg_count; i++) 3672 amdgpu_ring_write(ring, ext->extent[i]); 3673 } 3674 } 3675 } 3676 3677 ctx_reg_offset = 3678 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3679 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3680 amdgpu_ring_write(ring, ctx_reg_offset); 3681 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3682 3683 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3684 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3685 3686 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3687 amdgpu_ring_write(ring, 0); 3688 3689 amdgpu_ring_commit(ring); 3690 3691 /* submit cs packet to copy state 0 to next available state */ 3692 if (adev->gfx.num_gfx_rings > 1) { 3693 /* maximum supported gfx ring is 2 */ 3694 ring = &adev->gfx.gfx_ring[1]; 3695 r = amdgpu_ring_alloc(ring, 2); 3696 if (r) { 3697 drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); 3698 return r; 3699 } 3700 3701 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3702 amdgpu_ring_write(ring, 0); 3703 3704 amdgpu_ring_commit(ring); 3705 } 3706 return 0; 3707 } 3708 3709 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3710 CP_PIPE_ID pipe) 3711 { 3712 u32 tmp; 3713 3714 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3715 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3716 3717 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3718 } 3719 3720 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3721 struct amdgpu_ring *ring) 3722 { 3723 u32 tmp; 3724 3725 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3726 if (ring->use_doorbell) { 3727 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3728 DOORBELL_OFFSET, ring->doorbell_index); 3729 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3730 DOORBELL_EN, 1); 3731 } else { 3732 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3733 DOORBELL_EN, 0); 3734 } 3735 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3736 3737 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3738 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3739 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3740 3741 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3742 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3743 } 3744 3745 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3746 { 3747 struct amdgpu_ring *ring; 3748 u32 tmp; 3749 u32 rb_bufsz; 3750 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3751 3752 /* Set the write pointer delay */ 3753 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3754 3755 /* set the RB to use vmid 0 */ 3756 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3757 3758 /* Init gfx ring 0 for pipe 0 */ 3759 mutex_lock(&adev->srbm_mutex); 3760 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3761 3762 /* Set ring buffer size */ 3763 ring = &adev->gfx.gfx_ring[0]; 3764 rb_bufsz = order_base_2(ring->ring_size / 8); 3765 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3766 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3767 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3768 3769 /* Initialize the ring buffer's write pointers */ 3770 ring->wptr = 0; 3771 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 
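	/* the ring write pointer is 64 bits wide, so program both halves */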
3772 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3773 3774 /* set the wb address whether it's enabled or not */ 3775 rptr_addr = ring->rptr_gpu_addr; 3776 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3777 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3778 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3779 3780 wptr_gpu_addr = ring->wptr_gpu_addr; 3781 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3782 lower_32_bits(wptr_gpu_addr)); 3783 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3784 upper_32_bits(wptr_gpu_addr)); 3785 3786 mdelay(1); 3787 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3788 3789 rb_addr = ring->gpu_addr >> 8; 3790 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3791 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3792 3793 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3794 3795 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3796 mutex_unlock(&adev->srbm_mutex); 3797 3798 /* Init gfx ring 1 for pipe 1 */ 3799 if (adev->gfx.num_gfx_rings > 1) { 3800 mutex_lock(&adev->srbm_mutex); 3801 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3802 /* maximum supported gfx ring is 2 */ 3803 ring = &adev->gfx.gfx_ring[1]; 3804 rb_bufsz = order_base_2(ring->ring_size / 8); 3805 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3806 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3807 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3808 /* Initialize the ring buffer's write pointers */ 3809 ring->wptr = 0; 3810 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3811 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3812 /* Set the wb address whether it's enabled or not */ 3813 rptr_addr = ring->rptr_gpu_addr; 3814 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3815 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3816 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3817 wptr_gpu_addr = ring->wptr_gpu_addr; 3818 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3819 lower_32_bits(wptr_gpu_addr)); 3820 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3821 upper_32_bits(wptr_gpu_addr)); 3822 3823 mdelay(1); 3824 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3825 3826 rb_addr = ring->gpu_addr >> 8; 3827 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3828 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3829 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3830 3831 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3832 mutex_unlock(&adev->srbm_mutex); 3833 } 3834 /* Switch to pipe 0 */ 3835 mutex_lock(&adev->srbm_mutex); 3836 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3837 mutex_unlock(&adev->srbm_mutex); 3838 3839 /* start the ring */ 3840 gfx_v11_0_cp_gfx_start(adev); 3841 3842 return 0; 3843 } 3844 3845 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3846 { 3847 u32 data; 3848 3849 if (adev->gfx.rs64_enable) { 3850 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3851 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3852 enable ? 0 : 1); 3853 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3854 enable ? 0 : 1); 3855 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3856 enable ? 0 : 1); 3857 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3858 enable ? 0 : 1); 3859 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3860 enable ? 0 : 1); 3861 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3862 enable ? 
1 : 0); 3863 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3864 enable ? 1 : 0); 3865 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3866 enable ? 1 : 0); 3867 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3868 enable ? 1 : 0); 3869 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3870 enable ? 0 : 1); 3871 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3872 } else { 3873 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3874 3875 if (enable) { 3876 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3877 if (!adev->enable_mes_kiq) 3878 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3879 MEC_ME2_HALT, 0); 3880 } else { 3881 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3882 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3883 } 3884 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3885 } 3886 3887 udelay(50); 3888 } 3889 3890 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3891 { 3892 const struct gfx_firmware_header_v1_0 *mec_hdr; 3893 const __le32 *fw_data; 3894 unsigned i, fw_size; 3895 u32 *fw = NULL; 3896 int r; 3897 3898 if (!adev->gfx.mec_fw) 3899 return -EINVAL; 3900 3901 gfx_v11_0_cp_compute_enable(adev, false); 3902 3903 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3904 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3905 3906 fw_data = (const __le32 *) 3907 (adev->gfx.mec_fw->data + 3908 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3909 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3910 3911 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 3912 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3913 &adev->gfx.mec.mec_fw_obj, 3914 &adev->gfx.mec.mec_fw_gpu_addr, 3915 (void **)&fw); 3916 if (r) { 3917 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3918 gfx_v11_0_mec_fini(adev); 3919 return r; 3920 } 3921 3922 memcpy(fw, fw_data, fw_size); 3923 3924 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3925 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3926 3927 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3928 3929 /* MEC1 */ 3930 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3931 3932 for (i = 0; i < mec_hdr->jt_size; i++) 3933 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3934 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3935 3936 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3937 3938 return 0; 3939 } 3940 3941 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3942 { 3943 const struct gfx_firmware_header_v2_0 *mec_hdr; 3944 const __le32 *fw_ucode, *fw_data; 3945 u32 tmp, fw_ucode_size, fw_data_size; 3946 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3947 u32 *fw_ucode_ptr, *fw_data_ptr; 3948 int r; 3949 3950 if (!adev->gfx.mec_fw) 3951 return -EINVAL; 3952 3953 gfx_v11_0_cp_compute_enable(adev, false); 3954 3955 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3956 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3957 3958 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3959 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3960 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3961 3962 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3963 le32_to_cpu(mec_hdr->data_offset_bytes)); 3964 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3965 3966 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3967 64 * 1024, 3968 AMDGPU_GEM_DOMAIN_VRAM | 3969 AMDGPU_GEM_DOMAIN_GTT, 3970 &adev->gfx.mec.mec_fw_obj, 3971 
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw_ucode_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	r = amdgpu_bo_create_reserved(adev, fw_data_size,
				      64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_data_obj,
				      &adev->gfx.mec.mec_fw_data_gpu_addr,
				      (void **)&fw_data_ptr);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
		gfx_v11_0_mec_fini(adev);
		return r;
	}

	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
	memcpy(fw_data_ptr, fw_data, fw_data_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);

	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);

	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		soc21_grbm_select(adev, 1, i, 0, 0);

		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));

		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
			     mec_hdr->ucode_start_addr_lo >> 2 |
			     mec_hdr->ucode_start_addr_hi << 30);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
			     mec_hdr->ucode_start_addr_hi >> 2);

		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
			     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
			     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Trigger an invalidation of the MEC data cache */
	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				       INVALIDATE_DCACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate data cache\n");
		return -EINVAL;
	}

	/* Trigger an invalidation of the L1 instruction caches */
	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);

	/* Wait for invalidation complete */
	for (i = 0; i < usec_timeout; i++) {
		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				       INVALIDATE_CACHE_COMPLETE))
			break;
		udelay(1);
	}

	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate instruction cache\n");
instruction cache\n"); 4067 return -EINVAL; 4068 } 4069 4070 return 0; 4071 } 4072 4073 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4074 { 4075 uint32_t tmp; 4076 struct amdgpu_device *adev = ring->adev; 4077 4078 /* tell RLC which is KIQ queue */ 4079 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4080 tmp &= 0xffffff00; 4081 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4082 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4083 } 4084 4085 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4086 { 4087 /* set graphics engine doorbell range */ 4088 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4089 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4090 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4091 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4092 4093 /* set compute engine doorbell range */ 4094 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4095 (adev->doorbell_index.kiq * 2) << 2); 4096 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4097 (adev->doorbell_index.userqueue_end * 2) << 2); 4098 } 4099 4100 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4101 struct v11_gfx_mqd *mqd, 4102 struct amdgpu_mqd_prop *prop) 4103 { 4104 bool priority = 0; 4105 u32 tmp; 4106 4107 /* set up default queue priority level 4108 * 0x0 = low priority, 0x1 = high priority 4109 */ 4110 if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) 4111 priority = 1; 4112 4113 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4114 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4115 mqd->cp_gfx_hqd_queue_priority = tmp; 4116 } 4117 4118 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4119 struct amdgpu_mqd_prop *prop) 4120 { 4121 struct v11_gfx_mqd *mqd = m; 4122 uint64_t hqd_gpu_addr, wb_gpu_addr; 4123 uint32_t tmp; 4124 uint32_t rb_bufsz; 4125 4126 /* set up gfx hqd wptr */ 4127 mqd->cp_gfx_hqd_wptr = 0; 4128 mqd->cp_gfx_hqd_wptr_hi = 0; 4129 4130 /* set the pointer to the MQD */ 4131 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4132 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4133 4134 /* set up mqd control */ 4135 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4136 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4137 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4138 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4139 mqd->cp_gfx_mqd_control = tmp; 4140 4141 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4142 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4143 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4144 mqd->cp_gfx_hqd_vmid = 0; 4145 4146 /* set up gfx queue priority */ 4147 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4148 4149 /* set up time quantum */ 4150 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4151 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4152 mqd->cp_gfx_hqd_quantum = tmp; 4153 4154 /* set up gfx hqd base. 
this is similar as CP_RB_BASE */ 4155 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4156 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4157 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4158 4159 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4160 wb_gpu_addr = prop->rptr_gpu_addr; 4161 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4162 mqd->cp_gfx_hqd_rptr_addr_hi = 4163 upper_32_bits(wb_gpu_addr) & 0xffff; 4164 4165 /* set up rb_wptr_poll addr */ 4166 wb_gpu_addr = prop->wptr_gpu_addr; 4167 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4168 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4169 4170 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4171 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4172 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4173 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4174 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4175 #ifdef __BIG_ENDIAN 4176 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4177 #endif 4178 if (prop->tmz_queue) 4179 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); 4180 if (!prop->kernel_queue) 4181 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); 4182 mqd->cp_gfx_hqd_cntl = tmp; 4183 4184 /* set up cp_doorbell_control */ 4185 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4186 if (prop->use_doorbell) { 4187 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4188 DOORBELL_OFFSET, prop->doorbell_index); 4189 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4190 DOORBELL_EN, 1); 4191 } else 4192 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4193 DOORBELL_EN, 0); 4194 mqd->cp_rb_doorbell_control = tmp; 4195 4196 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4197 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4198 4199 /* active the queue */ 4200 mqd->cp_gfx_hqd_active = 1; 4201 4202 /* set gfx UQ items */ 4203 mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); 4204 mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); 4205 mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); 4206 mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); 4207 mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); 4208 mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); 4209 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4210 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4211 4212 return 0; 4213 } 4214 4215 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4216 { 4217 struct amdgpu_device *adev = ring->adev; 4218 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4219 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4220 4221 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4222 memset((void *)mqd, 0, sizeof(*mqd)); 4223 mutex_lock(&adev->srbm_mutex); 4224 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4225 amdgpu_ring_init_mqd(ring); 4226 soc21_grbm_select(adev, 0, 0, 0, 0); 4227 mutex_unlock(&adev->srbm_mutex); 4228 if (adev->gfx.me.mqd_backup[mqd_idx]) 4229 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4230 } else { 4231 /* restore mqd with the backup copy */ 4232 if (adev->gfx.me.mqd_backup[mqd_idx]) 4233 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4234 /* reset the ring */ 4235 ring->wptr = 0; 4236 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4237 amdgpu_ring_clear_ring(ring); 4238 } 4239 4240 return 0; 4241 } 4242 4243 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device 
*adev) 4244 { 4245 int r, i; 4246 4247 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4248 r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4249 if (r) 4250 return r; 4251 } 4252 4253 r = amdgpu_gfx_enable_kgq(adev, 0); 4254 if (r) 4255 return r; 4256 4257 return gfx_v11_0_cp_gfx_start(adev); 4258 } 4259 4260 static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev, 4261 struct v11_compute_mqd *mqd, 4262 struct amdgpu_mqd_prop *prop) 4263 { 4264 uint32_t se_mask[8] = {0}; 4265 uint32_t wa_mask; 4266 bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE | 4267 AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE); 4268 4269 if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count)) 4270 return; 4271 4272 if (has_wa_flag) { 4273 wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ? 4274 0xffff : 0xffffffff; 4275 mqd->compute_static_thread_mgmt_se0 = wa_mask; 4276 mqd->compute_static_thread_mgmt_se1 = wa_mask; 4277 mqd->compute_static_thread_mgmt_se2 = wa_mask; 4278 mqd->compute_static_thread_mgmt_se3 = wa_mask; 4279 return; 4280 } 4281 4282 amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask, 4283 prop->cu_mask_count, se_mask); 4284 4285 mqd->compute_static_thread_mgmt_se0 = se_mask[0]; 4286 mqd->compute_static_thread_mgmt_se1 = se_mask[1]; 4287 mqd->compute_static_thread_mgmt_se2 = se_mask[2]; 4288 mqd->compute_static_thread_mgmt_se3 = se_mask[3]; 4289 } 4290 4291 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4292 struct amdgpu_mqd_prop *prop) 4293 { 4294 struct v11_compute_mqd *mqd = m; 4295 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4296 uint32_t tmp; 4297 4298 mqd->header = 0xC0310800; 4299 mqd->compute_pipelinestat_enable = 0x00000001; 4300 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4301 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4302 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4303 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4304 mqd->compute_misc_reserved = 0x00000007; 4305 4306 eop_base_addr = prop->eop_gpu_addr >> 8; 4307 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4308 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4309 4310 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4311 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4312 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4313 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4314 4315 mqd->cp_hqd_eop_control = tmp; 4316 4317 /* enable doorbell? 
*/ 4318 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4319 4320 if (prop->use_doorbell) { 4321 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4322 DOORBELL_OFFSET, prop->doorbell_index); 4323 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4324 DOORBELL_EN, 1); 4325 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4326 DOORBELL_SOURCE, 0); 4327 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4328 DOORBELL_HIT, 0); 4329 } else { 4330 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4331 DOORBELL_EN, 0); 4332 } 4333 4334 mqd->cp_hqd_pq_doorbell_control = tmp; 4335 4336 /* disable the queue if it's active */ 4337 mqd->cp_hqd_dequeue_request = 0; 4338 mqd->cp_hqd_pq_rptr = 0; 4339 mqd->cp_hqd_pq_wptr_lo = 0; 4340 mqd->cp_hqd_pq_wptr_hi = 0; 4341 4342 /* set the pointer to the MQD */ 4343 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4344 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4345 4346 /* set MQD vmid to 0 */ 4347 tmp = regCP_MQD_CONTROL_DEFAULT; 4348 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4349 mqd->cp_mqd_control = tmp; 4350 4351 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4352 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4353 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4354 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4355 4356 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4357 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 4358 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4359 (order_base_2(prop->queue_size / 4) - 1)); 4360 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4361 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4362 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4363 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4364 prop->allow_tunneling); 4365 if (prop->kernel_queue) { 4366 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4367 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4368 } 4369 if (prop->tmz_queue) 4370 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4371 mqd->cp_hqd_pq_control = tmp; 4372 4373 /* set the wb address whether it's enabled or not */ 4374 wb_gpu_addr = prop->rptr_gpu_addr; 4375 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4376 mqd->cp_hqd_pq_rptr_report_addr_hi = 4377 upper_32_bits(wb_gpu_addr) & 0xffff; 4378 4379 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4380 wb_gpu_addr = prop->wptr_gpu_addr; 4381 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4382 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4383 4384 tmp = 0; 4385 /* enable the doorbell if requested */ 4386 if (prop->use_doorbell) { 4387 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4388 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4389 DOORBELL_OFFSET, prop->doorbell_index); 4390 4391 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4392 DOORBELL_EN, 1); 4393 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4394 DOORBELL_SOURCE, 0); 4395 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4396 DOORBELL_HIT, 0); 4397 } 4398 4399 mqd->cp_hqd_pq_doorbell_control = tmp; 4400 4401 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4402 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4403 4404 /* set the vmid for the queue */ 4405 mqd->cp_hqd_vmid = 0; 4406 4407 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 4408 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4409 mqd->cp_hqd_persistent_state = 
tmp; 4410 4411 /* set MIN_IB_AVAIL_SIZE */ 4412 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4413 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4414 mqd->cp_hqd_ib_control = tmp; 4415 4416 /* set static priority for a compute queue/ring */ 4417 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4418 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4419 4420 mqd->cp_hqd_active = prop->hqd_active; 4421 4422 /* set UQ fenceaddress */ 4423 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4424 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4425 /* set CU mask */ 4426 gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop); 4427 4428 return 0; 4429 } 4430 4431 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4432 { 4433 struct amdgpu_device *adev = ring->adev; 4434 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4435 int j; 4436 4437 /* inactivate the queue */ 4438 if (amdgpu_sriov_vf(adev)) 4439 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4440 4441 /* disable wptr polling */ 4442 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4443 4444 /* write the EOP addr */ 4445 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4446 mqd->cp_hqd_eop_base_addr_lo); 4447 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4448 mqd->cp_hqd_eop_base_addr_hi); 4449 4450 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4451 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4452 mqd->cp_hqd_eop_control); 4453 4454 /* enable doorbell? */ 4455 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4456 mqd->cp_hqd_pq_doorbell_control); 4457 4458 /* disable the queue if it's active */ 4459 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4460 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4461 for (j = 0; j < adev->usec_timeout; j++) { 4462 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4463 break; 4464 udelay(1); 4465 } 4466 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4467 mqd->cp_hqd_dequeue_request); 4468 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4469 mqd->cp_hqd_pq_rptr); 4470 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4471 mqd->cp_hqd_pq_wptr_lo); 4472 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4473 mqd->cp_hqd_pq_wptr_hi); 4474 } 4475 4476 /* set the pointer to the MQD */ 4477 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4478 mqd->cp_mqd_base_addr_lo); 4479 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4480 mqd->cp_mqd_base_addr_hi); 4481 4482 /* set MQD vmid to 0 */ 4483 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4484 mqd->cp_mqd_control); 4485 4486 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4487 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4488 mqd->cp_hqd_pq_base_lo); 4489 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4490 mqd->cp_hqd_pq_base_hi); 4491 4492 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4493 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4494 mqd->cp_hqd_pq_control); 4495 4496 /* set the wb address whether it's enabled or not */ 4497 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4498 mqd->cp_hqd_pq_rptr_report_addr_lo); 4499 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4500 mqd->cp_hqd_pq_rptr_report_addr_hi); 4501 4502 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4503 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4504 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4505 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4506 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4507 4508 /* enable the doorbell if requested */ 4509 if (ring->use_doorbell) { 4510 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 
4511 (adev->doorbell_index.kiq * 2) << 2); 4512 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4513 (adev->doorbell_index.userqueue_end * 2) << 2); 4514 } 4515 4516 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4517 mqd->cp_hqd_pq_doorbell_control); 4518 4519 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4520 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4521 mqd->cp_hqd_pq_wptr_lo); 4522 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4523 mqd->cp_hqd_pq_wptr_hi); 4524 4525 /* set the vmid for the queue */ 4526 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4527 4528 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4529 mqd->cp_hqd_persistent_state); 4530 4531 /* activate the queue */ 4532 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4533 mqd->cp_hqd_active); 4534 4535 if (ring->use_doorbell) 4536 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4537 4538 return 0; 4539 } 4540 4541 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4542 { 4543 struct amdgpu_device *adev = ring->adev; 4544 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4545 4546 gfx_v11_0_kiq_setting(ring); 4547 4548 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4549 /* reset MQD to a clean status */ 4550 if (adev->gfx.kiq[0].mqd_backup) 4551 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4552 4553 /* reset ring buffer */ 4554 ring->wptr = 0; 4555 amdgpu_ring_clear_ring(ring); 4556 4557 mutex_lock(&adev->srbm_mutex); 4558 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4559 gfx_v11_0_kiq_init_register(ring); 4560 soc21_grbm_select(adev, 0, 0, 0, 0); 4561 mutex_unlock(&adev->srbm_mutex); 4562 } else { 4563 memset((void *)mqd, 0, sizeof(*mqd)); 4564 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4565 amdgpu_ring_clear_ring(ring); 4566 mutex_lock(&adev->srbm_mutex); 4567 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4568 amdgpu_ring_init_mqd(ring); 4569 gfx_v11_0_kiq_init_register(ring); 4570 soc21_grbm_select(adev, 0, 0, 0, 0); 4571 mutex_unlock(&adev->srbm_mutex); 4572 4573 if (adev->gfx.kiq[0].mqd_backup) 4574 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4575 } 4576 4577 return 0; 4578 } 4579 4580 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4581 { 4582 struct amdgpu_device *adev = ring->adev; 4583 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4584 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4585 4586 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4587 memset((void *)mqd, 0, sizeof(*mqd)); 4588 mutex_lock(&adev->srbm_mutex); 4589 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4590 amdgpu_ring_init_mqd(ring); 4591 soc21_grbm_select(adev, 0, 0, 0, 0); 4592 mutex_unlock(&adev->srbm_mutex); 4593 4594 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4595 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4596 } else { 4597 /* restore MQD to a clean status */ 4598 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4599 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4600 /* reset ring buffer */ 4601 ring->wptr = 0; 4602 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4603 amdgpu_ring_clear_ring(ring); 4604 } 4605 4606 return 0; 4607 } 4608 4609 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4610 { 4611 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4612 return 0; 4613 } 4614 4615 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4616 { 4617 int i, r; 4618 4619 if (!amdgpu_async_gfx_ring) 4620 
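		/* without async gfx rings, the MEC must be running before the KCQs are initialized */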
gfx_v11_0_cp_compute_enable(adev, true); 4621 4622 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4623 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4624 if (r) 4625 return r; 4626 } 4627 4628 return amdgpu_gfx_enable_kcq(adev, 0); 4629 } 4630 4631 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4632 { 4633 int r, i; 4634 struct amdgpu_ring *ring; 4635 4636 if (!(adev->flags & AMD_IS_APU)) 4637 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4638 4639 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4640 /* legacy firmware loading */ 4641 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4642 if (r) 4643 return r; 4644 4645 if (adev->gfx.rs64_enable) 4646 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4647 else 4648 r = gfx_v11_0_cp_compute_load_microcode(adev); 4649 if (r) 4650 return r; 4651 } 4652 4653 gfx_v11_0_cp_set_doorbell_range(adev); 4654 4655 if (amdgpu_async_gfx_ring) { 4656 gfx_v11_0_cp_compute_enable(adev, true); 4657 gfx_v11_0_cp_gfx_enable(adev, true); 4658 } 4659 4660 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4661 r = amdgpu_mes_kiq_hw_init(adev, 0); 4662 else 4663 r = gfx_v11_0_kiq_resume(adev); 4664 if (r) 4665 return r; 4666 4667 r = gfx_v11_0_kcq_resume(adev); 4668 if (r) 4669 return r; 4670 4671 if (!amdgpu_async_gfx_ring) { 4672 r = gfx_v11_0_cp_gfx_resume(adev); 4673 if (r) 4674 return r; 4675 } else { 4676 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4677 if (r) 4678 return r; 4679 } 4680 4681 if (adev->gfx.disable_kq) { 4682 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4683 ring = &adev->gfx.gfx_ring[i]; 4684 /* we don't want to set ring->ready */ 4685 r = amdgpu_ring_test_ring(ring); 4686 if (r) 4687 return r; 4688 } 4689 if (amdgpu_async_gfx_ring) 4690 amdgpu_gfx_disable_kgq(adev, 0); 4691 } else { 4692 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4693 ring = &adev->gfx.gfx_ring[i]; 4694 r = amdgpu_ring_test_helper(ring); 4695 if (r) 4696 return r; 4697 } 4698 } 4699 4700 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4701 ring = &adev->gfx.compute_ring[i]; 4702 r = amdgpu_ring_test_helper(ring); 4703 if (r) 4704 return r; 4705 } 4706 4707 return 0; 4708 } 4709 4710 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4711 { 4712 gfx_v11_0_cp_gfx_enable(adev, enable); 4713 gfx_v11_0_cp_compute_enable(adev, enable); 4714 } 4715 4716 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4717 { 4718 int r; 4719 bool value; 4720 4721 r = adev->gfxhub.funcs->gart_enable(adev); 4722 if (r) 4723 return r; 4724 4725 amdgpu_device_flush_hdp(adev, NULL); 4726 4727 value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; 4728 4729 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4730 /* TODO investigate why this and the hdp flush above is needed, 4731 * are we missing a flush somewhere else? 
*/ 4732 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4733 4734 return 0; 4735 } 4736 4737 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4738 { 4739 u32 tmp; 4740 4741 /* select RS64 */ 4742 if (adev->gfx.rs64_enable) { 4743 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4744 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4745 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4746 4747 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4748 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4749 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4750 } 4751 4752 if (amdgpu_emu_mode == 1) 4753 msleep(100); 4754 } 4755 4756 static int get_gb_addr_config(struct amdgpu_device * adev) 4757 { 4758 u32 gb_addr_config; 4759 4760 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4761 if (gb_addr_config == 0) 4762 return -EINVAL; 4763 4764 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4765 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4766 4767 adev->gfx.config.gb_addr_config = gb_addr_config; 4768 4769 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4770 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4771 GB_ADDR_CONFIG, NUM_PIPES); 4772 4773 adev->gfx.config.max_tile_pipes = 4774 adev->gfx.config.gb_addr_config_fields.num_pipes; 4775 4776 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4777 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4778 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4779 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4780 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4781 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4782 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4783 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4784 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4785 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4786 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4787 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4788 4789 return 0; 4790 } 4791 4792 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4793 { 4794 uint32_t data; 4795 4796 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4797 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4798 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4799 4800 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4801 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4802 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4803 } 4804 4805 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4806 { 4807 int r; 4808 struct amdgpu_device *adev = ip_block->adev; 4809 4810 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4811 adev->gfx.cleaner_shader_ptr); 4812 4813 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4814 if (adev->gfx.imu.funcs) { 4815 /* RLC autoload sequence 1: Program rlc ram */ 4816 if (adev->gfx.imu.funcs->program_rlc_ram) 4817 adev->gfx.imu.funcs->program_rlc_ram(adev); 4818 /* rlc autoload firmware */ 4819 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4820 if (r) 4821 return r; 4822 } 4823 } else { 4824 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4825 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4826 if (adev->gfx.imu.funcs->load_microcode) 4827 adev->gfx.imu.funcs->load_microcode(adev); 4828 if (adev->gfx.imu.funcs->setup_imu) 4829 adev->gfx.imu.funcs->setup_imu(adev); 4830 if (adev->gfx.imu.funcs->start_imu) 4831 adev->gfx.imu.funcs->start_imu(adev); 4832 } 4833 4834 /* disable gpa mode in backdoor loading */ 4835 gfx_v11_0_disable_gpa_mode(adev); 4836 } 4837 } 4838 4839 
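	/*
	 * Both the PSP and RLC-backdoor-autoload paths leave the RLC to
	 * finish loading the CP firmware, so wait here until it reports
	 * completion.
	 */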
	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
		if (r) {
			dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
			return r;
		}
	}

	adev->gfx.is_poweron = true;

	if (get_gb_addr_config(adev))
		drm_warn(adev_to_drm(adev), "Invalid gb_addr_config!\n");

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
	    adev->gfx.rs64_enable)
		gfx_v11_0_config_gfx_rs64(adev);

	r = gfx_v11_0_gfxhub_enable(adev);
	if (r)
		return r;

	if (!amdgpu_emu_mode)
		gfx_v11_0_init_golden_registers(adev);

	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
		/*
		 * For gfx 11, RLC firmware loading relies on the SMU firmware
		 * having been loaded first, so for the direct loading path the
		 * SMC microcode has to be loaded here before the RLC.
		 */
		r = amdgpu_pm_load_smu_firmware(adev, NULL);
		if (r)
			return r;
	}

	gfx_v11_0_constants_init(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		gfx_v11_0_select_cp_fw_arch(adev);

	if (adev->nbio.funcs->gc_doorbell_init)
		adev->nbio.funcs->gc_doorbell_init(adev);

	r = gfx_v11_0_rlc_resume(adev);
	if (r)
		return r;

	/*
	 * init golden registers and rlc resume may override some registers,
	 * reconfig them here
	 */
	gfx_v11_0_tcp_harvest(adev);

	r = gfx_v11_0_cp_resume(adev);
	if (r)
		return r;

	/* get IMU version from HW if it's not set */
	if (!adev->gfx.imu_fw_version)
		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);

	return r;
}

static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
					      bool enable)
{
	unsigned int irq_type;
	int m, p, r;

	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
		for (m = 0; m < adev->gfx.me.num_me; m++) {
			for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
					   + (m * adev->gfx.mec.num_pipe_per_mec)
					   + p;
				if (enable)
					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
							   irq_type);
				else
					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
							   irq_type);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	cancel_delayed_work_sync(&adev->gfx.idle_work);

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
	gfx_v11_0_set_userq_eop_interrupts(adev, false);

	if (!adev->no_hw_access) {
		if (amdgpu_async_gfx_ring &&
		    !adev->gfx.disable_kq) {
			if (amdgpu_gfx_disable_kgq(adev, 0))
				DRM_ERROR("KGQ disable failed\n");
		}

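		/* unmap the compute kernel queues and shut down the MES KIQ as well */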
		if (amdgpu_gfx_disable_kcq(adev, 0))
			DRM_ERROR("KCQ disable failed\n");

		amdgpu_mes_kiq_hw_fini(adev, 0);
	}

	if (amdgpu_sriov_vf(adev))
		/* Remove the steps disabling CPG and clearing KIQ position,
		 * so that CP could perform IDLE-SAVE during switch. Those
		 * steps are necessary to avoid a DMAR error on gfx9, which
		 * is not reproduced on gfx11.
		 */
		return 0;

	gfx_v11_0_cp_enable(adev, false);
	gfx_v11_0_enable_gui_idle_interrupt(adev, false);

	adev->gfxhub.funcs->gart_disable(adev);

	adev->gfx.is_poweron = false;

	return 0;
}

static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return gfx_v11_0_hw_fini(ip_block);
}

static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
	return gfx_v11_0_hw_init(ip_block);
}

static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
			  GRBM_STATUS, GUI_ACTIVE))
		return false;
	else
		return true;
}

static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i;
	u32 tmp;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* read GRBM_STATUS and check the GUI_ACTIVE bit */
		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
			GRBM_STATUS__GUI_ACTIVE_MASK;

		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
				      bool req)
{
	u32 i, tmp, val;

	for (i = 0; i < adev->usec_timeout; i++) {
		/* Request with MeId=2, PipeId=0 */
		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);

		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
		if (req) {
			if (val == tmp)
				break;
		} else {
			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
					    REQUEST, 1);

			/* unlocked or locked by firmware */
			if (val != tmp)
				break;
		}
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		return -EINVAL;

	return 0;
}

static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
	u32 grbm_soft_reset = 0;
	u32 tmp;
	int r, i, j, k;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				soc21_grbm_select(adev, i, k, j, 0);

				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
			}
		}
	}
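	/* likewise ask every gfx HQD to drain before the soft reset is triggered */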
			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
				soc21_grbm_select(adev, i, k, j, 0);

				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Try to acquire the gfx mutex before accessing CP_VMID_RESET */
	mutex_lock(&adev->gfx.reset_sem_mutex);
	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
	if (r) {
		mutex_unlock(&adev->gfx.reset_sem_mutex);
		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
		return r;
	}

	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);

	/* Read the CP_VMID_RESET register three times to give
	 * GFX_HQD_ACTIVE sufficient time to reach 0.
	 */
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
	RREG32_SOC15(GC, 0, regCP_VMID_RESET);

	/* release the gfx mutex */
	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
	mutex_unlock(&adev->gfx.reset_sem_mutex);
	if (r) {
		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
		return r;
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "Failed to wait for all pipes to drain\n");
		return -EINVAL;
	}

	/********** trigger soft reset ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 1);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 1);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
	/********** exit soft reset ***********/
	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CP, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_GFX, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPF, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPC, 0);
	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
					SOFT_RESET_CPG, 0);
	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);

	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);

	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
			break;
		udelay(1);
	}
	if (i >= adev->usec_timeout) {
		dev_err(adev->dev, "Failed to wait for CP_VMID_RESET to clear\n");
		return -EINVAL;
	}

	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL,
			    CNTX_EMPTY_INT_ENABLE, 1);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

	return gfx_v11_0_cp_resume(adev);
}

static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
	int i, r;
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	long tmo = msecs_to_jiffies(1000);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	return false;
}

static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	/*
	 * A GFX soft reset also disturbs MES, so MES needs to be resumed
	 * after a GFX soft reset.
	 */
	return amdgpu_mes_resume(adev);
}

static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;
	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;

	if (amdgpu_sriov_vf(adev)) {
		amdgpu_gfx_off_ctrl(adev, false);
		mutex_lock(&adev->gfx.gpu_clock_mutex);
		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
		if (clock_counter_hi_pre != clock_counter_hi_after)
			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
		mutex_unlock(&adev->gfx.gpu_clock_mutex);
		amdgpu_gfx_off_ctrl(adev, true);
	} else {
		preempt_disable();
		if (amdgpu_ip_version(adev, SMUIO_HWIP, 0) < IP_VERSION(15, 0, 0)) {
			clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_UPPER);
			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_LOWER);
			clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_UPPER);
			if (clock_counter_hi_pre != clock_counter_hi_after)
				clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
						regGOLDEN_TSC_COUNT_LOWER);
		} else {
			clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0);
			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0);
			clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0,
					regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0);
			if (clock_counter_hi_pre != clock_counter_hi_after)
				clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
						regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0);
		}
		preempt_enable();
	}
	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);

	return clock;
}

static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					   uint32_t vmid,
					   uint32_t gds_base, uint32_t gds_size,
					   uint32_t gws_base, uint32_t gws_size,
					   uint32_t oa_base, uint32_t oa_size)
{
	struct amdgpu_device *adev = ring->adev;

	/* GDS Base */
	gfx_v11_0_write_data_to_reg(ring, 0, false,
				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
				    gds_base);

	/* GDS Size */
	gfx_v11_0_write_data_to_reg(ring, 0,
false, 5285 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5286 gds_size); 5287 5288 /* GWS */ 5289 gfx_v11_0_write_data_to_reg(ring, 0, false, 5290 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5291 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5292 5293 /* OA */ 5294 gfx_v11_0_write_data_to_reg(ring, 0, false, 5295 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5296 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5297 } 5298 5299 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5300 { 5301 struct amdgpu_device *adev = ip_block->adev; 5302 5303 switch (amdgpu_user_queue) { 5304 case -1: 5305 case 0: 5306 default: 5307 adev->gfx.disable_kq = false; 5308 adev->gfx.disable_uq = true; 5309 break; 5310 case 1: 5311 adev->gfx.disable_kq = false; 5312 adev->gfx.disable_uq = false; 5313 break; 5314 case 2: 5315 adev->gfx.disable_kq = true; 5316 adev->gfx.disable_uq = false; 5317 break; 5318 } 5319 5320 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5321 5322 if (adev->gfx.disable_kq) { 5323 /* We need one GFX ring temporarily to set up 5324 * the clear state. 5325 */ 5326 adev->gfx.num_gfx_rings = 1; 5327 adev->gfx.num_compute_rings = 0; 5328 } else { 5329 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5330 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5331 AMDGPU_MAX_COMPUTE_RINGS); 5332 } 5333 5334 gfx_v11_0_set_kiq_pm4_funcs(adev); 5335 gfx_v11_0_set_ring_funcs(adev); 5336 gfx_v11_0_set_irq_funcs(adev); 5337 gfx_v11_0_set_gds_init(adev); 5338 gfx_v11_0_set_rlc_funcs(adev); 5339 gfx_v11_0_set_mqd_funcs(adev); 5340 gfx_v11_0_set_imu_funcs(adev); 5341 5342 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5343 5344 return gfx_v11_0_init_microcode(adev); 5345 } 5346 5347 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block) 5348 { 5349 struct amdgpu_device *adev = ip_block->adev; 5350 int r; 5351 5352 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5353 if (r) 5354 return r; 5355 5356 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5357 if (r) 5358 return r; 5359 5360 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 5361 if (r) 5362 return r; 5363 5364 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 5365 if (r) 5366 return r; 5367 5368 return 0; 5369 } 5370 5371 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5372 { 5373 uint32_t rlc_cntl; 5374 5375 /* if RLC is not enabled, do nothing */ 5376 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5377 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? 
true : false; 5378 } 5379 5380 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5381 { 5382 uint32_t data; 5383 unsigned i; 5384 5385 data = RLC_SAFE_MODE__CMD_MASK; 5386 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5387 5388 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5389 5390 /* wait for RLC_SAFE_MODE */ 5391 for (i = 0; i < adev->usec_timeout; i++) { 5392 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5393 RLC_SAFE_MODE, CMD)) 5394 break; 5395 udelay(1); 5396 } 5397 } 5398 5399 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5400 { 5401 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5402 } 5403 5404 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5405 bool enable) 5406 { 5407 uint32_t def, data; 5408 5409 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5410 return; 5411 5412 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5413 5414 if (enable) 5415 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5416 else 5417 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5418 5419 if (def != data) 5420 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5421 } 5422 5423 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5424 bool enable) 5425 { 5426 uint32_t def, data; 5427 5428 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5429 return; 5430 5431 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5432 5433 if (enable) 5434 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5435 else 5436 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5437 5438 if (def != data) 5439 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5440 } 5441 5442 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5443 bool enable) 5444 { 5445 uint32_t def, data; 5446 5447 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5448 return; 5449 5450 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5451 5452 if (enable) 5453 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5454 else 5455 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5456 5457 if (def != data) 5458 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5459 } 5460 5461 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5462 bool enable) 5463 { 5464 uint32_t data, def; 5465 5466 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5467 return; 5468 5469 /* It is disabled by HW by default */ 5470 if (enable) { 5471 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5472 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5473 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5474 5475 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5476 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5477 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5478 5479 if (def != data) 5480 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5481 } 5482 } else { 5483 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5484 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5485 5486 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5487 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5488 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5489 5490 if (def != data) 5491 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5492 } 5493 } 5494 } 5495 5496 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device 
*adev, 5497 bool enable) 5498 { 5499 uint32_t def, data; 5500 5501 if (!(adev->cg_flags & 5502 (AMD_CG_SUPPORT_GFX_CGCG | 5503 AMD_CG_SUPPORT_GFX_CGLS | 5504 AMD_CG_SUPPORT_GFX_3D_CGCG | 5505 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5506 return; 5507 5508 if (enable) { 5509 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5510 5511 /* unset CGCG override */ 5512 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5513 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5514 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5515 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5516 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5517 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5518 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5519 5520 /* update CGCG override bits */ 5521 if (def != data) 5522 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5523 5524 /* enable cgcg FSM(0x0000363F) */ 5525 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5526 5527 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5528 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5529 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5530 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5531 } 5532 5533 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5534 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5535 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5536 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5537 } 5538 5539 if (def != data) 5540 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5541 5542 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5543 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5544 5545 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5546 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5547 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5548 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5549 } 5550 5551 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5552 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5553 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5554 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5555 } 5556 5557 if (def != data) 5558 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5559 5560 /* set IDLE_POLL_COUNT(0x00900100) */ 5561 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5562 5563 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5564 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5565 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5566 5567 if (def != data) 5568 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5569 5570 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5571 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5572 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5573 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5574 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5575 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5576 5577 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5578 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5579 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5580 5581 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5582 if (adev->sdma.num_instances > 1) { 5583 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5584 data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5585 
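			/* write the updated SDMA1 CGCG interrupt enable back */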
WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5586 } 5587 } else { 5588 /* Program RLC_CGCG_CGLS_CTRL */ 5589 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5590 5591 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5592 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5593 5594 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5595 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5596 5597 if (def != data) 5598 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5599 5600 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5601 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5602 5603 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5604 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5605 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5606 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5607 5608 if (def != data) 5609 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5610 5611 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5612 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5613 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5614 5615 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5616 if (adev->sdma.num_instances > 1) { 5617 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5618 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5619 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5620 } 5621 } 5622 } 5623 5624 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5625 bool enable) 5626 { 5627 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5628 5629 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5630 5631 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5632 5633 gfx_v11_0_update_repeater_fgcg(adev, enable); 5634 5635 gfx_v11_0_update_sram_fgcg(adev, enable); 5636 5637 gfx_v11_0_update_perf_clk(adev, enable); 5638 5639 if (adev->cg_flags & 5640 (AMD_CG_SUPPORT_GFX_MGCG | 5641 AMD_CG_SUPPORT_GFX_CGLS | 5642 AMD_CG_SUPPORT_GFX_CGCG | 5643 AMD_CG_SUPPORT_GFX_3D_CGCG | 5644 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5645 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5646 5647 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5648 5649 return 0; 5650 } 5651 5652 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, 5653 struct amdgpu_ring *ring, unsigned vmid) 5654 { 5655 u32 reg, pre_data, data; 5656 5657 amdgpu_gfx_off_ctrl(adev, false); 5658 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5659 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5660 pre_data = RREG32_NO_KIQ(reg); 5661 else 5662 pre_data = RREG32(reg); 5663 5664 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5665 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5666 5667 if (pre_data != data) { 5668 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5669 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5670 } else 5671 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5672 } 5673 amdgpu_gfx_off_ctrl(adev, true); 5674 5675 if (ring 5676 && amdgpu_sriov_is_pp_one_vf(adev) 5677 && (pre_data != data) 5678 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5679 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5680 amdgpu_ring_emit_wreg(ring, reg, data); 5681 } 5682 } 5683 5684 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5685 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5686 .set_safe_mode = gfx_v11_0_set_safe_mode, 5687 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5688 .init = gfx_v11_0_rlc_init, 5689 .get_csb_size = gfx_v11_0_get_csb_size, 
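	/* .get_csb_buffer below fills the clear-state buffer whose size
	 * .get_csb_size reports
	 */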
5690 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5691 .resume = gfx_v11_0_rlc_resume, 5692 .stop = gfx_v11_0_rlc_stop, 5693 .reset = gfx_v11_0_rlc_reset, 5694 .start = gfx_v11_0_rlc_start, 5695 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5696 }; 5697 5698 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5699 { 5700 u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 5701 5702 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5703 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5704 else 5705 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5706 5707 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5708 5709 // Program RLC_PG_DELAY3 for CGPG hysteresis 5710 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5711 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5712 case IP_VERSION(11, 0, 1): 5713 case IP_VERSION(11, 0, 4): 5714 case IP_VERSION(11, 5, 0): 5715 case IP_VERSION(11, 5, 1): 5716 case IP_VERSION(11, 5, 2): 5717 case IP_VERSION(11, 5, 3): 5718 case IP_VERSION(11, 5, 4): 5719 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5720 break; 5721 default: 5722 break; 5723 } 5724 } 5725 } 5726 5727 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5728 { 5729 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5730 5731 gfx_v11_cntl_power_gating(adev, enable); 5732 5733 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5734 } 5735 5736 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5737 enum amd_powergating_state state) 5738 { 5739 struct amdgpu_device *adev = ip_block->adev; 5740 bool enable = (state == AMD_PG_STATE_GATE); 5741 5742 if (amdgpu_sriov_vf(adev)) 5743 return 0; 5744 5745 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5746 case IP_VERSION(11, 0, 0): 5747 case IP_VERSION(11, 0, 2): 5748 case IP_VERSION(11, 0, 3): 5749 amdgpu_gfx_off_ctrl(adev, enable); 5750 break; 5751 case IP_VERSION(11, 0, 1): 5752 case IP_VERSION(11, 0, 4): 5753 case IP_VERSION(11, 5, 0): 5754 case IP_VERSION(11, 5, 1): 5755 case IP_VERSION(11, 5, 2): 5756 case IP_VERSION(11, 5, 3): 5757 case IP_VERSION(11, 5, 4): 5758 if (!enable) 5759 amdgpu_gfx_off_ctrl(adev, false); 5760 5761 gfx_v11_cntl_pg(adev, enable); 5762 5763 if (enable) 5764 amdgpu_gfx_off_ctrl(adev, true); 5765 5766 break; 5767 default: 5768 break; 5769 } 5770 5771 return 0; 5772 } 5773 5774 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5775 enum amd_clockgating_state state) 5776 { 5777 struct amdgpu_device *adev = ip_block->adev; 5778 5779 if (amdgpu_sriov_vf(adev)) 5780 return 0; 5781 5782 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5783 case IP_VERSION(11, 0, 0): 5784 case IP_VERSION(11, 0, 1): 5785 case IP_VERSION(11, 0, 2): 5786 case IP_VERSION(11, 0, 3): 5787 case IP_VERSION(11, 0, 4): 5788 case IP_VERSION(11, 5, 0): 5789 case IP_VERSION(11, 5, 1): 5790 case IP_VERSION(11, 5, 2): 5791 case IP_VERSION(11, 5, 3): 5792 case IP_VERSION(11, 5, 4): 5793 gfx_v11_0_update_gfx_clock_gating(adev, 5794 state == AMD_CG_STATE_GATE); 5795 break; 5796 default: 5797 break; 5798 } 5799 5800 return 0; 5801 } 5802 5803 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5804 { 5805 struct amdgpu_device *adev = ip_block->adev; 5806 int data; 5807 5808 /* AMD_CG_SUPPORT_GFX_MGCG */ 5809 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5810 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5811 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5812 5813 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5814 
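	/* For the features reported from RLC_CGTT_MGCG_OVERRIDE, a cleared
	 * override bit means the hardware is currently gating that clock,
	 * hence the inverted tests here, all of the form:
	 *
	 *   if (!(data & <FEATURE>_OVERRIDE_MASK))
	 *           *flags |= AMD_CG_SUPPORT_<FEATURE>;
	 */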
if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5815 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5816 5817 /* AMD_CG_SUPPORT_GFX_FGCG */ 5818 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5819 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5820 5821 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5822 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5823 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5824 5825 /* AMD_CG_SUPPORT_GFX_CGCG */ 5826 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5827 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5828 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5829 5830 /* AMD_CG_SUPPORT_GFX_CGLS */ 5831 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5832 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5833 5834 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5835 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5836 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5837 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5838 5839 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5840 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5841 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5842 } 5843 5844 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5845 { 5846 /* gfx11 is 32bit rptr*/ 5847 return *(uint32_t *)ring->rptr_cpu_addr; 5848 } 5849 5850 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5851 { 5852 struct amdgpu_device *adev = ring->adev; 5853 u64 wptr; 5854 5855 /* XXX check if swapping is necessary on BE */ 5856 if (ring->use_doorbell) { 5857 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5858 } else { 5859 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5860 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5861 } 5862 5863 return wptr; 5864 } 5865 5866 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5867 { 5868 struct amdgpu_device *adev = ring->adev; 5869 5870 if (ring->use_doorbell) { 5871 /* XXX check if swapping is necessary on BE */ 5872 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5873 ring->wptr); 5874 WDOORBELL64(ring->doorbell_index, ring->wptr); 5875 } else { 5876 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5877 lower_32_bits(ring->wptr)); 5878 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5879 upper_32_bits(ring->wptr)); 5880 } 5881 } 5882 5883 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5884 { 5885 /* gfx11 hardware is 32bit rptr */ 5886 return *(uint32_t *)ring->rptr_cpu_addr; 5887 } 5888 5889 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5890 { 5891 u64 wptr; 5892 5893 /* XXX check if swapping is necessary on BE */ 5894 if (ring->use_doorbell) 5895 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5896 else 5897 BUG(); 5898 return wptr; 5899 } 5900 5901 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5902 { 5903 struct amdgpu_device *adev = ring->adev; 5904 5905 /* XXX check if swapping is necessary on BE */ 5906 if (ring->use_doorbell) { 5907 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5908 ring->wptr); 5909 WDOORBELL64(ring->doorbell_index, ring->wptr); 5910 } else { 5911 BUG(); /* only DOORBELL method supported on gfx11 now */ 5912 } 5913 } 5914 5915 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5916 { 5917 struct amdgpu_device *adev = ring->adev; 5918 u32 ref_and_mask, reg_mem_engine; 5919 5920 if (!adev->gfx.funcs->get_hdp_flush_mask) { 5921 dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); 5922 return; 5923 } 5924 5925 adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); 5926 
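	/* Emit a WAIT_REG_MEM packet that writes ref_and_mask to the HDP
	 * flush request register, then polls the done register until the
	 * masked bits match, so later packets observe a completed flush.
	 */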
gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5927 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5928 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5929 ref_and_mask, ref_and_mask, 0x20); 5930 } 5931 5932 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5933 struct amdgpu_job *job, 5934 struct amdgpu_ib *ib, 5935 uint32_t flags) 5936 { 5937 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5938 u32 header, control = 0; 5939 5940 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5941 5942 control |= ib->length_dw | (vmid << 24); 5943 5944 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5945 control |= INDIRECT_BUFFER_PRE_ENB(1); 5946 5947 if (flags & AMDGPU_IB_PREEMPTED) 5948 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5949 5950 if (vmid && !ring->adev->gfx.rs64_enable) 5951 gfx_v11_0_ring_emit_de_meta(ring, 5952 !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED)); 5953 } 5954 5955 amdgpu_ring_write(ring, header); 5956 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5957 amdgpu_ring_write(ring, 5958 #ifdef __BIG_ENDIAN 5959 (2 << 0) | 5960 #endif 5961 lower_32_bits(ib->gpu_addr)); 5962 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5963 amdgpu_ring_write(ring, control); 5964 } 5965 5966 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5967 struct amdgpu_job *job, 5968 struct amdgpu_ib *ib, 5969 uint32_t flags) 5970 { 5971 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5972 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 5973 5974 /* Currently, there is a high possibility to get wave ID mismatch 5975 * between ME and GDS, leading to a hw deadlock, because ME generates 5976 * different wave IDs than the GDS expects. This situation happens 5977 * randomly when at least 5 compute pipes use GDS ordered append. 5978 * The wave IDs generated by ME are also wrong after suspend/resume. 5979 * Those are probably bugs somewhere else in the kernel driver. 5980 * 5981 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 5982 * GDS to 0 for this ring (me/pipe). 5983 */ 5984 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 5985 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 5986 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 5987 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 5988 } 5989 5990 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 5991 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 5992 amdgpu_ring_write(ring, 5993 #ifdef __BIG_ENDIAN 5994 (2 << 0) | 5995 #endif 5996 lower_32_bits(ib->gpu_addr)); 5997 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 5998 amdgpu_ring_write(ring, control); 5999 } 6000 6001 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 6002 u64 seq, unsigned flags) 6003 { 6004 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6005 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6006 6007 /* RELEASE_MEM - flush caches, send int */ 6008 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 6009 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 6010 PACKET3_RELEASE_MEM_GCR_GL2_WB | 6011 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 6012 PACKET3_RELEASE_MEM_GCR_GLM_WB | 6013 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 6014 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6015 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 6016 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 6017 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 
2 : 0))); 6018 6019 /* 6020 * the address should be Qword aligned if 64bit write, Dword 6021 * aligned if only send 32bit data low (discard data high) 6022 */ 6023 if (write64bit) 6024 BUG_ON(addr & 0x7); 6025 else 6026 BUG_ON(addr & 0x3); 6027 amdgpu_ring_write(ring, lower_32_bits(addr)); 6028 amdgpu_ring_write(ring, upper_32_bits(addr)); 6029 amdgpu_ring_write(ring, lower_32_bits(seq)); 6030 amdgpu_ring_write(ring, upper_32_bits(seq)); 6031 amdgpu_ring_write(ring, 0); 6032 } 6033 6034 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6035 { 6036 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6037 uint32_t seq = ring->fence_drv.sync_seq; 6038 uint64_t addr = ring->fence_drv.gpu_addr; 6039 6040 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 6041 upper_32_bits(addr), seq, 0xffffffff, 4); 6042 } 6043 6044 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 6045 uint16_t pasid, uint32_t flush_type, 6046 bool all_hub, uint8_t dst_sel) 6047 { 6048 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 6049 amdgpu_ring_write(ring, 6050 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 6051 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 6052 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 6053 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 6054 } 6055 6056 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6057 unsigned vmid, uint64_t pd_addr) 6058 { 6059 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6060 6061 /* compute doesn't have PFP */ 6062 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 6063 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6064 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6065 amdgpu_ring_write(ring, 0x0); 6066 } 6067 6068 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 6069 * changed in any way. 
	 */
	ring->set_q_mode_offs = 0;
	ring->set_q_mode_ptr = NULL;
}

static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
					 uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						   uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
					   u64 shadow_va, u64 csa_va,
					   u64 gds_va, bool init_shadow,
					   int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offs, end;

	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
		return;

	/*
	 * The logic here isn't easy to understand because we need to keep
	 * state across multiple executions of the function as well as
	 * between the CPU and GPU. The general idea is that the newly
	 * written GPU command has a condition on the previous one and is
	 * only executed if really necessary.
	 */

	/*
	 * The dw in the NOP controls whether the next SET_Q_MODE packet
	 * should be executed or not. Reserve 64 bits just to be on the
	 * safe side.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
	offs = ring->wptr & ring->buf_mask;

	/*
	 * We start with skipping the prefix SET_Q_MODE and always executing
	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command once the postfix has executed.
	 */
	amdgpu_ring_write(ring, shadow_va ?
1 : 0); 6171 amdgpu_ring_write(ring, 0); 6172 6173 if (ring->set_q_mode_offs) { 6174 uint64_t addr; 6175 6176 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6177 addr += ring->set_q_mode_offs << 2; 6178 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6179 } 6180 6181 /* 6182 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6183 * next prefix SET_Q_MODE packet executes as well. 6184 */ 6185 if (!shadow_va) { 6186 uint64_t addr; 6187 6188 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6189 addr += offs << 2; 6190 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6191 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6192 amdgpu_ring_write(ring, lower_32_bits(addr)); 6193 amdgpu_ring_write(ring, upper_32_bits(addr)); 6194 amdgpu_ring_write(ring, 0x1); 6195 } 6196 6197 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6198 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6199 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6200 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6201 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6202 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6203 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6204 amdgpu_ring_write(ring, shadow_va ? 6205 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6206 amdgpu_ring_write(ring, init_shadow ? 6207 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6208 6209 if (ring->set_q_mode_offs) 6210 amdgpu_ring_patch_cond_exec(ring, end); 6211 6212 if (shadow_va) { 6213 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6214 6215 /* 6216 * If the tokens match try to skip the last postfix SET_Q_MODE 6217 * packet to avoid saving/restoring the state all the time. 6218 */ 6219 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6220 *ring->set_q_mode_ptr = 0; 6221 6222 ring->set_q_mode_token = token; 6223 } else { 6224 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6225 } 6226 6227 ring->set_q_mode_offs = offs; 6228 } 6229 6230 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) 6231 { 6232 int i, r = 0; 6233 struct amdgpu_device *adev = ring->adev; 6234 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; 6235 struct amdgpu_ring *kiq_ring = &kiq->ring; 6236 unsigned long flags; 6237 6238 if (adev->enable_mes) 6239 return -EINVAL; 6240 6241 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) 6242 return -EINVAL; 6243 6244 spin_lock_irqsave(&kiq->ring_lock, flags); 6245 6246 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { 6247 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6248 return -ENOMEM; 6249 } 6250 6251 /* assert preemption condition */ 6252 amdgpu_ring_set_preempt_cond_exec(ring, false); 6253 6254 /* assert IB preemption, emit the trailing fence */ 6255 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, 6256 ring->trail_fence_gpu_addr, 6257 ++ring->trail_seq); 6258 amdgpu_ring_commit(kiq_ring); 6259 6260 spin_unlock_irqrestore(&kiq->ring_lock, flags); 6261 6262 /* poll the trailing fence */ 6263 for (i = 0; i < adev->usec_timeout; i++) { 6264 if (ring->trail_seq == 6265 le32_to_cpu(*(ring->trail_fence_cpu_addr))) 6266 break; 6267 udelay(1); 6268 } 6269 6270 if (i >= adev->usec_timeout) { 6271 r = -EINVAL; 6272 DRM_ERROR("ring %d failed to preempt ib\n", ring->idx); 6273 } 6274 6275 /* deassert preemption condition */ 6276 amdgpu_ring_set_preempt_cond_exec(ring, true); 6277 return r; 6278 } 6279 6280 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6281 { 6282 struct 
amdgpu_device *adev = ring->adev; 6283 struct v10_de_ib_state de_payload = {0}; 6284 uint64_t offset, gds_addr, de_payload_gpu_addr; 6285 void *de_payload_cpu_addr; 6286 int cnt; 6287 6288 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6289 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6290 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6291 6292 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6293 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6294 PAGE_SIZE); 6295 6296 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6297 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6298 6299 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6300 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6301 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6302 WRITE_DATA_DST_SEL(8) | 6303 WR_CONFIRM) | 6304 WRITE_DATA_CACHE_POLICY(0)); 6305 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6306 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6307 6308 if (resume) 6309 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6310 sizeof(de_payload) >> 2); 6311 else 6312 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6313 sizeof(de_payload) >> 2); 6314 } 6315 6316 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6317 bool secure) 6318 { 6319 uint32_t v = secure ? FRAME_TMZ : 0; 6320 6321 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6322 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); 6323 } 6324 6325 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6326 uint32_t reg_val_offs) 6327 { 6328 struct amdgpu_device *adev = ring->adev; 6329 6330 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6331 amdgpu_ring_write(ring, 0 | /* src: register*/ 6332 (5 << 8) | /* dst: memory */ 6333 (1 << 20)); /* write confirm */ 6334 amdgpu_ring_write(ring, reg); 6335 amdgpu_ring_write(ring, 0); 6336 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6337 reg_val_offs * 4)); 6338 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6339 reg_val_offs * 4)); 6340 } 6341 6342 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6343 uint32_t val) 6344 { 6345 uint32_t cmd = 0; 6346 6347 switch (ring->funcs->type) { 6348 case AMDGPU_RING_TYPE_GFX: 6349 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6350 break; 6351 case AMDGPU_RING_TYPE_KIQ: 6352 cmd = (1 << 16); /* no inc addr */ 6353 break; 6354 default: 6355 cmd = WR_CONFIRM; 6356 break; 6357 } 6358 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6359 amdgpu_ring_write(ring, cmd); 6360 amdgpu_ring_write(ring, reg); 6361 amdgpu_ring_write(ring, 0); 6362 amdgpu_ring_write(ring, val); 6363 } 6364 6365 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6366 uint32_t val, uint32_t mask) 6367 { 6368 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6369 } 6370 6371 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6372 uint32_t reg0, uint32_t reg1, 6373 uint32_t ref, uint32_t mask) 6374 { 6375 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6376 6377 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6378 ref, mask, 0x20); 6379 } 6380 6381 static void 6382 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6383 uint32_t me, uint32_t pipe, 6384 enum amdgpu_interrupt_state state) 6385 { 6386 uint32_t cp_int_cntl, cp_int_cntl_reg; 6387 6388 if (!me) { 6389 switch (pipe) { 6390 case 0: 6391 
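			/* ME0 drives the gfx pipes; each pipe has its own
			 * CP_INT_CNTL_RING* register.
			 */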
cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6392 break; 6393 case 1: 6394 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6395 break; 6396 default: 6397 DRM_DEBUG("invalid pipe %d\n", pipe); 6398 return; 6399 } 6400 } else { 6401 DRM_DEBUG("invalid me %d\n", me); 6402 return; 6403 } 6404 6405 switch (state) { 6406 case AMDGPU_IRQ_STATE_DISABLE: 6407 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6408 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6409 TIME_STAMP_INT_ENABLE, 0); 6410 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6411 GENERIC0_INT_ENABLE, 0); 6412 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6413 break; 6414 case AMDGPU_IRQ_STATE_ENABLE: 6415 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6416 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6417 TIME_STAMP_INT_ENABLE, 1); 6418 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6419 GENERIC0_INT_ENABLE, 1); 6420 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6421 break; 6422 default: 6423 break; 6424 } 6425 } 6426 6427 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6428 int me, int pipe, 6429 enum amdgpu_interrupt_state state) 6430 { 6431 u32 mec_int_cntl, mec_int_cntl_reg; 6432 6433 /* 6434 * amdgpu controls only the first MEC. That's why this function only 6435 * handles the setting of interrupts for this specific MEC. All other 6436 * pipes' interrupts are set by amdkfd. 6437 */ 6438 6439 if (me == 1) { 6440 switch (pipe) { 6441 case 0: 6442 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6443 break; 6444 case 1: 6445 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6446 break; 6447 case 2: 6448 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6449 break; 6450 case 3: 6451 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6452 break; 6453 default: 6454 DRM_DEBUG("invalid pipe %d\n", pipe); 6455 return; 6456 } 6457 } else { 6458 DRM_DEBUG("invalid me %d\n", me); 6459 return; 6460 } 6461 6462 switch (state) { 6463 case AMDGPU_IRQ_STATE_DISABLE: 6464 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6465 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6466 TIME_STAMP_INT_ENABLE, 0); 6467 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6468 GENERIC0_INT_ENABLE, 0); 6469 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6470 break; 6471 case AMDGPU_IRQ_STATE_ENABLE: 6472 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6473 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6474 TIME_STAMP_INT_ENABLE, 1); 6475 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6476 GENERIC0_INT_ENABLE, 1); 6477 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6478 break; 6479 default: 6480 break; 6481 } 6482 } 6483 6484 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6485 struct amdgpu_irq_src *src, 6486 unsigned type, 6487 enum amdgpu_interrupt_state state) 6488 { 6489 switch (type) { 6490 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6491 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6492 break; 6493 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6494 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6495 break; 6496 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6497 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6498 break; 6499 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6500 
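		/* MEC1 pipes 1-3 are routed exactly like pipe 0 above */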
gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6501 break; 6502 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6503 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6504 break; 6505 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6506 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6507 break; 6508 default: 6509 break; 6510 } 6511 return 0; 6512 } 6513 6514 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6515 struct amdgpu_irq_src *source, 6516 struct amdgpu_iv_entry *entry) 6517 { 6518 u32 doorbell_offset = entry->src_data[0]; 6519 u8 me_id, pipe_id, queue_id; 6520 struct amdgpu_ring *ring; 6521 int i; 6522 6523 DRM_DEBUG("IH: CP EOP\n"); 6524 6525 if (adev->enable_mes && doorbell_offset) { 6526 amdgpu_userq_process_fence_irq(adev, doorbell_offset); 6527 } else { 6528 me_id = (entry->ring_id & 0x0c) >> 2; 6529 pipe_id = (entry->ring_id & 0x03) >> 0; 6530 queue_id = (entry->ring_id & 0x70) >> 4; 6531 6532 switch (me_id) { 6533 case 0: 6534 if (pipe_id == 0) 6535 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6536 else 6537 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6538 break; 6539 case 1: 6540 case 2: 6541 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6542 ring = &adev->gfx.compute_ring[i]; 6543 /* Per-queue interrupt is supported for MEC starting from VI. 6544 * The interrupt can only be enabled/disabled per pipe instead 6545 * of per queue. 6546 */ 6547 if ((ring->me == me_id) && 6548 (ring->pipe == pipe_id) && 6549 (ring->queue == queue_id)) 6550 amdgpu_fence_process(ring); 6551 } 6552 break; 6553 } 6554 } 6555 6556 return 0; 6557 } 6558 6559 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6560 struct amdgpu_irq_src *source, 6561 unsigned int type, 6562 enum amdgpu_interrupt_state state) 6563 { 6564 u32 cp_int_cntl_reg, cp_int_cntl; 6565 int i, j; 6566 6567 switch (state) { 6568 case AMDGPU_IRQ_STATE_DISABLE: 6569 case AMDGPU_IRQ_STATE_ENABLE: 6570 for (i = 0; i < adev->gfx.me.num_me; i++) { 6571 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6572 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6573 6574 if (cp_int_cntl_reg) { 6575 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6576 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6577 PRIV_REG_INT_ENABLE, 6578 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6579 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6580 } 6581 } 6582 } 6583 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6584 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6585 /* MECs start at 1 */ 6586 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6587 6588 if (cp_int_cntl_reg) { 6589 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6590 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6591 PRIV_REG_INT_ENABLE, 6592 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6593 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6594 } 6595 } 6596 } 6597 break; 6598 default: 6599 break; 6600 } 6601 6602 return 0; 6603 } 6604 6605 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6606 struct amdgpu_irq_src *source, 6607 unsigned type, 6608 enum amdgpu_interrupt_state state) 6609 { 6610 u32 cp_int_cntl_reg, cp_int_cntl; 6611 int i, j; 6612 6613 switch (state) { 6614 case AMDGPU_IRQ_STATE_DISABLE: 6615 case AMDGPU_IRQ_STATE_ENABLE: 6616 for (i = 0; i < adev->gfx.me.num_me; i++) { 6617 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6618 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6619 6620 if (cp_int_cntl_reg) { 6621 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6622 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6623 OPCODE_ERROR_INT_ENABLE, 6624 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6625 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6626 } 6627 } 6628 } 6629 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6630 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6631 /* MECs start at 1 */ 6632 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6633 6634 if (cp_int_cntl_reg) { 6635 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6636 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6637 OPCODE_ERROR_INT_ENABLE, 6638 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6639 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6640 } 6641 } 6642 } 6643 break; 6644 default: 6645 break; 6646 } 6647 return 0; 6648 } 6649 6650 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6651 struct amdgpu_irq_src *source, 6652 unsigned int type, 6653 enum amdgpu_interrupt_state state) 6654 { 6655 u32 cp_int_cntl_reg, cp_int_cntl; 6656 int i, j; 6657 6658 switch (state) { 6659 case AMDGPU_IRQ_STATE_DISABLE: 6660 case AMDGPU_IRQ_STATE_ENABLE: 6661 for (i = 0; i < adev->gfx.me.num_me; i++) { 6662 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6663 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6664 6665 if (cp_int_cntl_reg) { 6666 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6667 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6668 PRIV_INSTR_INT_ENABLE, 6669 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6670 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6671 } 6672 } 6673 } 6674 break; 6675 default: 6676 break; 6677 } 6678 6679 return 0; 6680 } 6681 6682 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6683 struct amdgpu_iv_entry *entry) 6684 { 6685 u8 me_id, pipe_id, queue_id; 6686 struct amdgpu_ring *ring; 6687 int i; 6688 6689 me_id = (entry->ring_id & 0x0c) >> 2; 6690 pipe_id = (entry->ring_id & 0x03) >> 0; 6691 queue_id = (entry->ring_id & 0x70) >> 4; 6692 6693 if (!adev->gfx.disable_kq) { 6694 switch (me_id) { 6695 case 0: 6696 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6697 ring = &adev->gfx.gfx_ring[i]; 6698 if (ring->me == me_id && ring->pipe == pipe_id && 6699 ring->queue == queue_id) 6700 drm_sched_fault(&ring->sched); 6701 } 6702 break; 6703 case 1: 6704 case 2: 6705 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6706 ring = &adev->gfx.compute_ring[i]; 6707 if (ring->me == me_id && ring->pipe == pipe_id && 6708 ring->queue == queue_id) 6709 drm_sched_fault(&ring->sched); 6710 } 6711 break; 6712 default: 6713 BUG(); 6714 break; 6715 } 6716 } 6717 } 6718 6719 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6720 struct amdgpu_irq_src *source, 6721 struct amdgpu_iv_entry *entry) 6722 { 6723 DRM_ERROR("Illegal register access in command stream\n"); 6724 gfx_v11_0_handle_priv_fault(adev, entry); 6725 return 0; 6726 } 6727 6728 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6729 struct amdgpu_irq_src *source, 6730 struct amdgpu_iv_entry *entry) 6731 { 6732 DRM_ERROR("Illegal opcode in command stream\n"); 6733 gfx_v11_0_handle_priv_fault(adev, entry); 6734 return 0; 6735 } 6736 6737 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6738 struct amdgpu_irq_src *source, 6739 struct amdgpu_iv_entry *entry) 6740 { 6741 DRM_ERROR("Illegal instruction in command stream\n"); 6742 gfx_v11_0_handle_priv_fault(adev, entry); 6743 return 0; 6744 } 6745 6746 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6747 struct amdgpu_irq_src *source, 6748 struct amdgpu_iv_entry *entry) 6749 { 6750 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6751 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6752 6753 return 0; 6754 } 6755 6756 #if 0 6757 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6758 struct amdgpu_irq_src *src, 6759 unsigned int type, 6760 enum amdgpu_interrupt_state state) 6761 { 6762 uint32_t tmp, target; 6763 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6764 6765 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6766 target += ring->pipe; 6767 6768 switch (type) { 6769 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6770 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6771 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6772 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6773 GENERIC2_INT_ENABLE, 0); 6774 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6775 6776 tmp = RREG32_SOC15_IP(GC, target); 6777 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6778 GENERIC2_INT_ENABLE, 0); 6779 WREG32_SOC15_IP(GC, target, tmp); 6780 } else { 6781 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6782 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6783 GENERIC2_INT_ENABLE, 1); 6784 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6785 6786 tmp = RREG32_SOC15_IP(GC, target); 6787 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6788 GENERIC2_INT_ENABLE, 1); 6789 WREG32_SOC15_IP(GC, target, tmp); 6790 } 6791 break; 6792 default: 6793 BUG(); /* kiq only support GENERIC2_INT now */ 6794 break; 6795 } 6796 return 0; 
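	/* This legacy KIQ interrupt helper is compiled out above via #if 0
	 * and is only kept for reference.
	 */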
}
#endif

static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int gcr_cntl =
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}

static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
{
	/* Disable the pipe reset until the CPFW fully supports it. */
	dev_warn_once(adev->dev, "The CPFW does not support pipe reset yet.\n");
	return false;
}

static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	switch (ring->pipe) {
	case 0:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 0);
		break;
	case 1:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 0);
		break;
	default:
		break;
	}

	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);

	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
	    RS64_FW_UC_START_ADDR_LO;
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "Ring %s pipe reset to the ME firmware start PC: %s\n",
		 ring->name, r == 0 ? "succeeded" : "failed");
	/* FIXME: Sometimes the driver can't cache the ME firmware start PC
	 * correctly, so the pipe reset status relies on the later gfx ring
	 * test result.
6882 */ 6883 return 0; 6884 } 6885 6886 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, 6887 unsigned int vmid, 6888 struct amdgpu_fence *timedout_fence) 6889 { 6890 struct amdgpu_device *adev = ring->adev; 6891 bool use_mmio = false; 6892 int r; 6893 6894 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 6895 6896 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0); 6897 if (r) { 6898 6899 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6900 r = gfx_v11_reset_gfx_pipe(ring); 6901 if (r) 6902 return r; 6903 } 6904 6905 if (use_mmio) { 6906 r = gfx_v11_0_kgq_init_queue(ring, true); 6907 if (r) { 6908 dev_err(adev->dev, "failed to init kgq\n"); 6909 return r; 6910 } 6911 6912 r = amdgpu_mes_map_legacy_queue(adev, ring, 0); 6913 if (r) { 6914 dev_err(adev->dev, "failed to remap kgq\n"); 6915 return r; 6916 } 6917 } 6918 6919 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 6920 } 6921 6922 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6923 { 6924 6925 struct amdgpu_device *adev = ring->adev; 6926 uint32_t reset_pipe = 0, clean_pipe = 0; 6927 int r; 6928 6929 if (!gfx_v11_pipe_reset_support(adev)) 6930 return -EOPNOTSUPP; 6931 6932 gfx_v11_0_set_safe_mode(adev, 0); 6933 mutex_lock(&adev->srbm_mutex); 6934 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6935 6936 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6937 clean_pipe = reset_pipe; 6938 6939 if (adev->gfx.rs64_enable) { 6940 6941 switch (ring->pipe) { 6942 case 0: 6943 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6944 MEC_PIPE0_RESET, 1); 6945 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6946 MEC_PIPE0_RESET, 0); 6947 break; 6948 case 1: 6949 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6950 MEC_PIPE1_RESET, 1); 6951 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6952 MEC_PIPE1_RESET, 0); 6953 break; 6954 case 2: 6955 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6956 MEC_PIPE2_RESET, 1); 6957 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6958 MEC_PIPE2_RESET, 0); 6959 break; 6960 case 3: 6961 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6962 MEC_PIPE3_RESET, 1); 6963 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6964 MEC_PIPE3_RESET, 0); 6965 break; 6966 default: 6967 break; 6968 } 6969 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6970 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6971 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6972 RS64_FW_UC_START_ADDR_LO; 6973 } else { 6974 if (ring->me == 1) { 6975 switch (ring->pipe) { 6976 case 0: 6977 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6978 MEC_ME1_PIPE0_RESET, 1); 6979 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6980 MEC_ME1_PIPE0_RESET, 0); 6981 break; 6982 case 1: 6983 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6984 MEC_ME1_PIPE1_RESET, 1); 6985 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6986 MEC_ME1_PIPE1_RESET, 0); 6987 break; 6988 case 2: 6989 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6990 MEC_ME1_PIPE2_RESET, 1); 6991 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6992 MEC_ME1_PIPE2_RESET, 0); 6993 break; 6994 case 3: 6995 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6996 MEC_ME1_PIPE3_RESET, 1); 6997 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6998 MEC_ME1_PIPE3_RESET, 0); 6999 break; 7000 default: 7001 break; 7002 } 7003 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 7004 } else { 7005 switch 
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	clean_pipe = reset_pipe;

	if (adev->gfx.rs64_enable) {
		switch (ring->pipe) {
		case 0:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 0);
			break;
		case 1:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 0);
			break;
		case 2:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 0);
			break;
		case 3:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 0);
			break;
		default:
			break;
		}
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
		    RS64_FW_UC_START_ADDR_LO;
	} else {
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
		} else {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec2 fw pc: CP_MEC2_INSTR_PNTR */
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "Ring %s pipe reset to the MEC firmware start PC %s\n",
		 ring->name, r == 0 ? "succeeded" : "failed");
	/* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly,
	 * so the pipe reset status relies on the later compute ring test result.
	 */
	return 0;
}

static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
			       unsigned int vmid,
			       struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
	if (r) {
		dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r);
		r = gfx_v11_0_reset_compute_pipe(ring);
		if (r)
			return r;
	}

	r = gfx_v11_0_kcq_init_queue(ring, true);
	if (r) {
		dev_err(adev->dev, "failed to init kcq\n");
		return r;
	}
	r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
	if (r) {
		dev_err(adev->dev, "failed to remap kcq\n");
		return r;
	}

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
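
/*
 * IP state dump/print: gfx_v11_ip_dump() snapshots the core register list
 * plus every compute and gfx queue instance (selected via GRBM) into the
 * preallocated ip_dump_* buffers, with GFXOFF disabled around the reads;
 * gfx_v11_ip_print() renders those buffers, e.g. for a devcoredump. ME2
 * compute queues reuse the ME1 list entry for the header dump, so both
 * functions special-case regCP_MEC_ME1_HEADER_DUMP when i != 0.
 */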
static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	for (i = 0; i < reg_count; i++)
		drm_printf(p, "%-50s \t 0x%08x\n",
			   gc_reg_list_11_0[i].reg_name,
			   adev->gfx.ip_dump_core[i]);

	/* print compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.mec.num_mec,
		   adev->gfx.mec.num_pipe_per_mec,
		   adev->gfx.mec.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
						drm_printf(p, "%-50s \t 0x%08x\n",
							   "regCP_MEC_ME2_HEADER_DUMP",
							   adev->gfx.ip_dump_compute_queues[index + reg]);
					else
						drm_printf(p, "%-50s \t 0x%08x\n",
							   gc_cp_reg_list_11[reg].reg_name,
							   adev->gfx.ip_dump_compute_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}

	/* print gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
		   adev->gfx.me.num_me,
		   adev->gfx.me.num_pipe_per_me,
		   adev->gfx.me.num_queue_per_pipe);

	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
				for (reg = 0; reg < reg_count; reg++) {
					drm_printf(p, "%-50s \t 0x%08x\n",
						   gc_gfx_queue_reg_list_11[reg].reg_name,
						   adev->gfx.ip_dump_gfx_queues[index + reg]);
				}
				index += reg_count;
			}
		}
	}
}

static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t i, j, k, reg, index = 0;
	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);

	if (!adev->gfx.ip_dump_core)
		return;

	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < reg_count; i++)
		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump compute queue registers for all instances */
	if (!adev->gfx.ip_dump_compute_queues)
		return;

	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
				/* ME0 is for GFX so start from 1 for CP */
				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
				for (reg = 0; reg < reg_count; reg++) {
					if (i &&
					    gc_cp_reg_list_11[reg].reg_offset ==
					    regCP_MEC_ME1_HEADER_DUMP)
						adev->gfx.ip_dump_compute_queues[index + reg] =
							RREG32(SOC15_REG_OFFSET(GC, 0,
								regCP_MEC_ME2_HEADER_DUMP));
					else
						adev->gfx.ip_dump_compute_queues[index + reg] =
							RREG32(SOC15_REG_ENTRY_OFFSET(
								gc_cp_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);

	/* dump gfx queue registers for all instances */
	if (!adev->gfx.ip_dump_gfx_queues)
		return;

	index = 0;
	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
	amdgpu_gfx_off_ctrl(adev, false);
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->gfx.me.num_me; i++) {
		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
				soc21_grbm_select(adev, i, j, k, 0);

				for (reg = 0; reg < reg_count; reg++) {
					adev->gfx.ip_dump_gfx_queues[index + reg] =
						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_gfx_queue_reg_list_11[reg]));
				}
				index += reg_count;
			}
		}
	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	amdgpu_gfx_off_ctrl(adev, true);
}
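
/*
 * The cleaner shader is intended to scrub stale register and LDS state on
 * the compute units between submissions from different processes; the
 * RUN_CLEANER_SHADER packet carries a single reserved DW and the CP
 * launches the shader internally.
 */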
static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
	/* Emit the cleaner shader */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
}

static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_begin_use(ring);

	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
}

static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_end_use(ring);

	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
}

static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
	.name = "gfx_v11_0",
	.early_init = gfx_v11_0_early_init,
	.late_init = gfx_v11_0_late_init,
	.sw_init = gfx_v11_0_sw_init,
	.sw_fini = gfx_v11_0_sw_fini,
	.hw_init = gfx_v11_0_hw_init,
	.hw_fini = gfx_v11_0_hw_fini,
	.suspend = gfx_v11_0_suspend,
	.resume = gfx_v11_0_resume,
	.is_idle = gfx_v11_0_is_idle,
	.wait_for_idle = gfx_v11_0_wait_for_idle,
	.soft_reset = gfx_v11_0_soft_reset,
	.check_soft_reset = gfx_v11_0_check_soft_reset,
	.post_soft_reset = gfx_v11_0_post_soft_reset,
	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
	.set_powergating_state = gfx_v11_0_set_powergating_state,
	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
	.dump_ip_state = gfx_v11_ip_dump,
	.print_ip_state = gfx_v11_ip_print,
};

static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
	.emit_frame_size = /* 247 DWs maximum in total if 16 IBs */
		5 + /* update_spm_vmid */
		5 + /* COND_EXEC */
		22 + /* SET_Q_PREEMPTION_MODE */
		7 + /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		4 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		22 + /* SET_Q_PREEMPTION_MODE */
		8 + 8 + /* FENCE x2 */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v11_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kgq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};
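
/*
 * The compute ring vtable below mirrors the gfx one but omits the gfx-only
 * hooks (context control, cond_exec, shadow state, preemption and frame
 * control), which only apply to the graphics pipeline.
 */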
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		5 + /* update_spm_vmid */
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v11_0_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
		8 + /* gfx_v11_0_emit_mem_sync */
		2, /* gfx_v11_0_ring_emit_cleaner_shader */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = gfx_v11_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
	.reset = gfx_v11_0_reset_kcq,
	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v11_0_ring_begin_use,
	.end_use = gfx_v11_0_ring_end_use,
};
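
/*
 * The KIQ (kernel interface queue) ring is driver-internal: it carries
 * register access and queue map/unmap requests to the CP rather than user
 * command submissions, hence the emit_rreg hook and the dedicated KIQ
 * fence emission.
 */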
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v11_0_ring_emit_gds_switch */
		7 + /* gfx_v11_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
	.test_ring = gfx_v11_0_ring_test_ring,
	.test_ib = gfx_v11_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v11_0_ring_emit_rreg,
	.emit_wreg = gfx_v11_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
};

static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
	.set = gfx_v11_0_set_eop_interrupt_state,
	.process = gfx_v11_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
	.set = gfx_v11_0_set_priv_reg_fault_state,
	.process = gfx_v11_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
	.set = gfx_v11_0_set_bad_op_fault_state,
	.process = gfx_v11_0_bad_op_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
	.set = gfx_v11_0_set_priv_inst_fault_state,
	.process = gfx_v11_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
	.process = gfx_v11_0_rlc_gc_fed_irq,
};

static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;

	adev->gfx.bad_op_irq.num_types = 1;
	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;

	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
}

static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}

static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
}

static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
{
	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
			    adev->gfx.config.max_sh_per_se *
			    adev->gfx.config.max_shader_engines;

	adev->gds.gds_size = 0x1000;
	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set gfx eng mqd */
	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
		sizeof(struct v11_gfx_mqd);
	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
		gfx_v11_0_gfx_mqd_init;
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v11_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v11_0_compute_mqd_init;
}

static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							  u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
}
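
/*
 * WGP/CU bitmap helpers. On gfx11 CUs are paired into WGPs, so the per-SA
 * WGP mask is max_cu_per_sh / 2 bits wide; the inactive mask read from
 * CC_GC_SHADER_ARRAY_CONFIG / GC_USER_SHADER_ARRAY_CONFIG is inverted and
 * clamped to that width. Worked example (assumed config, not from this
 * file): with max_cu_per_sh = 16 the WGP mask is 0xff; an inactive mask of
 * 0x03 gives active WGPs 0xfc, which expands pairwise to the CU bitmap
 * 0xfff0, since each WGP bit maps to two adjacent CU bits.
 */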
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 data, wgp_bitmask;

	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	wgp_bitmask =
		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);

	return (~data) & wgp_bitmask;
}

static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
{
	u32 wgp_idx, wgp_active_bitmap;
	u32 cu_bitmap_per_wgp, cu_active_bitmap;

	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
	cu_active_bitmap = 0;

	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
		/* each enabled WGP enables a pair of CUs */
		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
		if (wgp_active_bitmap & (1 << wgp_idx))
			cu_active_bitmap |= cu_bitmap_per_wgp;
	}

	return cu_active_bitmap;
}

static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned disable_masks[8 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			bitmap = i * adev->gfx.config.max_sh_per_se + j;
			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
				continue;
			mask = 1;
			counter = 0;
			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 8 && j < 2)
				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);

			/*
			 * GFX11 can support more than 4 SEs, while the bitmap
			 * in the cu_info struct is 4x4 and the ioctl interface
			 * struct drm_amdgpu_info_device must stay stable.
			 * So we use the last two columns of the bitmap to store
			 * the CU mask for SEs 4 to 7; the layout of the bitmap is:
			 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
			 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
			 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
			 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
			 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
			 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
			 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
			 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
			 */
			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask)
					counter++;

				mask <<= 1;
			}
			active_cu_number += counter;
		}
	}
	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v11_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 11,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v11_0_ip_funcs,
};