/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

/* Driver-local sizing constants for GFX11. */
#define GFX11_NUM_GFX_RINGS 1
#define GFX11_MEC_HPD_SIZE 2048

#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388

/*
 * Register offsets (and their *_BASE_IDX companions) defined locally.
 * NOTE(review): presumably these are missing from the included
 * gc_11_0_0 / smuio register headers - confirm before moving them.
 */
#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
#define regPC_CONFIG_CNTL_1 0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX 1

#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0 0x0030
#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0_BASE_IDX 1
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0 0x0031
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0_BASE_IDX 1

/*
 * *_DEFAULT values for gfx queue registers.
 * NOTE(review): the users are outside this part of the file; presumably
 * they seed the gfx MQD - confirm against the MQD init code.
 */
#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000

/*
 * *_DEFAULT values for compute queue registers.
 * NOTE(review): regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT is defined twice
 * below with the same value; the second definition is redundant.
 */
#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_MQD_CONTROL_DEFAULT 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000

/*
 * Firmware images this module may request; the names follow the
 * "amdgpu/<prefix>_{pfp,me,mec,rlc,...}.bin" patterns used by
 * gfx_v11_0_init_microcode().
 */
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_6_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_6_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_6_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_6_rlc.bin");

/*
 * Named list of GC status/debug registers (instance 0).
 * NOTE(review): the consumer is outside this part of the file; presumably
 * this is the register set captured for an IP state dump - confirm.
 */
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/*
	 * cp header registers: the same register is listed eight times on
	 * purpose. NOTE(review): presumably each read returns the next entry
	 * of the header dump - confirm; do not de-duplicate.
	 */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

/*
 * Named list of compute queue (CP_HQD_*) registers followed by eight reads
 * of the ME header dump register.
 */
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers (intentionally repeated eight times) */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

/*
 * Named list of gfx queue (CP_GFX_HQD_*, CP_GFX_MQD_*, CP_RB_*, CP_IB1_*)
 * registers followed by eight reads each of the PFP and ME header dump
 * registers.
 */
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers (PFP then ME, each repeated eight times) */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

/*
 * Golden register settings programmed by gfx_v11_0_init_golden_registers()
 * for every GC 11 variant (bare metal only).
 * NOTE(review): the two trailing values are presumably the AND mask and the
 * OR value - confirm against SOC15_REG_GOLDEN_VALUE.
 */
static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

/*
 * Additional golden register settings that
 * gfx_v11_0_init_golden_registers() programs for GC 11.0.1 and 11.0.4.
 */
static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

/*
 * SH_MEM_CONFIG value composed of: 64-bit address mode, unaligned alignment
 * mode, and INITIAL_INST_PREFETCH = 3.
 */
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

/* Forward declarations of static helpers defined later in this file. */
static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); 349 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 350 uint32_t val); 351 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 352 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 353 uint16_t pasid, uint32_t flush_type, 354 bool all_hub, uint8_t dst_sel); 355 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 356 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 357 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 358 bool enable); 359 360 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) 361 { 362 struct amdgpu_device *adev = kiq_ring->adev; 363 u64 shader_mc_addr; 364 365 /* Cleaner shader MC address */ 366 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8; 367 368 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 369 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 370 PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ 371 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 372 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 373 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 374 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */ 375 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */ 376 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 377 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 378 } 379 380 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring, 381 struct amdgpu_ring *ring) 382 { 383 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 384 uint64_t wptr_addr = ring->wptr_gpu_addr; 385 uint32_t me = 0, eng_sel = 0; 386 387 switch 
(ring->funcs->type) { 388 case AMDGPU_RING_TYPE_COMPUTE: 389 me = 1; 390 eng_sel = 0; 391 break; 392 case AMDGPU_RING_TYPE_GFX: 393 me = 0; 394 eng_sel = 4; 395 break; 396 case AMDGPU_RING_TYPE_MES: 397 me = 2; 398 eng_sel = 5; 399 break; 400 default: 401 WARN_ON(1); 402 } 403 404 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 405 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 406 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 407 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 408 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 409 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 410 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 411 PACKET3_MAP_QUEUES_ME((me)) | 412 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 413 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 414 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 415 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 416 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 417 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 418 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 419 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 420 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 421 } 422 423 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 424 struct amdgpu_ring *ring, 425 enum amdgpu_unmap_queues_action action, 426 u64 gpu_addr, u64 seq) 427 { 428 struct amdgpu_device *adev = kiq_ring->adev; 429 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 430 431 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 432 amdgpu_mes_unmap_legacy_queue(adev, ring, action, 433 gpu_addr, seq, 0); 434 return; 435 } 436 437 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 438 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 439 PACKET3_UNMAP_QUEUES_ACTION(action) | 440 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 441 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 442 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 443 amdgpu_ring_write(kiq_ring, 444 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 445 446 if (action == PREEMPT_QUEUES_NO_UNMAP) { 447 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 448 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 449 amdgpu_ring_write(kiq_ring, seq); 450 } else { 451 amdgpu_ring_write(kiq_ring, 0); 452 amdgpu_ring_write(kiq_ring, 0); 453 amdgpu_ring_write(kiq_ring, 0); 454 } 455 } 456 457 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring, 458 struct amdgpu_ring *ring, 459 u64 addr, 460 u64 seq) 461 { 462 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 
4 : 0; 463 464 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 465 amdgpu_ring_write(kiq_ring, 466 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 467 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 468 PACKET3_QUERY_STATUS_COMMAND(2)); 469 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 470 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 471 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 472 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 473 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 474 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 475 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 476 } 477 478 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 479 uint16_t pasid, uint32_t flush_type, 480 bool all_hub) 481 { 482 gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 483 } 484 485 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = { 486 .kiq_set_resources = gfx11_kiq_set_resources, 487 .kiq_map_queues = gfx11_kiq_map_queues, 488 .kiq_unmap_queues = gfx11_kiq_unmap_queues, 489 .kiq_query_status = gfx11_kiq_query_status, 490 .kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs, 491 .set_resources_size = 8, 492 .map_queues_size = 7, 493 .unmap_queues_size = 6, 494 .query_status_size = 7, 495 .invalidate_tlbs_size = 2, 496 }; 497 498 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) 499 { 500 adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs; 501 } 502 503 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) 504 { 505 if (amdgpu_sriov_vf(adev)) 506 return; 507 508 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 509 case IP_VERSION(11, 0, 1): 510 case IP_VERSION(11, 0, 4): 511 soc15_program_register_sequence(adev, 512 golden_settings_gc_11_0_1, 513 (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); 514 break; 515 default: 516 break; 517 } 518 soc15_program_register_sequence(adev, 519 golden_settings_gc_11_0, 520 (const 
u32)ARRAY_SIZE(golden_settings_gc_11_0)); 521 522 } 523 524 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, 525 bool wc, uint32_t reg, uint32_t val) 526 { 527 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 528 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | 529 WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0)); 530 amdgpu_ring_write(ring, reg); 531 amdgpu_ring_write(ring, 0); 532 amdgpu_ring_write(ring, val); 533 } 534 535 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 536 int mem_space, int opt, uint32_t addr0, 537 uint32_t addr1, uint32_t ref, uint32_t mask, 538 uint32_t inv) 539 { 540 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 541 amdgpu_ring_write(ring, 542 /* memory (1) or register (0) */ 543 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 544 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 545 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 546 WAIT_REG_MEM_ENGINE(eng_sel))); 547 548 if (mem_space) 549 BUG_ON(addr0 & 0x3); /* Dword align */ 550 amdgpu_ring_write(ring, addr0); 551 amdgpu_ring_write(ring, addr1); 552 amdgpu_ring_write(ring, ref); 553 amdgpu_ring_write(ring, mask); 554 amdgpu_ring_write(ring, inv); /* poll interval */ 555 } 556 557 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) 558 { 559 /* Header itself is a NOP packet */ 560 if (num_nop == 1) { 561 amdgpu_ring_write(ring, ring->funcs->nop); 562 return; 563 } 564 565 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/ 566 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe))); 567 568 /* Header is at index 0, followed by num_nops - 1 NOP packet's */ 569 amdgpu_ring_insert_nop(ring, num_nop - 1); 570 } 571 572 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring) 573 { 574 struct amdgpu_device *adev = ring->adev; 575 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 576 uint32_t tmp = 0; 577 unsigned i; 578 int r; 579 580 WREG32(scratch, 
0xCAFEDEAD); 581 r = amdgpu_ring_alloc(ring, 5); 582 if (r) { 583 drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n", 584 ring->idx, r); 585 return r; 586 } 587 588 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 589 gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF); 590 } else { 591 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 592 amdgpu_ring_write(ring, scratch - 593 PACKET3_SET_UCONFIG_REG_START); 594 amdgpu_ring_write(ring, 0xDEADBEEF); 595 } 596 amdgpu_ring_commit(ring); 597 598 for (i = 0; i < adev->usec_timeout; i++) { 599 tmp = RREG32(scratch); 600 if (tmp == 0xDEADBEEF) 601 break; 602 if (amdgpu_emu_mode == 1) 603 msleep(1); 604 else 605 udelay(1); 606 } 607 608 if (i >= adev->usec_timeout) 609 r = -ETIMEDOUT; 610 return r; 611 } 612 613 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 614 { 615 struct amdgpu_device *adev = ring->adev; 616 struct amdgpu_ib ib; 617 struct dma_fence *f = NULL; 618 unsigned index; 619 uint64_t gpu_addr; 620 uint32_t *cpu_ptr; 621 long r; 622 623 /* MES KIQ fw hasn't indirect buffer support for now */ 624 if (adev->enable_mes_kiq && 625 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 626 return 0; 627 628 memset(&ib, 0, sizeof(ib)); 629 630 r = amdgpu_device_wb_get(adev, &index); 631 if (r) 632 return r; 633 634 gpu_addr = adev->wb.gpu_addr + (index * 4); 635 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 636 cpu_ptr = &adev->wb.wb[index]; 637 638 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 639 if (r) { 640 drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r); 641 goto err1; 642 } 643 644 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 645 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 646 ib.ptr[2] = lower_32_bits(gpu_addr); 647 ib.ptr[3] = upper_32_bits(gpu_addr); 648 ib.ptr[4] = 0xDEADBEEF; 649 ib.length_dw = 5; 650 651 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 652 if (r) 653 goto err2; 654 655 r = dma_fence_wait_timeout(f, false, timeout); 656 
if (r == 0) { 657 r = -ETIMEDOUT; 658 goto err2; 659 } else if (r < 0) { 660 goto err2; 661 } 662 663 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 664 r = 0; 665 else 666 r = -EINVAL; 667 err2: 668 amdgpu_ib_free(&ib, NULL); 669 dma_fence_put(f); 670 err1: 671 amdgpu_device_wb_free(adev, index); 672 return r; 673 } 674 675 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev) 676 { 677 amdgpu_ucode_release(&adev->gfx.pfp_fw); 678 amdgpu_ucode_release(&adev->gfx.me_fw); 679 amdgpu_ucode_release(&adev->gfx.rlc_fw); 680 amdgpu_ucode_release(&adev->gfx.mec_fw); 681 682 kfree(adev->gfx.rlc.register_list_format); 683 } 684 685 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 686 { 687 const struct psp_firmware_header_v1_0 *toc_hdr; 688 int err = 0; 689 690 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 691 AMDGPU_UCODE_REQUIRED, 692 "amdgpu/%s_toc.bin", ucode_prefix); 693 if (err) 694 goto out; 695 696 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 697 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 698 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 699 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 700 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 701 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 702 return 0; 703 out: 704 amdgpu_ucode_release(&adev->psp.toc_fw); 705 return err; 706 } 707 708 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) 709 { 710 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 711 case IP_VERSION(11, 0, 0): 712 case IP_VERSION(11, 0, 2): 713 case IP_VERSION(11, 0, 3): 714 if ((adev->gfx.me_fw_version >= 1505) && 715 (adev->gfx.pfp_fw_version >= 1600) && 716 (adev->gfx.mec_fw_version >= 512)) { 717 if (amdgpu_sriov_vf(adev)) 718 adev->gfx.cp_gfx_shadow = true; 719 else 720 adev->gfx.cp_gfx_shadow = false; 721 } 722 break; 723 default: 724 adev->gfx.cp_gfx_shadow 
= false; 725 break; 726 } 727 } 728 729 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) 730 { 731 char ucode_prefix[25]; 732 int err; 733 const struct rlc_firmware_header_v2_0 *rlc_hdr; 734 uint16_t version_major; 735 uint16_t version_minor; 736 737 DRM_DEBUG("\n"); 738 739 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 740 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, 741 AMDGPU_UCODE_REQUIRED, 742 "amdgpu/%s_pfp.bin", ucode_prefix); 743 if (err) 744 goto out; 745 /* check pfp fw hdr version to decide if enable rs64 for gfx11.*/ 746 adev->gfx.rs64_enable = amdgpu_ucode_hdr_version( 747 (union amdgpu_firmware_header *) 748 adev->gfx.pfp_fw->data, 2, 0); 749 if (adev->gfx.rs64_enable) { 750 dev_info(adev->dev, "CP RS64 enable\n"); 751 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP); 752 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); 753 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK); 754 } else { 755 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); 756 } 757 758 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, 759 AMDGPU_UCODE_REQUIRED, 760 "amdgpu/%s_me.bin", ucode_prefix); 761 if (err) 762 goto out; 763 if (adev->gfx.rs64_enable) { 764 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME); 765 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); 766 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK); 767 } else { 768 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); 769 } 770 771 if (!amdgpu_sriov_vf(adev)) { 772 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && 773 adev->pdev->revision == 0xCE) 774 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 775 AMDGPU_UCODE_REQUIRED, 776 "amdgpu/gc_11_0_0_rlc_1.bin"); 777 else if (amdgpu_is_kicker_fw(adev)) 778 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 779 AMDGPU_UCODE_REQUIRED, 780 
"amdgpu/%s_rlc_kicker.bin", ucode_prefix); 781 else 782 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 783 AMDGPU_UCODE_REQUIRED, 784 "amdgpu/%s_rlc.bin", ucode_prefix); 785 if (err) 786 goto out; 787 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 788 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 789 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 790 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 791 if (err) 792 goto out; 793 } 794 795 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 796 AMDGPU_UCODE_REQUIRED, 797 "amdgpu/%s_mec.bin", ucode_prefix); 798 if (err) 799 goto out; 800 if (adev->gfx.rs64_enable) { 801 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 802 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 803 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 804 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 805 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 806 } else { 807 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); 808 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); 809 } 810 811 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 812 err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix); 813 814 /* only one MEC for gfx 11.0.0. 
*/ 815 adev->gfx.mec2_fw = NULL; 816 817 gfx_v11_0_check_fw_cp_gfx_shadow(adev); 818 819 if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { 820 err = adev->gfx.imu.funcs->init_microcode(adev); 821 if (err) 822 DRM_ERROR("Failed to init imu firmware!\n"); 823 return err; 824 } 825 826 out: 827 if (err) { 828 amdgpu_ucode_release(&adev->gfx.pfp_fw); 829 amdgpu_ucode_release(&adev->gfx.me_fw); 830 amdgpu_ucode_release(&adev->gfx.rlc_fw); 831 amdgpu_ucode_release(&adev->gfx.mec_fw); 832 } 833 834 return err; 835 } 836 837 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev) 838 { 839 u32 count = 0; 840 const struct cs_section_def *sect = NULL; 841 const struct cs_extent_def *ext = NULL; 842 843 /* begin clear state */ 844 count += 2; 845 /* context control state */ 846 count += 3; 847 848 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 849 for (ext = sect->section; ext->extent != NULL; ++ext) { 850 if (sect->id == SECT_CONTEXT) 851 count += 2 + ext->reg_count; 852 else 853 return 0; 854 } 855 } 856 857 /* set PA_SC_TILE_STEERING_OVERRIDE */ 858 count += 3; 859 /* end clear state */ 860 count += 2; 861 /* clear state */ 862 count += 2; 863 864 return count; 865 } 866 867 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) 868 { 869 u32 count = 0; 870 int ctx_reg_offset; 871 872 if (adev->gfx.rlc.cs_data == NULL) 873 return; 874 if (buffer == NULL) 875 return; 876 877 count = amdgpu_gfx_csb_preamble_start(buffer); 878 count = amdgpu_gfx_csb_data_parser(adev, buffer, count); 879 880 ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 881 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 882 buffer[count++] = cpu_to_le32(ctx_reg_offset); 883 buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override); 884 885 amdgpu_gfx_csb_preamble_end(buffer, count); 886 } 887 888 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev) 
889 { 890 /* clear state block */ 891 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 892 &adev->gfx.rlc.clear_state_gpu_addr, 893 (void **)&adev->gfx.rlc.cs_ptr); 894 895 /* jump table block */ 896 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 897 &adev->gfx.rlc.cp_table_gpu_addr, 898 (void **)&adev->gfx.rlc.cp_table_ptr); 899 } 900 901 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 902 { 903 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 904 905 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; 906 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 907 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); 908 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); 909 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3); 910 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL); 911 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX); 912 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0); 913 adev->gfx.rlc.rlcg_reg_access_supported = true; 914 } 915 916 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) 917 { 918 const struct cs_section_def *cs_data; 919 int r; 920 921 adev->gfx.rlc.cs_data = gfx11_cs_data; 922 923 cs_data = adev->gfx.rlc.cs_data; 924 925 if (cs_data) { 926 /* init clear state block */ 927 r = amdgpu_gfx_rlc_init_csb(adev); 928 if (r) 929 return r; 930 } 931 932 /* init spm vmid with 0xf */ 933 if (adev->gfx.rlc.funcs->update_spm_vmid) 934 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf); 935 936 return 0; 937 } 938 939 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev) 940 { 941 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 942 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 943 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 944 } 945 946 static void gfx_v11_0_me_init(struct 
amdgpu_device *adev) 947 { 948 bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES); 949 950 amdgpu_gfx_graphics_queue_acquire(adev); 951 } 952 953 static int gfx_v11_0_mec_init(struct amdgpu_device *adev) 954 { 955 int r; 956 u32 *hpd; 957 size_t mec_hpd_size; 958 959 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 960 961 /* take ownership of the relevant compute queues */ 962 amdgpu_gfx_compute_queue_acquire(adev); 963 mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE; 964 965 if (mec_hpd_size) { 966 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 967 AMDGPU_GEM_DOMAIN_GTT, 968 &adev->gfx.mec.hpd_eop_obj, 969 &adev->gfx.mec.hpd_eop_gpu_addr, 970 (void **)&hpd); 971 if (r) { 972 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 973 gfx_v11_0_mec_fini(adev); 974 return r; 975 } 976 977 memset(hpd, 0, mec_hpd_size); 978 979 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 980 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 981 } 982 983 return 0; 984 } 985 986 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address) 987 { 988 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 989 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 990 (address << SQ_IND_INDEX__INDEX__SHIFT)); 991 return RREG32_SOC15(GC, 0, regSQ_IND_DATA); 992 } 993 994 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave, 995 uint32_t thread, uint32_t regno, 996 uint32_t num, uint32_t *out) 997 { 998 WREG32_SOC15(GC, 0, regSQ_IND_INDEX, 999 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 1000 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 1001 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 1002 (SQ_IND_INDEX__AUTO_INCR_MASK)); 1003 while (num--) 1004 *(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA); 1005 } 1006 1007 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 1008 { 1009 /* in gfx11 the SIMD_ID is specified as part of the 
INSTANCE 1010 * field when performing a select_se_sh so it should be 1011 * zero here */ 1012 WARN_ON(simd != 0); 1013 1014 /* type 3 wave data */ 1015 dst[(*no_fields)++] = 3; 1016 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS); 1017 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO); 1018 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI); 1019 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO); 1020 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI); 1021 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1); 1022 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2); 1023 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC); 1024 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC); 1025 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS); 1026 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS); 1027 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2); 1028 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1); 1029 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0); 1030 dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE); 1031 } 1032 1033 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1034 uint32_t wave, uint32_t start, 1035 uint32_t size, uint32_t *dst) 1036 { 1037 WARN_ON(simd != 0); 1038 1039 wave_read_regs( 1040 adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size, 1041 dst); 1042 } 1043 1044 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 1045 uint32_t wave, uint32_t thread, 1046 uint32_t start, uint32_t size, 1047 uint32_t *dst) 1048 { 1049 wave_read_regs( 1050 adev, wave, thread, 1051 start + SQIND_WAVE_VGPRS_OFFSET, size, dst); 1052 } 1053 1054 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev, 1055 u32 me, u32 pipe, u32 q, 
u32 vm, u32 xcc_id) 1056 { 1057 soc21_grbm_select(adev, me, pipe, q, vm); 1058 } 1059 1060 /* all sizes are in bytes */ 1061 #define MQD_SHADOW_BASE_SIZE 73728 1062 #define MQD_SHADOW_BASE_ALIGNMENT 256 1063 #define MQD_FWWORKAREA_SIZE 484 1064 #define MQD_FWWORKAREA_ALIGNMENT 256 1065 1066 static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev, 1067 struct amdgpu_gfx_shadow_info *shadow_info) 1068 { 1069 /* for gfx */ 1070 shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE; 1071 shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT; 1072 shadow_info->csa_size = MQD_FWWORKAREA_SIZE; 1073 shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT; 1074 /* for compute */ 1075 shadow_info->eop_size = GFX11_MEC_HPD_SIZE; 1076 shadow_info->eop_alignment = 256; 1077 } 1078 1079 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev, 1080 struct amdgpu_gfx_shadow_info *shadow_info, 1081 bool skip_check) 1082 { 1083 if (adev->gfx.cp_gfx_shadow || skip_check) { 1084 gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info); 1085 return 0; 1086 } else { 1087 memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info)); 1088 return -ENOTSUPP; 1089 } 1090 } 1091 1092 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { 1093 .get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter, 1094 .select_se_sh = &gfx_v11_0_select_se_sh, 1095 .read_wave_data = &gfx_v11_0_read_wave_data, 1096 .read_wave_sgprs = &gfx_v11_0_read_wave_sgprs, 1097 .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, 1098 .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, 1099 .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, 1100 .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info, 1101 .get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask, 1102 }; 1103 1104 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) 1105 { 1106 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1107 case IP_VERSION(11, 0, 0): 1108 case IP_VERSION(11, 0, 2): 1109 
adev->gfx.config.max_hw_contexts = 8; 1110 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1111 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1112 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1113 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1114 break; 1115 case IP_VERSION(11, 0, 3): 1116 adev->gfx.ras = &gfx_v11_0_3_ras; 1117 adev->gfx.config.max_hw_contexts = 8; 1118 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1119 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1120 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 1121 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1122 break; 1123 case IP_VERSION(11, 0, 1): 1124 case IP_VERSION(11, 0, 4): 1125 case IP_VERSION(11, 5, 0): 1126 case IP_VERSION(11, 5, 1): 1127 case IP_VERSION(11, 5, 2): 1128 case IP_VERSION(11, 5, 3): 1129 case IP_VERSION(11, 5, 4): 1130 case IP_VERSION(11, 5, 6): 1131 adev->gfx.config.max_hw_contexts = 8; 1132 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1133 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1134 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; 1135 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300; 1136 break; 1137 default: 1138 BUG(); 1139 break; 1140 } 1141 1142 return 0; 1143 } 1144 1145 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, 1146 int me, int pipe, int queue) 1147 { 1148 struct amdgpu_ring *ring; 1149 unsigned int irq_type; 1150 unsigned int hw_prio; 1151 1152 ring = &adev->gfx.gfx_ring[ring_id]; 1153 1154 ring->me = me; 1155 ring->pipe = pipe; 1156 ring->queue = queue; 1157 1158 ring->ring_obj = NULL; 1159 ring->use_doorbell = true; 1160 if (adev->gfx.disable_kq) { 1161 ring->no_scheduler = true; 1162 ring->no_user_submission = true; 1163 } 1164 1165 if (!ring_id) 1166 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1167 else 1168 ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; 1169 ring->vm_hub = AMDGPU_GFXHUB(0); 1170 sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, 
ring->queue); 1171 1172 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; 1173 hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 1174 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1175 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1176 hw_prio, NULL); 1177 } 1178 1179 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1180 int mec, int pipe, int queue) 1181 { 1182 int r; 1183 unsigned irq_type; 1184 struct amdgpu_ring *ring; 1185 unsigned int hw_prio; 1186 1187 ring = &adev->gfx.compute_ring[ring_id]; 1188 1189 /* mec0 is me1 */ 1190 ring->me = mec + 1; 1191 ring->pipe = pipe; 1192 ring->queue = queue; 1193 1194 ring->ring_obj = NULL; 1195 ring->use_doorbell = true; 1196 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1197 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1198 + (ring_id * GFX11_MEC_HPD_SIZE); 1199 ring->vm_hub = AMDGPU_GFXHUB(0); 1200 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1201 1202 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1203 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1204 + ring->pipe; 1205 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
1206 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 1207 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1208 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1209 hw_prio, NULL); 1210 if (r) 1211 return r; 1212 1213 return 0; 1214 } 1215 1216 static struct { 1217 SOC21_FIRMWARE_ID id; 1218 unsigned int offset; 1219 unsigned int size; 1220 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX]; 1221 1222 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 1223 { 1224 RLC_TABLE_OF_CONTENT *ucode = rlc_toc; 1225 1226 while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) && 1227 (ucode->id < SOC21_FIRMWARE_ID_MAX)) { 1228 rlc_autoload_info[ucode->id].id = ucode->id; 1229 rlc_autoload_info[ucode->id].offset = ucode->offset * 4; 1230 rlc_autoload_info[ucode->id].size = ucode->size * 4; 1231 1232 ucode++; 1233 } 1234 } 1235 1236 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev) 1237 { 1238 uint32_t total_size = 0; 1239 SOC21_FIRMWARE_ID id; 1240 1241 gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr); 1242 1243 for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++) 1244 total_size += rlc_autoload_info[id].size; 1245 1246 /* In case the offset in rlc toc ucode is aligned */ 1247 if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset) 1248 total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset + 1249 rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size; 1250 1251 return total_size; 1252 } 1253 1254 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev) 1255 { 1256 int r; 1257 uint32_t total_size; 1258 1259 total_size = gfx_v11_0_calc_toc_total_size(adev); 1260 1261 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 1262 AMDGPU_GEM_DOMAIN_VRAM | 1263 AMDGPU_GEM_DOMAIN_GTT, 1264 &adev->gfx.rlc.rlc_autoload_bo, 1265 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1266 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1267 1268 if (r) { 1269 
dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 1270 return r; 1271 } 1272 1273 return 0; 1274 } 1275 1276 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 1277 SOC21_FIRMWARE_ID id, 1278 const void *fw_data, 1279 uint32_t fw_size, 1280 uint32_t *fw_autoload_mask) 1281 { 1282 uint32_t toc_offset; 1283 uint32_t toc_fw_size; 1284 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 1285 1286 if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX) 1287 return; 1288 1289 toc_offset = rlc_autoload_info[id].offset; 1290 toc_fw_size = rlc_autoload_info[id].size; 1291 1292 if (fw_size == 0) 1293 fw_size = toc_fw_size; 1294 1295 if (fw_size > toc_fw_size) 1296 fw_size = toc_fw_size; 1297 1298 memcpy(ptr + toc_offset, fw_data, fw_size); 1299 1300 if (fw_size < toc_fw_size) 1301 memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size); 1302 1303 if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME)) 1304 *(uint64_t *)fw_autoload_mask |= 1ULL << id; 1305 } 1306 1307 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev, 1308 uint32_t *fw_autoload_mask) 1309 { 1310 void *data; 1311 uint32_t size; 1312 uint64_t *toc_ptr; 1313 1314 *(uint64_t *)fw_autoload_mask |= 0x1; 1315 1316 DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask); 1317 1318 data = adev->psp.toc.start_addr; 1319 size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size; 1320 1321 toc_ptr = (uint64_t *)data + size / 8 - 1; 1322 *toc_ptr = *(uint64_t *)fw_autoload_mask; 1323 1324 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC, 1325 data, size, fw_autoload_mask); 1326 } 1327 1328 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev, 1329 uint32_t *fw_autoload_mask) 1330 { 1331 const __le32 *fw_data; 1332 uint32_t fw_size; 1333 const struct gfx_firmware_header_v1_0 *cp_hdr; 1334 const struct gfx_firmware_header_v2_0 
*cpv2_hdr; 1335 const struct rlc_firmware_header_v2_0 *rlc_hdr; 1336 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 1337 uint16_t version_major, version_minor; 1338 1339 if (adev->gfx.rs64_enable) { 1340 /* pfp ucode */ 1341 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1342 adev->gfx.pfp_fw->data; 1343 /* instruction */ 1344 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1345 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1346 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1347 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP, 1348 fw_data, fw_size, fw_autoload_mask); 1349 /* data */ 1350 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1351 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1352 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1353 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK, 1354 fw_data, fw_size, fw_autoload_mask); 1355 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK, 1356 fw_data, fw_size, fw_autoload_mask); 1357 /* me ucode */ 1358 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1359 adev->gfx.me_fw->data; 1360 /* instruction */ 1361 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1362 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1363 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1364 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME, 1365 fw_data, fw_size, fw_autoload_mask); 1366 /* data */ 1367 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1368 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1369 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1370 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK, 1371 fw_data, fw_size, fw_autoload_mask); 1372 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK, 1373 fw_data, fw_size, fw_autoload_mask); 1374 /* mec ucode */ 1375 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 1376 
adev->gfx.mec_fw->data; 1377 /* instruction */ 1378 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1379 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 1380 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 1381 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC, 1382 fw_data, fw_size, fw_autoload_mask); 1383 /* data */ 1384 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1385 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 1386 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 1387 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK, 1388 fw_data, fw_size, fw_autoload_mask); 1389 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK, 1390 fw_data, fw_size, fw_autoload_mask); 1391 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK, 1392 fw_data, fw_size, fw_autoload_mask); 1393 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK, 1394 fw_data, fw_size, fw_autoload_mask); 1395 } else { 1396 /* pfp ucode */ 1397 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1398 adev->gfx.pfp_fw->data; 1399 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 1400 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1401 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1402 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP, 1403 fw_data, fw_size, fw_autoload_mask); 1404 1405 /* me ucode */ 1406 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1407 adev->gfx.me_fw->data; 1408 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 1409 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1410 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes); 1411 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME, 1412 fw_data, fw_size, fw_autoload_mask); 1413 1414 /* mec ucode */ 1415 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1416 adev->gfx.mec_fw->data; 1417 fw_data = 
(const __le32 *) (adev->gfx.mec_fw->data + 1418 le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes)); 1419 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) - 1420 cp_hdr->jt_size * 4; 1421 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC, 1422 fw_data, fw_size, fw_autoload_mask); 1423 } 1424 1425 /* rlc ucode */ 1426 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 1427 adev->gfx.rlc_fw->data; 1428 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1429 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 1430 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 1431 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE, 1432 fw_data, fw_size, fw_autoload_mask); 1433 1434 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1435 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1436 if (version_major == 2) { 1437 if (version_minor >= 2) { 1438 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1439 1440 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1441 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1442 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1443 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE, 1444 fw_data, fw_size, fw_autoload_mask); 1445 1446 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1447 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1448 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1449 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT, 1450 fw_data, fw_size, fw_autoload_mask); 1451 } 1452 } 1453 } 1454 1455 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev, 1456 uint32_t *fw_autoload_mask) 1457 { 1458 const __le32 *fw_data; 1459 uint32_t fw_size; 1460 const struct sdma_firmware_header_v2_0 *sdma_hdr; 1461 1462 sdma_hdr = (const struct sdma_firmware_header_v2_0 *) 
1463 adev->sdma.instance[0].fw->data; 1464 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1465 le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes)); 1466 fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes); 1467 1468 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1469 SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask); 1470 1471 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1472 le32_to_cpu(sdma_hdr->ctl_ucode_offset)); 1473 fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes); 1474 1475 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1476 SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask); 1477 } 1478 1479 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev, 1480 uint32_t *fw_autoload_mask) 1481 { 1482 const __le32 *fw_data; 1483 unsigned fw_size; 1484 const struct mes_firmware_header_v1_0 *mes_hdr; 1485 int pipe, ucode_id, data_id; 1486 1487 for (pipe = 0; pipe < 2; pipe++) { 1488 if (pipe==0) { 1489 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0; 1490 data_id = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK; 1491 } else { 1492 ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1; 1493 data_id = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK; 1494 } 1495 1496 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1497 adev->mes.fw[pipe]->data; 1498 1499 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1500 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1501 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1502 1503 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1504 ucode_id, fw_data, fw_size, fw_autoload_mask); 1505 1506 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1507 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1508 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1509 1510 gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, 1511 data_id, fw_data, fw_size, fw_autoload_mask); 1512 } 1513 } 1514 1515 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct 
amdgpu_device *adev) 1516 { 1517 uint32_t rlc_g_offset, rlc_g_size; 1518 uint64_t gpu_addr; 1519 uint32_t autoload_fw_id[2]; 1520 1521 memset(autoload_fw_id, 0, sizeof(uint32_t) * 2); 1522 1523 /* RLC autoload sequence 2: copy ucode */ 1524 gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id); 1525 gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id); 1526 gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id); 1527 gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id); 1528 1529 rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset; 1530 rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size; 1531 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset; 1532 1533 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr)); 1534 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr)); 1535 1536 WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size); 1537 1538 /* RLC autoload sequence 3: load IMU fw */ 1539 if (adev->gfx.imu.funcs->load_microcode) 1540 adev->gfx.imu.funcs->load_microcode(adev); 1541 /* RLC autoload sequence 4 init IMU fw */ 1542 if (adev->gfx.imu.funcs->setup_imu) 1543 adev->gfx.imu.funcs->setup_imu(adev); 1544 if (adev->gfx.imu.funcs->start_imu) 1545 adev->gfx.imu.funcs->start_imu(adev); 1546 1547 /* RLC autoload sequence 5 disable gpa mode */ 1548 gfx_v11_0_disable_gpa_mode(adev); 1549 1550 return 0; 1551 } 1552 1553 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev) 1554 { 1555 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 1556 uint32_t *ptr; 1557 uint32_t inst; 1558 1559 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); 1560 if (!ptr) { 1561 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); 1562 adev->gfx.ip_dump_core = NULL; 1563 } else { 1564 adev->gfx.ip_dump_core = ptr; 1565 } 1566 1567 /* Allocate memory for compute queue registers for all the instances */ 1568 
reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 1569 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * 1570 adev->gfx.mec.num_queue_per_pipe; 1571 1572 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1573 if (!ptr) { 1574 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); 1575 adev->gfx.ip_dump_compute_queues = NULL; 1576 } else { 1577 adev->gfx.ip_dump_compute_queues = ptr; 1578 } 1579 1580 /* Allocate memory for gfx queue registers for all the instances */ 1581 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 1582 inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * 1583 adev->gfx.me.num_queue_per_pipe; 1584 1585 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); 1586 if (!ptr) { 1587 DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); 1588 adev->gfx.ip_dump_gfx_queues = NULL; 1589 } else { 1590 adev->gfx.ip_dump_gfx_queues = ptr; 1591 } 1592 } 1593 1594 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) 1595 { 1596 int i, j, k, r, ring_id; 1597 int xcc_id = 0; 1598 struct amdgpu_device *adev = ip_block->adev; 1599 int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */ 1600 1601 INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); 1602 1603 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1604 case IP_VERSION(11, 0, 0): 1605 case IP_VERSION(11, 0, 1): 1606 case IP_VERSION(11, 0, 2): 1607 case IP_VERSION(11, 0, 3): 1608 case IP_VERSION(11, 0, 4): 1609 case IP_VERSION(11, 5, 0): 1610 case IP_VERSION(11, 5, 1): 1611 case IP_VERSION(11, 5, 2): 1612 case IP_VERSION(11, 5, 3): 1613 case IP_VERSION(11, 5, 4): 1614 case IP_VERSION(11, 5, 6): 1615 adev->gfx.me.num_me = 1; 1616 adev->gfx.me.num_pipe_per_me = 1; 1617 adev->gfx.me.num_queue_per_pipe = 2; 1618 adev->gfx.mec.num_mec = 1; 1619 adev->gfx.mec.num_pipe_per_mec = 4; 1620 adev->gfx.mec.num_queue_per_pipe = 4; 1621 break; 1622 default: 1623 adev->gfx.me.num_me = 1; 1624 adev->gfx.me.num_pipe_per_me 
= 1; 1625 adev->gfx.me.num_queue_per_pipe = 1; 1626 adev->gfx.mec.num_mec = 1; 1627 adev->gfx.mec.num_pipe_per_mec = 4; 1628 adev->gfx.mec.num_queue_per_pipe = 8; 1629 break; 1630 } 1631 1632 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1633 case IP_VERSION(11, 0, 0): 1634 case IP_VERSION(11, 0, 2): 1635 case IP_VERSION(11, 0, 3): 1636 if (!adev->gfx.disable_uq && 1637 adev->gfx.me_fw_version >= 2420 && 1638 adev->gfx.pfp_fw_version >= 2580 && 1639 adev->gfx.mec_fw_version >= 2650 && 1640 adev->mes.fw_version[0] >= 120) { 1641 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1642 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1643 } 1644 break; 1645 case IP_VERSION(11, 0, 1): 1646 case IP_VERSION(11, 0, 4): 1647 case IP_VERSION(11, 5, 0): 1648 case IP_VERSION(11, 5, 1): 1649 case IP_VERSION(11, 5, 2): 1650 case IP_VERSION(11, 5, 3): 1651 /* add firmware version checks here */ 1652 if (0 && !adev->gfx.disable_uq) { 1653 adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs; 1654 adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs; 1655 } 1656 break; 1657 default: 1658 break; 1659 } 1660 1661 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1662 case IP_VERSION(11, 0, 0): 1663 case IP_VERSION(11, 0, 2): 1664 case IP_VERSION(11, 0, 3): 1665 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1666 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1667 if (adev->gfx.me_fw_version >= 2280 && 1668 adev->gfx.pfp_fw_version >= 2370 && 1669 adev->gfx.mec_fw_version >= 2450 && 1670 adev->mes.fw_version[0] >= 99) { 1671 adev->gfx.enable_cleaner_shader = true; 1672 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1673 if (r) { 1674 adev->gfx.enable_cleaner_shader = false; 1675 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1676 } 1677 } 1678 break; 1679 case IP_VERSION(11, 0, 1): 1680 case IP_VERSION(11, 0, 4): 1681 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 
1682 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1683 if (adev->gfx.pfp_fw_version >= 102 && 1684 adev->gfx.mec_fw_version >= 66 && 1685 adev->mes.fw_version[0] >= 128) { 1686 adev->gfx.enable_cleaner_shader = true; 1687 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1688 if (r) { 1689 adev->gfx.enable_cleaner_shader = false; 1690 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1691 } 1692 } 1693 break; 1694 case IP_VERSION(11, 5, 0): 1695 case IP_VERSION(11, 5, 1): 1696 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1697 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1698 if (adev->gfx.mec_fw_version >= 26 && 1699 adev->mes.fw_version[0] >= 114) { 1700 adev->gfx.enable_cleaner_shader = true; 1701 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1702 if (r) { 1703 adev->gfx.enable_cleaner_shader = false; 1704 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1705 } 1706 } 1707 break; 1708 case IP_VERSION(11, 5, 2): 1709 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1710 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1711 if (adev->gfx.me_fw_version >= 12 && 1712 adev->gfx.pfp_fw_version >= 15 && 1713 adev->gfx.mec_fw_version >= 15) { 1714 adev->gfx.enable_cleaner_shader = true; 1715 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1716 if (r) { 1717 adev->gfx.enable_cleaner_shader = false; 1718 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1719 } 1720 } 1721 break; 1722 case IP_VERSION(11, 5, 3): 1723 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1724 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1725 if (adev->gfx.me_fw_version >= 7 && 1726 adev->gfx.pfp_fw_version >= 8 && 1727 adev->gfx.mec_fw_version >= 8) { 1728 adev->gfx.enable_cleaner_shader = true; 1729 r = 
amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1730 if (r) { 1731 adev->gfx.enable_cleaner_shader = false; 1732 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1733 } 1734 } 1735 break; 1736 case IP_VERSION(11, 5, 4): 1737 adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; 1738 adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); 1739 if (adev->gfx.me_fw_version >= 4 && 1740 adev->gfx.pfp_fw_version >= 7 && 1741 adev->gfx.mec_fw_version >= 5) { 1742 adev->gfx.enable_cleaner_shader = true; 1743 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); 1744 if (r) { 1745 adev->gfx.enable_cleaner_shader = false; 1746 dev_err(adev->dev, "Failed to initialize cleaner shader\n"); 1747 } 1748 } 1749 break; 1750 default: 1751 adev->gfx.enable_cleaner_shader = false; 1752 break; 1753 } 1754 1755 /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ 1756 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && 1757 amdgpu_sriov_is_pp_one_vf(adev)) 1758 adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; 1759 1760 /* EOP Event */ 1761 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1762 GFX_11_0_0__SRCID__CP_EOP_INTERRUPT, 1763 &adev->gfx.eop_irq); 1764 if (r) 1765 return r; 1766 1767 /* Bad opcode Event */ 1768 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1769 GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR, 1770 &adev->gfx.bad_op_irq); 1771 if (r) 1772 return r; 1773 1774 /* Privileged reg */ 1775 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1776 GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT, 1777 &adev->gfx.priv_reg_irq); 1778 if (r) 1779 return r; 1780 1781 /* Privileged inst */ 1782 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP, 1783 GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT, 1784 &adev->gfx.priv_inst_irq); 1785 if (r) 1786 return r; 1787 1788 /* FED error */ 1789 r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX, 1790 
GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT, 1791 &adev->gfx.rlc_gc_fed_irq); 1792 if (r) 1793 return r; 1794 1795 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1796 1797 gfx_v11_0_me_init(adev); 1798 1799 r = gfx_v11_0_rlc_init(adev); 1800 if (r) { 1801 DRM_ERROR("Failed to init rlc BOs!\n"); 1802 return r; 1803 } 1804 1805 r = gfx_v11_0_mec_init(adev); 1806 if (r) { 1807 DRM_ERROR("Failed to init MEC BOs!\n"); 1808 return r; 1809 } 1810 1811 if (adev->gfx.num_gfx_rings) { 1812 ring_id = 0; 1813 /* set up the gfx ring */ 1814 for (i = 0; i < adev->gfx.me.num_me; i++) { 1815 for (j = 0; j < num_queue_per_pipe; j++) { 1816 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 1817 if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j)) 1818 continue; 1819 1820 r = gfx_v11_0_gfx_ring_init(adev, ring_id, 1821 i, k, j); 1822 if (r) 1823 return r; 1824 ring_id++; 1825 } 1826 } 1827 } 1828 } 1829 1830 if (adev->gfx.num_compute_rings) { 1831 ring_id = 0; 1832 /* set up the compute queues - allocate horizontally across pipes */ 1833 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1834 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1835 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1836 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1837 k, j)) 1838 continue; 1839 1840 r = gfx_v11_0_compute_ring_init(adev, ring_id, 1841 i, k, j); 1842 if (r) 1843 return r; 1844 1845 ring_id++; 1846 } 1847 } 1848 } 1849 } 1850 1851 adev->gfx.gfx_supported_reset = 1852 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 1853 adev->gfx.compute_supported_reset = 1854 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 1855 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1856 case IP_VERSION(11, 0, 0): 1857 case IP_VERSION(11, 0, 2): 1858 case IP_VERSION(11, 0, 3): 1859 if ((adev->gfx.me_fw_version >= 2280) && 1860 (adev->gfx.mec_fw_version >= 2410) && 1861 !amdgpu_sriov_vf(adev) && 1862 !adev->debug_disable_gpu_ring_reset) { 1863 adev->gfx.compute_supported_reset 
|= AMDGPU_RESET_TYPE_PER_QUEUE; 1864 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1865 } 1866 break; 1867 default: 1868 if (!amdgpu_sriov_vf(adev) && 1869 !adev->debug_disable_gpu_ring_reset) { 1870 adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1871 adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 1872 } 1873 break; 1874 } 1875 1876 if (!adev->enable_mes_kiq) { 1877 r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0); 1878 if (r) { 1879 DRM_ERROR("Failed to init KIQ BOs!\n"); 1880 return r; 1881 } 1882 1883 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1884 if (r) 1885 return r; 1886 } 1887 1888 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0); 1889 if (r) 1890 return r; 1891 1892 /* allocate visible FB for rlc auto-loading fw */ 1893 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1894 r = gfx_v11_0_rlc_autoload_buffer_init(adev); 1895 if (r) 1896 return r; 1897 } 1898 1899 r = gfx_v11_0_gpu_early_init(adev); 1900 if (r) 1901 return r; 1902 1903 if (amdgpu_gfx_ras_sw_init(adev)) { 1904 dev_err(adev->dev, "Failed to initialize gfx ras block!\n"); 1905 return -EINVAL; 1906 } 1907 1908 gfx_v11_0_alloc_ip_dump(adev); 1909 1910 r = amdgpu_gfx_sysfs_init(adev); 1911 if (r) 1912 return r; 1913 1914 return 0; 1915 } 1916 1917 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev) 1918 { 1919 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj, 1920 &adev->gfx.pfp.pfp_fw_gpu_addr, 1921 (void **)&adev->gfx.pfp.pfp_fw_ptr); 1922 1923 amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj, 1924 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 1925 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 1926 } 1927 1928 static void gfx_v11_0_me_fini(struct amdgpu_device *adev) 1929 { 1930 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, 1931 &adev->gfx.me.me_fw_gpu_addr, 1932 (void **)&adev->gfx.me.me_fw_ptr); 1933 1934 amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj, 1935 &adev->gfx.me.me_fw_data_gpu_addr, 1936 
(void **)&adev->gfx.me.me_fw_data_ptr); 1937 } 1938 1939 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1940 { 1941 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1942 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1943 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1944 } 1945 1946 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) 1947 { 1948 int i; 1949 struct amdgpu_device *adev = ip_block->adev; 1950 1951 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1952 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1953 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1954 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1955 1956 amdgpu_gfx_mqd_sw_fini(adev, 0); 1957 1958 if (!adev->enable_mes_kiq) { 1959 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 1960 amdgpu_gfx_kiq_fini(adev, 0); 1961 } 1962 1963 amdgpu_gfx_cleaner_shader_sw_fini(adev); 1964 1965 gfx_v11_0_pfp_fini(adev); 1966 gfx_v11_0_me_fini(adev); 1967 gfx_v11_0_rlc_fini(adev); 1968 gfx_v11_0_mec_fini(adev); 1969 1970 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1971 gfx_v11_0_rlc_autoload_buffer_fini(adev); 1972 1973 gfx_v11_0_free_microcode(adev); 1974 1975 amdgpu_gfx_sysfs_fini(adev); 1976 1977 kfree(adev->gfx.ip_dump_core); 1978 kfree(adev->gfx.ip_dump_compute_queues); 1979 kfree(adev->gfx.ip_dump_gfx_queues); 1980 1981 return 0; 1982 } 1983 1984 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1985 u32 sh_num, u32 instance, int xcc_id) 1986 { 1987 u32 data; 1988 1989 if (instance == 0xffffffff) 1990 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1991 INSTANCE_BROADCAST_WRITES, 1); 1992 else 1993 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1994 instance); 1995 1996 if (se_num == 0xffffffff) 1997 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1998 1); 1999 else 2000 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 2001 2002 if (sh_num == 0xffffffff) 2003 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, 
SA_BROADCAST_WRITES, 2004 1); 2005 else 2006 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 2007 2008 WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data); 2009 } 2010 2011 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev) 2012 { 2013 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 2014 2015 gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE); 2016 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 2017 CC_GC_SA_UNIT_DISABLE, 2018 SA_DISABLE); 2019 gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE); 2020 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 2021 GC_USER_SA_UNIT_DISABLE, 2022 SA_DISABLE); 2023 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 2024 adev->gfx.config.max_shader_engines); 2025 2026 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 2027 } 2028 2029 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) 2030 { 2031 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 2032 u32 rb_mask; 2033 2034 gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE); 2035 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 2036 CC_RB_BACKEND_DISABLE, 2037 BACKEND_DISABLE); 2038 gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE); 2039 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 2040 GC_USER_RB_BACKEND_DISABLE, 2041 BACKEND_DISABLE); 2042 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 2043 adev->gfx.config.max_shader_engines); 2044 2045 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 2046 } 2047 2048 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) 2049 { 2050 u32 rb_bitmap_per_sa; 2051 u32 rb_bitmap_width_per_sa; 2052 u32 max_sa; 2053 u32 active_sa_bitmap; 2054 u32 global_active_rb_bitmap; 2055 u32 active_rb_bitmap = 0; 2056 u32 i; 2057 2058 /* query sa bitmap from SA_UNIT_DISABLE 
registers */ 2059 active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev); 2060 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 2061 global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev); 2062 2063 /* generate active rb bitmap according to active sa bitmap */ 2064 max_sa = adev->gfx.config.max_shader_engines * 2065 adev->gfx.config.max_sh_per_se; 2066 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 2067 adev->gfx.config.max_sh_per_se; 2068 rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); 2069 2070 for (i = 0; i < max_sa; i++) { 2071 if (active_sa_bitmap & (1 << i)) 2072 active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); 2073 } 2074 2075 active_rb_bitmap &= global_active_rb_bitmap; 2076 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 2077 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 2078 } 2079 2080 #define DEFAULT_SH_MEM_BASES (0x6000) 2081 #define LDS_APP_BASE 0x1 2082 #define SCRATCH_APP_BASE 0x2 2083 2084 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev) 2085 { 2086 int i; 2087 uint32_t sh_mem_bases; 2088 uint32_t data; 2089 2090 /* 2091 * Configure apertures: 2092 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 2093 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 2094 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 2095 */ 2096 sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) | 2097 SCRATCH_APP_BASE; 2098 2099 mutex_lock(&adev->srbm_mutex); 2100 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2101 soc21_grbm_select(adev, 0, 0, 0, i); 2102 /* CP and shaders */ 2103 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2104 WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases); 2105 2106 /* Enable trap for each kfd vmid. 
*/ 2107 data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL); 2108 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 2109 WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data); 2110 } 2111 soc21_grbm_select(adev, 0, 0, 0, 0); 2112 mutex_unlock(&adev->srbm_mutex); 2113 2114 /* 2115 * Initialize all compute VMIDs to have no GDS, GWS, or OA 2116 * access. These should be enabled by FW for target VMIDs. 2117 */ 2118 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 2119 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0); 2120 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0); 2121 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0); 2122 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0); 2123 } 2124 } 2125 2126 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev) 2127 { 2128 int vmid; 2129 2130 /* 2131 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA 2132 * access. Compute VMIDs should be enabled by FW for target VMIDs, 2133 * the driver can enable them for graphics. VMID0 should maintain 2134 * access so that HWS firmware can save/restore entries. 2135 */ 2136 for (vmid = 1; vmid < 16; vmid++) { 2137 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0); 2138 WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0); 2139 WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0); 2140 WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0); 2141 } 2142 } 2143 2144 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev) 2145 { 2146 /* TODO: harvest feature to be added later. */ 2147 } 2148 2149 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev) 2150 { 2151 /* TCCs are global (not instanced). 
*/ 2152 uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) | 2153 RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE); 2154 2155 adev->gfx.config.tcc_disabled_mask = 2156 REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) | 2157 (REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16); 2158 } 2159 2160 static void gfx_v11_0_constants_init(struct amdgpu_device *adev) 2161 { 2162 u32 tmp; 2163 int i; 2164 2165 if (!amdgpu_sriov_vf(adev)) 2166 WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 2167 2168 gfx_v11_0_setup_rb(adev); 2169 gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); 2170 gfx_v11_0_get_tcc_info(adev); 2171 adev->gfx.config.pa_sc_tile_steering_override = 0; 2172 2173 /* Set whether texture coordinate truncation is conformant. */ 2174 tmp = RREG32_SOC15(GC, 0, regTA_CNTL2); 2175 adev->gfx.config.ta_cntl2_truncate_coord_mode = 2176 REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE); 2177 2178 /* XXX SH_MEM regs */ 2179 /* where to put LDS, scratch, GPUVM in FSA64 space */ 2180 mutex_lock(&adev->srbm_mutex); 2181 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 2182 soc21_grbm_select(adev, 0, 0, 0, i); 2183 /* CP and shaders */ 2184 WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 2185 if (i != 0) { 2186 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 2187 (adev->gmc.private_aperture_start >> 48)); 2188 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 2189 (adev->gmc.shared_aperture_start >> 48)); 2190 WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp); 2191 } 2192 } 2193 soc21_grbm_select(adev, 0, 0, 0, 0); 2194 2195 mutex_unlock(&adev->srbm_mutex); 2196 2197 gfx_v11_0_init_compute_vmid(adev); 2198 gfx_v11_0_init_gds_vmid(adev); 2199 } 2200 2201 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev, 2202 int me, int pipe) 2203 { 2204 if (me != 0) 2205 return 0; 2206 2207 switch (pipe) { 2208 case 0: 2209 return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 2210 case 1: 2211 
return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 2212 default: 2213 return 0; 2214 } 2215 } 2216 2217 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev, 2218 int me, int pipe) 2219 { 2220 /* 2221 * amdgpu controls only the first MEC. That's why this function only 2222 * handles the setting of interrupts for this specific MEC. All other 2223 * pipes' interrupts are set by amdkfd. 2224 */ 2225 if (me != 1) 2226 return 0; 2227 2228 switch (pipe) { 2229 case 0: 2230 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 2231 case 1: 2232 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 2233 case 2: 2234 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 2235 case 3: 2236 return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 2237 default: 2238 return 0; 2239 } 2240 } 2241 2242 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2243 bool enable) 2244 { 2245 u32 tmp, cp_int_cntl_reg; 2246 int i, j; 2247 2248 if (amdgpu_sriov_vf(adev)) 2249 return; 2250 2251 for (i = 0; i < adev->gfx.me.num_me; i++) { 2252 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 2253 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 2254 2255 if (cp_int_cntl_reg) { 2256 tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 2257 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 2258 enable ? 1 : 0); 2259 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 2260 enable ? 1 : 0); 2261 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 2262 enable ? 1 : 0); 2263 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 2264 enable ? 
1 : 0); 2265 WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp); 2266 } 2267 } 2268 } 2269 } 2270 2271 static int gfx_v11_0_init_csb(struct amdgpu_device *adev) 2272 { 2273 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 2274 2275 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI, 2276 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2277 WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO, 2278 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2279 WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 2280 2281 return 0; 2282 } 2283 2284 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev) 2285 { 2286 u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL); 2287 2288 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 2289 WREG32_SOC15(GC, 0, regRLC_CNTL, tmp); 2290 } 2291 2292 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev) 2293 { 2294 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2295 udelay(50); 2296 WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2297 udelay(50); 2298 } 2299 2300 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 2301 bool enable) 2302 { 2303 uint32_t rlc_pg_cntl; 2304 2305 rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); 2306 2307 if (!enable) { 2308 /* RLC_PG_CNTL[23] = 0 (default) 2309 * RLC will wait for handshake acks with SMU 2310 * GFXOFF will be enabled 2311 * RLC_PG_CNTL[23] = 1 2312 * RLC will not issue any message to SMU 2313 * hence no handshake between SMU & RLC 2314 * GFXOFF will be disabled 2315 */ 2316 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2317 } else 2318 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 2319 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl); 2320 } 2321 2322 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev) 2323 { 2324 /* TODO: enable rlc & smu handshake until smu 2325 * and gfxoff feature works as expected */ 2326 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 2327 gfx_v11_0_rlc_smu_handshake_cntl(adev, 
false); 2328 2329 WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2330 udelay(50); 2331 } 2332 2333 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev) 2334 { 2335 uint32_t tmp; 2336 2337 /* enable Save Restore Machine */ 2338 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL)); 2339 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2340 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 2341 WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp); 2342 } 2343 2344 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev) 2345 { 2346 const struct rlc_firmware_header_v2_0 *hdr; 2347 const __le32 *fw_data; 2348 unsigned i, fw_size; 2349 2350 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2351 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2352 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2353 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2354 2355 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, 2356 RLCG_UCODE_LOADING_START_ADDRESS); 2357 2358 for (i = 0; i < fw_size; i++) 2359 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA, 2360 le32_to_cpup(fw_data++)); 2361 2362 WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2363 } 2364 2365 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev) 2366 { 2367 const struct rlc_firmware_header_v2_2 *hdr; 2368 const __le32 *fw_data; 2369 unsigned i, fw_size; 2370 u32 tmp; 2371 2372 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 2373 2374 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2375 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 2376 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 2377 2378 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0); 2379 2380 for (i = 0; i < fw_size; i++) { 2381 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2382 msleep(1); 2383 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA, 2384 le32_to_cpup(fw_data++)); 2385 } 2386 2387 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 
adev->gfx.rlc_fw_version); 2388 2389 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2390 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 2391 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 2392 2393 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0); 2394 for (i = 0; i < fw_size; i++) { 2395 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2396 msleep(1); 2397 WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA, 2398 le32_to_cpup(fw_data++)); 2399 } 2400 2401 WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 2402 2403 tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL); 2404 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 2405 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 2406 WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp); 2407 } 2408 2409 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev) 2410 { 2411 const struct rlc_firmware_header_v2_3 *hdr; 2412 const __le32 *fw_data; 2413 unsigned i, fw_size; 2414 u32 tmp; 2415 2416 hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data; 2417 2418 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2419 le32_to_cpu(hdr->rlcp_ucode_offset_bytes)); 2420 fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4; 2421 2422 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0); 2423 2424 for (i = 0; i < fw_size; i++) { 2425 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2426 msleep(1); 2427 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA, 2428 le32_to_cpup(fw_data++)); 2429 } 2430 2431 WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version); 2432 2433 tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE); 2434 tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 2435 WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp); 2436 2437 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2438 le32_to_cpu(hdr->rlcv_ucode_offset_bytes)); 2439 fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4; 2440 2441 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0); 2442 2443 for (i = 
0; i < fw_size; i++) { 2444 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 2445 msleep(1); 2446 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA, 2447 le32_to_cpup(fw_data++)); 2448 } 2449 2450 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version); 2451 2452 tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL); 2453 tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1); 2454 WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp); 2455 } 2456 2457 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev) 2458 { 2459 const struct rlc_firmware_header_v2_0 *hdr; 2460 uint16_t version_major; 2461 uint16_t version_minor; 2462 2463 if (!adev->gfx.rlc_fw) 2464 return -EINVAL; 2465 2466 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2467 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2468 2469 version_major = le16_to_cpu(hdr->header.header_version_major); 2470 version_minor = le16_to_cpu(hdr->header.header_version_minor); 2471 2472 if (version_major == 2) { 2473 gfx_v11_0_load_rlcg_microcode(adev); 2474 if (amdgpu_dpm == 1) { 2475 if (version_minor >= 2) 2476 gfx_v11_0_load_rlc_iram_dram_microcode(adev); 2477 if (version_minor == 3) 2478 gfx_v11_0_load_rlcp_rlcv_microcode(adev); 2479 } 2480 2481 return 0; 2482 } 2483 2484 return -EINVAL; 2485 } 2486 2487 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev) 2488 { 2489 int r; 2490 2491 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 2492 gfx_v11_0_init_csb(adev); 2493 2494 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 2495 gfx_v11_0_rlc_enable_srm(adev); 2496 } else { 2497 if (amdgpu_sriov_vf(adev)) { 2498 gfx_v11_0_init_csb(adev); 2499 return 0; 2500 } 2501 2502 adev->gfx.rlc.funcs->stop(adev); 2503 2504 /* disable CG */ 2505 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0); 2506 2507 /* disable PG */ 2508 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0); 2509 2510 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2511 /* legacy rlc firmware loading */ 2512 r = 
gfx_v11_0_rlc_load_microcode(adev); 2513 if (r) 2514 return r; 2515 } 2516 2517 gfx_v11_0_init_csb(adev); 2518 2519 adev->gfx.rlc.funcs->start(adev); 2520 } 2521 return 0; 2522 } 2523 2524 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr) 2525 { 2526 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2527 uint32_t tmp; 2528 int i; 2529 2530 /* Trigger an invalidation of the L1 instruction caches */ 2531 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2532 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2533 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2534 2535 /* Wait for invalidation complete */ 2536 for (i = 0; i < usec_timeout; i++) { 2537 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2538 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2539 INVALIDATE_CACHE_COMPLETE)) 2540 break; 2541 udelay(1); 2542 } 2543 2544 if (i >= usec_timeout) { 2545 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2546 return -EINVAL; 2547 } 2548 2549 if (amdgpu_emu_mode == 1) 2550 amdgpu_device_flush_hdp(adev, NULL); 2551 2552 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2553 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2554 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2555 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2556 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2557 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2558 2559 /* Program me ucode address into intruction cache address register */ 2560 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2561 lower_32_bits(addr) & 0xFFFFF000); 2562 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2563 upper_32_bits(addr)); 2564 2565 return 0; 2566 } 2567 2568 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr) 2569 { 2570 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2571 uint32_t tmp; 2572 int i; 2573 2574 /* Trigger an invalidation of the L1 instruction caches */ 2575 tmp = 
RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2576 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2577 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2578 2579 /* Wait for invalidation complete */ 2580 for (i = 0; i < usec_timeout; i++) { 2581 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2582 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2583 INVALIDATE_CACHE_COMPLETE)) 2584 break; 2585 udelay(1); 2586 } 2587 2588 if (i >= usec_timeout) { 2589 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2590 return -EINVAL; 2591 } 2592 2593 if (amdgpu_emu_mode == 1) 2594 amdgpu_device_flush_hdp(adev, NULL); 2595 2596 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2597 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2598 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2599 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2600 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2601 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2602 2603 /* Program pfp ucode address into intruction cache address register */ 2604 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2605 lower_32_bits(addr) & 0xFFFFF000); 2606 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2607 upper_32_bits(addr)); 2608 2609 return 0; 2610 } 2611 2612 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr) 2613 { 2614 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2615 uint32_t tmp; 2616 int i; 2617 2618 /* Trigger an invalidation of the L1 instruction caches */ 2619 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2620 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2621 2622 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2623 2624 /* Wait for invalidation complete */ 2625 for (i = 0; i < usec_timeout; i++) { 2626 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2627 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2628 INVALIDATE_CACHE_COMPLETE)) 2629 break; 2630 udelay(1); 2631 } 2632 
2633 if (i >= usec_timeout) { 2634 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2635 return -EINVAL; 2636 } 2637 2638 if (amdgpu_emu_mode == 1) 2639 amdgpu_device_flush_hdp(adev, NULL); 2640 2641 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2642 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2643 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2644 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1); 2645 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2646 2647 /* Program mec1 ucode address into intruction cache address register */ 2648 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, 2649 lower_32_bits(addr) & 0xFFFFF000); 2650 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2651 upper_32_bits(addr)); 2652 2653 return 0; 2654 } 2655 2656 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2657 { 2658 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2659 uint32_t tmp; 2660 unsigned i, pipe_id; 2661 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2662 2663 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2664 adev->gfx.pfp_fw->data; 2665 2666 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 2667 lower_32_bits(addr)); 2668 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 2669 upper_32_bits(addr)); 2670 2671 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 2672 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 2673 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0); 2674 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 2675 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 2676 2677 /* 2678 * Programming any of the CP_PFP_IC_BASE registers 2679 * forces invalidation of the ME L1 I$. 
Wait for the 2680 * invalidation complete 2681 */ 2682 for (i = 0; i < usec_timeout; i++) { 2683 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2684 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2685 INVALIDATE_CACHE_COMPLETE)) 2686 break; 2687 udelay(1); 2688 } 2689 2690 if (i >= usec_timeout) { 2691 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2692 return -EINVAL; 2693 } 2694 2695 /* Prime the L1 instruction caches */ 2696 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2697 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 2698 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 2699 /* Waiting for cache primed*/ 2700 for (i = 0; i < usec_timeout; i++) { 2701 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 2702 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 2703 ICACHE_PRIMED)) 2704 break; 2705 udelay(1); 2706 } 2707 2708 if (i >= usec_timeout) { 2709 dev_err(adev->dev, "failed to prime instruction cache\n"); 2710 return -EINVAL; 2711 } 2712 2713 mutex_lock(&adev->srbm_mutex); 2714 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2715 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2716 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 2717 (pfp_hdr->ucode_start_addr_hi << 30) | 2718 (pfp_hdr->ucode_start_addr_lo >> 2)); 2719 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 2720 pfp_hdr->ucode_start_addr_hi >> 2); 2721 2722 /* 2723 * Program CP_ME_CNTL to reset given PIPE to take 2724 * effect of CP_PFP_PRGRM_CNTR_START. 2725 */ 2726 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2727 if (pipe_id == 0) 2728 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2729 PFP_PIPE0_RESET, 1); 2730 else 2731 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2732 PFP_PIPE1_RESET, 1); 2733 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2734 2735 /* Clear pfp pipe0 reset bit. 
*/ 2736 if (pipe_id == 0) 2737 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2738 PFP_PIPE0_RESET, 0); 2739 else 2740 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2741 PFP_PIPE1_RESET, 0); 2742 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2743 2744 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 2745 lower_32_bits(addr2)); 2746 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 2747 upper_32_bits(addr2)); 2748 } 2749 soc21_grbm_select(adev, 0, 0, 0, 0); 2750 mutex_unlock(&adev->srbm_mutex); 2751 2752 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2753 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2754 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2755 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2756 2757 /* Invalidate the data caches */ 2758 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2759 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2760 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2761 2762 for (i = 0; i < usec_timeout; i++) { 2763 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2764 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2765 INVALIDATE_DCACHE_COMPLETE)) 2766 break; 2767 udelay(1); 2768 } 2769 2770 if (i >= usec_timeout) { 2771 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2772 return -EINVAL; 2773 } 2774 2775 return 0; 2776 } 2777 2778 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2779 { 2780 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2781 uint32_t tmp; 2782 unsigned i, pipe_id; 2783 const struct gfx_firmware_header_v2_0 *me_hdr; 2784 2785 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2786 adev->gfx.me_fw->data; 2787 2788 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 2789 lower_32_bits(addr)); 2790 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 2791 upper_32_bits(addr)); 2792 2793 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 2794 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 2795 
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 2796 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 2797 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 2798 2799 /* 2800 * Programming any of the CP_ME_IC_BASE registers 2801 * forces invalidation of the ME L1 I$. Wait for the 2802 * invalidation complete 2803 */ 2804 for (i = 0; i < usec_timeout; i++) { 2805 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2806 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2807 INVALIDATE_CACHE_COMPLETE)) 2808 break; 2809 udelay(1); 2810 } 2811 2812 if (i >= usec_timeout) { 2813 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2814 return -EINVAL; 2815 } 2816 2817 /* Prime the instruction caches */ 2818 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2819 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 2820 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 2821 2822 /* Waiting for instruction cache primed*/ 2823 for (i = 0; i < usec_timeout; i++) { 2824 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 2825 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 2826 ICACHE_PRIMED)) 2827 break; 2828 udelay(1); 2829 } 2830 2831 if (i >= usec_timeout) { 2832 dev_err(adev->dev, "failed to prime instruction cache\n"); 2833 return -EINVAL; 2834 } 2835 2836 mutex_lock(&adev->srbm_mutex); 2837 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 2838 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 2839 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 2840 (me_hdr->ucode_start_addr_hi << 30) | 2841 (me_hdr->ucode_start_addr_lo >> 2) ); 2842 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 2843 me_hdr->ucode_start_addr_hi>>2); 2844 2845 /* 2846 * Program CP_ME_CNTL to reset given PIPE to take 2847 * effect of CP_PFP_PRGRM_CNTR_START. 
2848 */ 2849 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 2850 if (pipe_id == 0) 2851 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2852 ME_PIPE0_RESET, 1); 2853 else 2854 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2855 ME_PIPE1_RESET, 1); 2856 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2857 2858 /* Clear pfp pipe0 reset bit. */ 2859 if (pipe_id == 0) 2860 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2861 ME_PIPE0_RESET, 0); 2862 else 2863 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 2864 ME_PIPE1_RESET, 0); 2865 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 2866 2867 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 2868 lower_32_bits(addr2)); 2869 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 2870 upper_32_bits(addr2)); 2871 } 2872 soc21_grbm_select(adev, 0, 0, 0, 0); 2873 mutex_unlock(&adev->srbm_mutex); 2874 2875 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 2876 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 2877 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 2878 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 2879 2880 /* Invalidate the data caches */ 2881 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2882 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2883 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 2884 2885 for (i = 0; i < usec_timeout; i++) { 2886 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 2887 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 2888 INVALIDATE_DCACHE_COMPLETE)) 2889 break; 2890 udelay(1); 2891 } 2892 2893 if (i >= usec_timeout) { 2894 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 2895 return -EINVAL; 2896 } 2897 2898 return 0; 2899 } 2900 2901 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2) 2902 { 2903 uint32_t usec_timeout = 50000; /* wait for 50ms */ 2904 uint32_t tmp; 2905 unsigned i; 2906 const struct gfx_firmware_header_v2_0 *mec_hdr; 2907 2908 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 
2909 adev->gfx.mec_fw->data; 2910 2911 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 2912 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2913 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2914 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2915 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 2916 2917 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 2918 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2919 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2920 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 2921 2922 mutex_lock(&adev->srbm_mutex); 2923 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2924 soc21_grbm_select(adev, 1, i, 0, 0); 2925 2926 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2); 2927 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 2928 upper_32_bits(addr2)); 2929 2930 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 2931 mec_hdr->ucode_start_addr_lo >> 2 | 2932 mec_hdr->ucode_start_addr_hi << 30); 2933 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 2934 mec_hdr->ucode_start_addr_hi >> 2); 2935 2936 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr); 2937 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 2938 upper_32_bits(addr)); 2939 } 2940 mutex_unlock(&adev->srbm_mutex); 2941 soc21_grbm_select(adev, 0, 0, 0, 0); 2942 2943 /* Trigger an invalidation of the L1 instruction caches */ 2944 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2945 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2946 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 2947 2948 /* Wait for invalidation complete */ 2949 for (i = 0; i < usec_timeout; i++) { 2950 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 2951 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2952 INVALIDATE_DCACHE_COMPLETE)) 2953 break; 2954 udelay(1); 2955 } 2956 2957 if (i >= usec_timeout) { 2958 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2959 return -EINVAL; 2960 } 2961 2962 /* Trigger 
an invalidation of the L1 instruction caches */ 2963 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2964 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2965 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 2966 2967 /* Wait for invalidation complete */ 2968 for (i = 0; i < usec_timeout; i++) { 2969 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 2970 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2971 INVALIDATE_CACHE_COMPLETE)) 2972 break; 2973 udelay(1); 2974 } 2975 2976 if (i >= usec_timeout) { 2977 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2978 return -EINVAL; 2979 } 2980 2981 return 0; 2982 } 2983 2984 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev) 2985 { 2986 const struct gfx_firmware_header_v2_0 *pfp_hdr; 2987 const struct gfx_firmware_header_v2_0 *me_hdr; 2988 const struct gfx_firmware_header_v2_0 *mec_hdr; 2989 uint32_t pipe_id, tmp; 2990 2991 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 2992 adev->gfx.mec_fw->data; 2993 me_hdr = (const struct gfx_firmware_header_v2_0 *) 2994 adev->gfx.me_fw->data; 2995 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 2996 adev->gfx.pfp_fw->data; 2997 2998 /* config pfp program start addr */ 2999 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 3000 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3001 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3002 (pfp_hdr->ucode_start_addr_hi << 30) | 3003 (pfp_hdr->ucode_start_addr_lo >> 2)); 3004 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3005 pfp_hdr->ucode_start_addr_hi >> 2); 3006 } 3007 soc21_grbm_select(adev, 0, 0, 0, 0); 3008 3009 /* reset pfp pipe */ 3010 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3011 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1); 3012 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1); 3013 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3014 3015 /* clear pfp pipe reset */ 3016 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0); 3017 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 
PFP_PIPE1_RESET, 0); 3018 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3019 3020 /* config me program start addr */ 3021 for (pipe_id = 0; pipe_id < 2; pipe_id++) { 3022 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3023 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3024 (me_hdr->ucode_start_addr_hi << 30) | 3025 (me_hdr->ucode_start_addr_lo >> 2) ); 3026 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3027 me_hdr->ucode_start_addr_hi>>2); 3028 } 3029 soc21_grbm_select(adev, 0, 0, 0, 0); 3030 3031 /* reset me pipe */ 3032 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3033 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1); 3034 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1); 3035 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3036 3037 /* clear me pipe reset */ 3038 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0); 3039 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0); 3040 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3041 3042 /* config mec program start addr */ 3043 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 3044 soc21_grbm_select(adev, 1, pipe_id, 0, 0); 3045 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 3046 mec_hdr->ucode_start_addr_lo >> 2 | 3047 mec_hdr->ucode_start_addr_hi << 30); 3048 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 3049 mec_hdr->ucode_start_addr_hi >> 2); 3050 } 3051 soc21_grbm_select(adev, 0, 0, 0, 0); 3052 3053 /* reset mec pipe */ 3054 tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3055 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 3056 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 3057 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 3058 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 3059 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3060 3061 /* clear mec pipe reset */ 3062 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 3063 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 3064 tmp = REG_SET_FIELD(tmp, 
CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 3065 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 3066 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp); 3067 } 3068 3069 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 3070 { 3071 uint32_t cp_status; 3072 uint32_t bootload_status; 3073 int i, r; 3074 uint64_t addr, addr2; 3075 3076 for (i = 0; i < adev->usec_timeout; i++) { 3077 cp_status = RREG32_SOC15(GC, 0, regCP_STAT); 3078 3079 if (amdgpu_ip_version(adev, GC_HWIP, 0) == 3080 IP_VERSION(11, 0, 1) || 3081 amdgpu_ip_version(adev, GC_HWIP, 0) == 3082 IP_VERSION(11, 0, 4) || 3083 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || 3084 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || 3085 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || 3086 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) || 3087 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4) || 3088 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 6)) 3089 bootload_status = RREG32_SOC15(GC, 0, 3090 regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); 3091 else 3092 bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); 3093 3094 if ((cp_status == 0) && 3095 (REG_GET_FIELD(bootload_status, 3096 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 3097 break; 3098 } 3099 udelay(1); 3100 } 3101 3102 if (i >= adev->usec_timeout) { 3103 dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n"); 3104 return -ETIMEDOUT; 3105 } 3106 3107 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 3108 if (adev->gfx.rs64_enable) { 3109 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3110 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset; 3111 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3112 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset; 3113 r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2); 3114 if (r) 3115 return r; 3116 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3117 
rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset; 3118 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3119 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset; 3120 r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2); 3121 if (r) 3122 return r; 3123 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3124 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset; 3125 addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr + 3126 rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset; 3127 r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2); 3128 if (r) 3129 return r; 3130 } else { 3131 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3132 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset; 3133 r = gfx_v11_0_config_me_cache(adev, addr); 3134 if (r) 3135 return r; 3136 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3137 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset; 3138 r = gfx_v11_0_config_pfp_cache(adev, addr); 3139 if (r) 3140 return r; 3141 addr = adev->gfx.rlc.rlc_autoload_gpu_addr + 3142 rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset; 3143 r = gfx_v11_0_config_mec_cache(adev, addr); 3144 if (r) 3145 return r; 3146 } 3147 } 3148 3149 return 0; 3150 } 3151 3152 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3153 { 3154 int i; 3155 u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3156 3157 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 3158 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 3159 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3160 3161 for (i = 0; i < adev->usec_timeout; i++) { 3162 if (RREG32_SOC15(GC, 0, regCP_STAT) == 0) 3163 break; 3164 udelay(1); 3165 } 3166 3167 if (i >= adev->usec_timeout) 3168 DRM_ERROR("failed to %s cp gfx\n", enable ? 
"unhalt" : "halt"); 3169 3170 return 0; 3171 } 3172 3173 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) 3174 { 3175 int r; 3176 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3177 const __le32 *fw_data; 3178 unsigned i, fw_size; 3179 3180 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3181 adev->gfx.pfp_fw->data; 3182 3183 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3184 3185 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3186 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3187 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes); 3188 3189 r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes, 3190 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3191 &adev->gfx.pfp.pfp_fw_obj, 3192 &adev->gfx.pfp.pfp_fw_gpu_addr, 3193 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3194 if (r) { 3195 dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r); 3196 gfx_v11_0_pfp_fini(adev); 3197 return r; 3198 } 3199 3200 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size); 3201 3202 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3203 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3204 3205 gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr); 3206 3207 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0); 3208 3209 for (i = 0; i < pfp_hdr->jt_size; i++) 3210 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA, 3211 le32_to_cpup(fw_data + pfp_hdr->jt_offset + i)); 3212 3213 WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3214 3215 return 0; 3216 } 3217 3218 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev) 3219 { 3220 int r; 3221 const struct gfx_firmware_header_v2_0 *pfp_hdr; 3222 const __le32 *fw_ucode, *fw_data; 3223 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3224 uint32_t tmp; 3225 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3226 3227 pfp_hdr = (const struct gfx_firmware_header_v2_0 *) 3228 adev->gfx.pfp_fw->data; 3229 3230 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 
3231 3232 /* instruction */ 3233 fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data + 3234 le32_to_cpu(pfp_hdr->ucode_offset_bytes)); 3235 fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes); 3236 /* data */ 3237 fw_data = (const __le32 *)(adev->gfx.pfp_fw->data + 3238 le32_to_cpu(pfp_hdr->data_offset_bytes)); 3239 fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes); 3240 3241 /* 64kb align */ 3242 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3243 64 * 1024, 3244 AMDGPU_GEM_DOMAIN_VRAM | 3245 AMDGPU_GEM_DOMAIN_GTT, 3246 &adev->gfx.pfp.pfp_fw_obj, 3247 &adev->gfx.pfp.pfp_fw_gpu_addr, 3248 (void **)&adev->gfx.pfp.pfp_fw_ptr); 3249 if (r) { 3250 dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r); 3251 gfx_v11_0_pfp_fini(adev); 3252 return r; 3253 } 3254 3255 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3256 64 * 1024, 3257 AMDGPU_GEM_DOMAIN_VRAM | 3258 AMDGPU_GEM_DOMAIN_GTT, 3259 &adev->gfx.pfp.pfp_fw_data_obj, 3260 &adev->gfx.pfp.pfp_fw_data_gpu_addr, 3261 (void **)&adev->gfx.pfp.pfp_fw_data_ptr); 3262 if (r) { 3263 dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r); 3264 gfx_v11_0_pfp_fini(adev); 3265 return r; 3266 } 3267 3268 memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size); 3269 memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size); 3270 3271 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj); 3272 amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj); 3273 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj); 3274 amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj); 3275 3276 if (amdgpu_emu_mode == 1) 3277 amdgpu_device_flush_hdp(adev, NULL); 3278 3279 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO, 3280 lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3281 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI, 3282 upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr)); 3283 3284 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL); 3285 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); 3286 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 
0); 3287 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0); 3288 WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp); 3289 3290 /* 3291 * Programming any of the CP_PFP_IC_BASE registers 3292 * forces invalidation of the ME L1 I$. Wait for the 3293 * invalidation complete 3294 */ 3295 for (i = 0; i < usec_timeout; i++) { 3296 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3297 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3298 INVALIDATE_CACHE_COMPLETE)) 3299 break; 3300 udelay(1); 3301 } 3302 3303 if (i >= usec_timeout) { 3304 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3305 return -EINVAL; 3306 } 3307 3308 /* Prime the L1 instruction caches */ 3309 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3310 tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1); 3311 WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp); 3312 /* Waiting for cache primed*/ 3313 for (i = 0; i < usec_timeout; i++) { 3314 tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL); 3315 if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL, 3316 ICACHE_PRIMED)) 3317 break; 3318 udelay(1); 3319 } 3320 3321 if (i >= usec_timeout) { 3322 dev_err(adev->dev, "failed to prime instruction cache\n"); 3323 return -EINVAL; 3324 } 3325 3326 mutex_lock(&adev->srbm_mutex); 3327 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3328 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3329 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START, 3330 (pfp_hdr->ucode_start_addr_hi << 30) | 3331 (pfp_hdr->ucode_start_addr_lo >> 2) ); 3332 WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI, 3333 pfp_hdr->ucode_start_addr_hi>>2); 3334 3335 /* 3336 * Program CP_ME_CNTL to reset given PIPE to take 3337 * effect of CP_PFP_PRGRM_CNTR_START. 
3338 */ 3339 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3340 if (pipe_id == 0) 3341 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3342 PFP_PIPE0_RESET, 1); 3343 else 3344 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3345 PFP_PIPE1_RESET, 1); 3346 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3347 3348 /* Clear pfp pipe0 reset bit. */ 3349 if (pipe_id == 0) 3350 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3351 PFP_PIPE0_RESET, 0); 3352 else 3353 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3354 PFP_PIPE1_RESET, 0); 3355 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3356 3357 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO, 3358 lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3359 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI, 3360 upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr)); 3361 } 3362 soc21_grbm_select(adev, 0, 0, 0, 0); 3363 mutex_unlock(&adev->srbm_mutex); 3364 3365 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3366 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3367 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3368 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3369 3370 /* Invalidate the data caches */ 3371 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3372 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3373 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3374 3375 for (i = 0; i < usec_timeout; i++) { 3376 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3377 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3378 INVALIDATE_DCACHE_COMPLETE)) 3379 break; 3380 udelay(1); 3381 } 3382 3383 if (i >= usec_timeout) { 3384 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3385 return -EINVAL; 3386 } 3387 3388 return 0; 3389 } 3390 3391 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) 3392 { 3393 int r; 3394 const struct gfx_firmware_header_v1_0 *me_hdr; 3395 const __le32 *fw_data; 3396 unsigned i, fw_size; 3397 3398 me_hdr = (const struct gfx_firmware_header_v1_0 
*) 3399 adev->gfx.me_fw->data; 3400 3401 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3402 3403 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3404 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3405 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes); 3406 3407 r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes, 3408 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3409 &adev->gfx.me.me_fw_obj, 3410 &adev->gfx.me.me_fw_gpu_addr, 3411 (void **)&adev->gfx.me.me_fw_ptr); 3412 if (r) { 3413 dev_err(adev->dev, "(%d) failed to create me fw bo\n", r); 3414 gfx_v11_0_me_fini(adev); 3415 return r; 3416 } 3417 3418 memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size); 3419 3420 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3421 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3422 3423 gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr); 3424 3425 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0); 3426 3427 for (i = 0; i < me_hdr->jt_size; i++) 3428 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA, 3429 le32_to_cpup(fw_data + me_hdr->jt_offset + i)); 3430 3431 WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version); 3432 3433 return 0; 3434 } 3435 3436 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) 3437 { 3438 int r; 3439 const struct gfx_firmware_header_v2_0 *me_hdr; 3440 const __le32 *fw_ucode, *fw_data; 3441 unsigned i, pipe_id, fw_ucode_size, fw_data_size; 3442 uint32_t tmp; 3443 uint32_t usec_timeout = 50000; /* wait for 50ms */ 3444 3445 me_hdr = (const struct gfx_firmware_header_v2_0 *) 3446 adev->gfx.me_fw->data; 3447 3448 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3449 3450 /* instruction */ 3451 fw_ucode = (const __le32 *)(adev->gfx.me_fw->data + 3452 le32_to_cpu(me_hdr->ucode_offset_bytes)); 3453 fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes); 3454 /* data */ 3455 fw_data = (const __le32 *)(adev->gfx.me_fw->data + 3456 le32_to_cpu(me_hdr->data_offset_bytes)); 3457 fw_data_size = 
le32_to_cpu(me_hdr->data_size_bytes); 3458 3459 /* 64kb align*/ 3460 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3461 64 * 1024, 3462 AMDGPU_GEM_DOMAIN_VRAM | 3463 AMDGPU_GEM_DOMAIN_GTT, 3464 &adev->gfx.me.me_fw_obj, 3465 &adev->gfx.me.me_fw_gpu_addr, 3466 (void **)&adev->gfx.me.me_fw_ptr); 3467 if (r) { 3468 dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r); 3469 gfx_v11_0_me_fini(adev); 3470 return r; 3471 } 3472 3473 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3474 64 * 1024, 3475 AMDGPU_GEM_DOMAIN_VRAM | 3476 AMDGPU_GEM_DOMAIN_GTT, 3477 &adev->gfx.me.me_fw_data_obj, 3478 &adev->gfx.me.me_fw_data_gpu_addr, 3479 (void **)&adev->gfx.me.me_fw_data_ptr); 3480 if (r) { 3481 dev_err(adev->dev, "(%d) failed to create me data bo\n", r); 3482 gfx_v11_0_pfp_fini(adev); 3483 return r; 3484 } 3485 3486 memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size); 3487 memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size); 3488 3489 amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj); 3490 amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj); 3491 amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj); 3492 amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj); 3493 3494 if (amdgpu_emu_mode == 1) 3495 amdgpu_device_flush_hdp(adev, NULL); 3496 3497 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO, 3498 lower_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3499 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI, 3500 upper_32_bits(adev->gfx.me.me_fw_gpu_addr)); 3501 3502 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL); 3503 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); 3504 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0); 3505 tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0); 3506 WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp); 3507 3508 /* 3509 * Programming any of the CP_ME_IC_BASE registers 3510 * forces invalidation of the ME L1 I$. 
Wait for the 3511 * invalidation complete 3512 */ 3513 for (i = 0; i < usec_timeout; i++) { 3514 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3515 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3516 INVALIDATE_CACHE_COMPLETE)) 3517 break; 3518 udelay(1); 3519 } 3520 3521 if (i >= usec_timeout) { 3522 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 3523 return -EINVAL; 3524 } 3525 3526 /* Prime the instruction caches */ 3527 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3528 tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1); 3529 WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp); 3530 3531 /* Waiting for instruction cache primed*/ 3532 for (i = 0; i < usec_timeout; i++) { 3533 tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL); 3534 if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL, 3535 ICACHE_PRIMED)) 3536 break; 3537 udelay(1); 3538 } 3539 3540 if (i >= usec_timeout) { 3541 dev_err(adev->dev, "failed to prime instruction cache\n"); 3542 return -EINVAL; 3543 } 3544 3545 mutex_lock(&adev->srbm_mutex); 3546 for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) { 3547 soc21_grbm_select(adev, 0, pipe_id, 0, 0); 3548 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START, 3549 (me_hdr->ucode_start_addr_hi << 30) | 3550 (me_hdr->ucode_start_addr_lo >> 2) ); 3551 WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI, 3552 me_hdr->ucode_start_addr_hi>>2); 3553 3554 /* 3555 * Program CP_ME_CNTL to reset given PIPE to take 3556 * effect of CP_PFP_PRGRM_CNTR_START. 3557 */ 3558 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL); 3559 if (pipe_id == 0) 3560 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3561 ME_PIPE0_RESET, 1); 3562 else 3563 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3564 ME_PIPE1_RESET, 1); 3565 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3566 3567 /* Clear pfp pipe0 reset bit. 
*/ 3568 if (pipe_id == 0) 3569 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3570 ME_PIPE0_RESET, 0); 3571 else 3572 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, 3573 ME_PIPE1_RESET, 0); 3574 WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp); 3575 3576 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO, 3577 lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3578 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI, 3579 upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr)); 3580 } 3581 soc21_grbm_select(adev, 0, 0, 0, 0); 3582 mutex_unlock(&adev->srbm_mutex); 3583 3584 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL); 3585 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0); 3586 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0); 3587 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp); 3588 3589 /* Invalidate the data caches */ 3590 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3591 tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 3592 WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp); 3593 3594 for (i = 0; i < usec_timeout; i++) { 3595 tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL); 3596 if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, 3597 INVALIDATE_DCACHE_COMPLETE)) 3598 break; 3599 udelay(1); 3600 } 3601 3602 if (i >= usec_timeout) { 3603 dev_err(adev->dev, "failed to invalidate RS64 data cache\n"); 3604 return -EINVAL; 3605 } 3606 3607 return 0; 3608 } 3609 3610 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3611 { 3612 int r; 3613 3614 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw) 3615 return -EINVAL; 3616 3617 gfx_v11_0_cp_gfx_enable(adev, false); 3618 3619 if (adev->gfx.rs64_enable) 3620 r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev); 3621 else 3622 r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev); 3623 if (r) { 3624 dev_err(adev->dev, "(%d) failed to load pfp fw\n", r); 3625 return r; 3626 } 3627 3628 if (adev->gfx.rs64_enable) 3629 r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev); 3630 else 3631 r 
= gfx_v11_0_cp_gfx_load_me_microcode(adev); 3632 if (r) { 3633 dev_err(adev->dev, "(%d) failed to load me fw\n", r); 3634 return r; 3635 } 3636 3637 return 0; 3638 } 3639 3640 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev) 3641 { 3642 struct amdgpu_ring *ring; 3643 const struct cs_section_def *sect = NULL; 3644 const struct cs_extent_def *ext = NULL; 3645 int r, i; 3646 int ctx_reg_offset; 3647 3648 /* init the CP */ 3649 WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT, 3650 adev->gfx.config.max_hw_contexts - 1); 3651 WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1); 3652 3653 if (!amdgpu_async_gfx_ring) 3654 gfx_v11_0_cp_gfx_enable(adev, true); 3655 3656 ring = &adev->gfx.gfx_ring[0]; 3657 r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev)); 3658 if (r) { 3659 drm_err(&adev->ddev, "cp failed to lock ring (%d).\n", r); 3660 return r; 3661 } 3662 3663 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3664 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3665 3666 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3667 amdgpu_ring_write(ring, 0x80000000); 3668 amdgpu_ring_write(ring, 0x80000000); 3669 3670 for (sect = gfx11_cs_data; sect->section != NULL; ++sect) { 3671 for (ext = sect->section; ext->extent != NULL; ++ext) { 3672 if (sect->id == SECT_CONTEXT) { 3673 amdgpu_ring_write(ring, 3674 PACKET3(PACKET3_SET_CONTEXT_REG, 3675 ext->reg_count)); 3676 amdgpu_ring_write(ring, ext->reg_index - 3677 PACKET3_SET_CONTEXT_REG_START); 3678 for (i = 0; i < ext->reg_count; i++) 3679 amdgpu_ring_write(ring, ext->extent[i]); 3680 } 3681 } 3682 } 3683 3684 ctx_reg_offset = 3685 SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START; 3686 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1)); 3687 amdgpu_ring_write(ring, ctx_reg_offset); 3688 amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override); 3689 3690 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3691 
amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 3692 3693 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3694 amdgpu_ring_write(ring, 0); 3695 3696 amdgpu_ring_commit(ring); 3697 3698 /* submit cs packet to copy state 0 to next available state */ 3699 if (adev->gfx.num_gfx_rings > 1) { 3700 /* maximum supported gfx ring is 2 */ 3701 ring = &adev->gfx.gfx_ring[1]; 3702 r = amdgpu_ring_alloc(ring, 2); 3703 if (r) { 3704 drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r); 3705 return r; 3706 } 3707 3708 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 3709 amdgpu_ring_write(ring, 0); 3710 3711 amdgpu_ring_commit(ring); 3712 } 3713 return 0; 3714 } 3715 3716 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev, 3717 CP_PIPE_ID pipe) 3718 { 3719 u32 tmp; 3720 3721 tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); 3722 tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe); 3723 3724 WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); 3725 } 3726 3727 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, 3728 struct amdgpu_ring *ring) 3729 { 3730 u32 tmp; 3731 3732 tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); 3733 if (ring->use_doorbell) { 3734 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3735 DOORBELL_OFFSET, ring->doorbell_index); 3736 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3737 DOORBELL_EN, 1); 3738 } else { 3739 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 3740 DOORBELL_EN, 0); 3741 } 3742 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp); 3743 3744 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 3745 DOORBELL_RANGE_LOWER, ring->doorbell_index); 3746 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp); 3747 3748 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 3749 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 3750 } 3751 3752 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev) 3753 { 3754 struct amdgpu_ring *ring; 3755 u32 tmp; 3756 u32 rb_bufsz; 
3757 u64 rb_addr, rptr_addr, wptr_gpu_addr; 3758 3759 /* Set the write pointer delay */ 3760 WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); 3761 3762 /* set the RB to use vmid 0 */ 3763 WREG32_SOC15(GC, 0, regCP_RB_VMID, 0); 3764 3765 /* Init gfx ring 0 for pipe 0 */ 3766 mutex_lock(&adev->srbm_mutex); 3767 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3768 3769 /* Set ring buffer size */ 3770 ring = &adev->gfx.gfx_ring[0]; 3771 rb_bufsz = order_base_2(ring->ring_size / 8); 3772 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 3773 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 3774 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3775 3776 /* Initialize the ring buffer's write pointers */ 3777 ring->wptr = 0; 3778 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); 3779 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 3780 3781 /* set the wb address whether it's enabled or not */ 3782 rptr_addr = ring->rptr_gpu_addr; 3783 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 3784 WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3785 CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3786 3787 wptr_gpu_addr = ring->wptr_gpu_addr; 3788 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3789 lower_32_bits(wptr_gpu_addr)); 3790 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3791 upper_32_bits(wptr_gpu_addr)); 3792 3793 mdelay(1); 3794 WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp); 3795 3796 rb_addr = ring->gpu_addr >> 8; 3797 WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr); 3798 WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 3799 3800 WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1); 3801 3802 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3803 mutex_unlock(&adev->srbm_mutex); 3804 3805 /* Init gfx ring 1 for pipe 1 */ 3806 if (adev->gfx.num_gfx_rings > 1) { 3807 mutex_lock(&adev->srbm_mutex); 3808 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1); 3809 /* maximum supported gfx ring is 2 */ 3810 ring = 
&adev->gfx.gfx_ring[1]; 3811 rb_bufsz = order_base_2(ring->ring_size / 8); 3812 tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); 3813 tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); 3814 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3815 /* Initialize the ring buffer's write pointers */ 3816 ring->wptr = 0; 3817 WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr)); 3818 WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); 3819 /* Set the wb address whether it's enabled or not */ 3820 rptr_addr = ring->rptr_gpu_addr; 3821 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); 3822 WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 3823 CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 3824 wptr_gpu_addr = ring->wptr_gpu_addr; 3825 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO, 3826 lower_32_bits(wptr_gpu_addr)); 3827 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI, 3828 upper_32_bits(wptr_gpu_addr)); 3829 3830 mdelay(1); 3831 WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp); 3832 3833 rb_addr = ring->gpu_addr >> 8; 3834 WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr); 3835 WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr)); 3836 WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1); 3837 3838 gfx_v11_0_cp_gfx_set_doorbell(adev, ring); 3839 mutex_unlock(&adev->srbm_mutex); 3840 } 3841 /* Switch to pipe 0 */ 3842 mutex_lock(&adev->srbm_mutex); 3843 gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0); 3844 mutex_unlock(&adev->srbm_mutex); 3845 3846 /* start the ring */ 3847 gfx_v11_0_cp_gfx_start(adev); 3848 3849 return 0; 3850 } 3851 3852 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 3853 { 3854 u32 data; 3855 3856 if (adev->gfx.rs64_enable) { 3857 data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 3858 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 3859 enable ? 0 : 1); 3860 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 3861 enable ? 
0 : 1); 3862 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 3863 enable ? 0 : 1); 3864 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 3865 enable ? 0 : 1); 3866 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 3867 enable ? 0 : 1); 3868 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 3869 enable ? 1 : 0); 3870 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 3871 enable ? 1 : 0); 3872 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 3873 enable ? 1 : 0); 3874 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 3875 enable ? 1 : 0); 3876 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 3877 enable ? 0 : 1); 3878 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data); 3879 } else { 3880 data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL); 3881 3882 if (enable) { 3883 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0); 3884 if (!adev->enable_mes_kiq) 3885 data = REG_SET_FIELD(data, CP_MEC_CNTL, 3886 MEC_ME2_HALT, 0); 3887 } else { 3888 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1); 3889 data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1); 3890 } 3891 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data); 3892 } 3893 3894 udelay(50); 3895 } 3896 3897 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev) 3898 { 3899 const struct gfx_firmware_header_v1_0 *mec_hdr; 3900 const __le32 *fw_data; 3901 unsigned i, fw_size; 3902 u32 *fw = NULL; 3903 int r; 3904 3905 if (!adev->gfx.mec_fw) 3906 return -EINVAL; 3907 3908 gfx_v11_0_cp_compute_enable(adev, false); 3909 3910 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 3911 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3912 3913 fw_data = (const __le32 *) 3914 (adev->gfx.mec_fw->data + 3915 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 3916 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes); 3917 3918 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes, 
3919 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, 3920 &adev->gfx.mec.mec_fw_obj, 3921 &adev->gfx.mec.mec_fw_gpu_addr, 3922 (void **)&fw); 3923 if (r) { 3924 dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r); 3925 gfx_v11_0_mec_fini(adev); 3926 return r; 3927 } 3928 3929 memcpy(fw, fw_data, fw_size); 3930 3931 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 3932 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 3933 3934 gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr); 3935 3936 /* MEC1 */ 3937 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0); 3938 3939 for (i = 0; i < mec_hdr->jt_size; i++) 3940 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA, 3941 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 3942 3943 WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 3944 3945 return 0; 3946 } 3947 3948 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev) 3949 { 3950 const struct gfx_firmware_header_v2_0 *mec_hdr; 3951 const __le32 *fw_ucode, *fw_data; 3952 u32 tmp, fw_ucode_size, fw_data_size; 3953 u32 i, usec_timeout = 50000; /* Wait for 50 ms */ 3954 u32 *fw_ucode_ptr, *fw_data_ptr; 3955 int r; 3956 3957 if (!adev->gfx.mec_fw) 3958 return -EINVAL; 3959 3960 gfx_v11_0_cp_compute_enable(adev, false); 3961 3962 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 3963 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 3964 3965 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 3966 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 3967 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 3968 3969 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 3970 le32_to_cpu(mec_hdr->data_offset_bytes)); 3971 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 3972 3973 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 3974 64 * 1024, 3975 AMDGPU_GEM_DOMAIN_VRAM | 3976 AMDGPU_GEM_DOMAIN_GTT, 3977 &adev->gfx.mec.mec_fw_obj, 3978 &adev->gfx.mec.mec_fw_gpu_addr, 3979 (void **)&fw_ucode_ptr); 3980 if (r) { 3981 
dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3982 gfx_v11_0_mec_fini(adev); 3983 return r; 3984 } 3985 3986 r = amdgpu_bo_create_reserved(adev, fw_data_size, 3987 64 * 1024, 3988 AMDGPU_GEM_DOMAIN_VRAM | 3989 AMDGPU_GEM_DOMAIN_GTT, 3990 &adev->gfx.mec.mec_fw_data_obj, 3991 &adev->gfx.mec.mec_fw_data_gpu_addr, 3992 (void **)&fw_data_ptr); 3993 if (r) { 3994 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 3995 gfx_v11_0_mec_fini(adev); 3996 return r; 3997 } 3998 3999 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 4000 memcpy(fw_data_ptr, fw_data, fw_data_size); 4001 4002 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 4003 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 4004 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 4005 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 4006 4007 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL); 4008 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 4009 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 4010 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 4011 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp); 4012 4013 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL); 4014 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 4015 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 4016 WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp); 4017 4018 mutex_lock(&adev->srbm_mutex); 4019 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 4020 soc21_grbm_select(adev, 1, i, 0, 0); 4021 4022 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr); 4023 WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI, 4024 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr)); 4025 4026 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START, 4027 mec_hdr->ucode_start_addr_lo >> 2 | 4028 mec_hdr->ucode_start_addr_hi << 30); 4029 WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI, 4030 mec_hdr->ucode_start_addr_hi >> 2); 4031 4032 WREG32_SOC15(GC, 0, 
regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr); 4033 WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI, 4034 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 4035 } 4036 mutex_unlock(&adev->srbm_mutex); 4037 soc21_grbm_select(adev, 0, 0, 0, 0); 4038 4039 /* Trigger an invalidation of the L1 instruction caches */ 4040 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 4041 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 4042 WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp); 4043 4044 /* Wait for invalidation complete */ 4045 for (i = 0; i < usec_timeout; i++) { 4046 tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL); 4047 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 4048 INVALIDATE_DCACHE_COMPLETE)) 4049 break; 4050 udelay(1); 4051 } 4052 4053 if (i >= usec_timeout) { 4054 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 4055 return -EINVAL; 4056 } 4057 4058 /* Trigger an invalidation of the L1 instruction caches */ 4059 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4060 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 4061 WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp); 4062 4063 /* Wait for invalidation complete */ 4064 for (i = 0; i < usec_timeout; i++) { 4065 tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL); 4066 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 4067 INVALIDATE_CACHE_COMPLETE)) 4068 break; 4069 udelay(1); 4070 } 4071 4072 if (i >= usec_timeout) { 4073 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 4074 return -EINVAL; 4075 } 4076 4077 return 0; 4078 } 4079 4080 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) 4081 { 4082 uint32_t tmp; 4083 struct amdgpu_device *adev = ring->adev; 4084 4085 /* tell RLC which is KIQ queue */ 4086 tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); 4087 tmp &= 0xffffff00; 4088 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4089 WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); 4090 } 4091 4092 static void 
gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) 4093 { 4094 /* set graphics engine doorbell range */ 4095 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, 4096 (adev->doorbell_index.gfx_ring0 * 2) << 2); 4097 WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER, 4098 (adev->doorbell_index.gfx_userqueue_end * 2) << 2); 4099 4100 /* set compute engine doorbell range */ 4101 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4102 (adev->doorbell_index.kiq * 2) << 2); 4103 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4104 (adev->doorbell_index.userqueue_end * 2) << 2); 4105 } 4106 4107 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, 4108 struct v11_gfx_mqd *mqd, 4109 struct amdgpu_mqd_prop *prop) 4110 { 4111 bool priority = 0; 4112 u32 tmp; 4113 4114 /* set up default queue priority level 4115 * 0x0 = low priority, 0x1 = high priority 4116 */ 4117 if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM) 4118 priority = 1; 4119 4120 tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; 4121 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); 4122 mqd->cp_gfx_hqd_queue_priority = tmp; 4123 } 4124 4125 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, 4126 struct amdgpu_mqd_prop *prop) 4127 { 4128 struct v11_gfx_mqd *mqd = m; 4129 uint64_t hqd_gpu_addr, wb_gpu_addr; 4130 uint32_t tmp; 4131 uint32_t rb_bufsz; 4132 4133 /* set up gfx hqd wptr */ 4134 mqd->cp_gfx_hqd_wptr = 0; 4135 mqd->cp_gfx_hqd_wptr_hi = 0; 4136 4137 /* set the pointer to the MQD */ 4138 mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc; 4139 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4140 4141 /* set up mqd control */ 4142 tmp = regCP_GFX_MQD_CONTROL_DEFAULT; 4143 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); 4144 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); 4145 tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); 4146 mqd->cp_gfx_mqd_control = tmp; 4147 
4148 /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ 4149 tmp = regCP_GFX_HQD_VMID_DEFAULT; 4150 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); 4151 mqd->cp_gfx_hqd_vmid = 0; 4152 4153 /* set up gfx queue priority */ 4154 gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); 4155 4156 /* set up time quantum */ 4157 tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; 4158 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); 4159 mqd->cp_gfx_hqd_quantum = tmp; 4160 4161 /* set up gfx hqd base. this is similar as CP_RB_BASE */ 4162 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4163 mqd->cp_gfx_hqd_base = hqd_gpu_addr; 4164 mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr); 4165 4166 /* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */ 4167 wb_gpu_addr = prop->rptr_gpu_addr; 4168 mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc; 4169 mqd->cp_gfx_hqd_rptr_addr_hi = 4170 upper_32_bits(wb_gpu_addr) & 0xffff; 4171 4172 /* set up rb_wptr_poll addr */ 4173 wb_gpu_addr = prop->wptr_gpu_addr; 4174 mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4175 mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4176 4177 /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ 4178 rb_bufsz = order_base_2(prop->queue_size / 4) - 1; 4179 tmp = regCP_GFX_HQD_CNTL_DEFAULT; 4180 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); 4181 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); 4182 #ifdef __BIG_ENDIAN 4183 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1); 4184 #endif 4185 if (prop->tmz_queue) 4186 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1); 4187 if (!prop->kernel_queue) 4188 tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1); 4189 mqd->cp_gfx_hqd_cntl = tmp; 4190 4191 /* set up cp_doorbell_control */ 4192 tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; 4193 if (prop->use_doorbell) { 4194 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4195 DOORBELL_OFFSET, prop->doorbell_index); 4196 tmp = 
REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4197 DOORBELL_EN, 1); 4198 } else 4199 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4200 DOORBELL_EN, 0); 4201 mqd->cp_rb_doorbell_control = tmp; 4202 4203 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4204 mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; 4205 4206 /* active the queue */ 4207 mqd->cp_gfx_hqd_active = 1; 4208 4209 /* set gfx UQ items */ 4210 mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr); 4211 mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr); 4212 mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr); 4213 mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr); 4214 mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr); 4215 mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr); 4216 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4217 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4218 4219 return 0; 4220 } 4221 4222 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset) 4223 { 4224 struct amdgpu_device *adev = ring->adev; 4225 struct v11_gfx_mqd *mqd = ring->mqd_ptr; 4226 int mqd_idx = ring - &adev->gfx.gfx_ring[0]; 4227 4228 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4229 memset((void *)mqd, 0, sizeof(*mqd)); 4230 mutex_lock(&adev->srbm_mutex); 4231 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4232 amdgpu_ring_init_mqd(ring); 4233 soc21_grbm_select(adev, 0, 0, 0, 0); 4234 mutex_unlock(&adev->srbm_mutex); 4235 if (adev->gfx.me.mqd_backup[mqd_idx]) 4236 memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4237 } else { 4238 /* restore mqd with the backup copy */ 4239 if (adev->gfx.me.mqd_backup[mqd_idx]) 4240 memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); 4241 /* reset the ring */ 4242 ring->wptr = 0; 4243 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4244 amdgpu_ring_clear_ring(ring); 4245 } 4246 4247 return 0; 4248 } 4249 4250 
static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) 4251 { 4252 int r, i; 4253 4254 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4255 r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false); 4256 if (r) 4257 return r; 4258 } 4259 4260 r = amdgpu_gfx_enable_kgq(adev, 0); 4261 if (r) 4262 return r; 4263 4264 return gfx_v11_0_cp_gfx_start(adev); 4265 } 4266 4267 static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev, 4268 struct v11_compute_mqd *mqd, 4269 struct amdgpu_mqd_prop *prop) 4270 { 4271 uint32_t se_mask[8] = {0}; 4272 uint32_t wa_mask; 4273 bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE | 4274 AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE); 4275 4276 if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count)) 4277 return; 4278 4279 if (has_wa_flag) { 4280 wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ? 4281 0xffff : 0xffffffff; 4282 mqd->compute_static_thread_mgmt_se0 = wa_mask; 4283 mqd->compute_static_thread_mgmt_se1 = wa_mask; 4284 mqd->compute_static_thread_mgmt_se2 = wa_mask; 4285 mqd->compute_static_thread_mgmt_se3 = wa_mask; 4286 return; 4287 } 4288 4289 amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask, 4290 prop->cu_mask_count, se_mask); 4291 4292 mqd->compute_static_thread_mgmt_se0 = se_mask[0]; 4293 mqd->compute_static_thread_mgmt_se1 = se_mask[1]; 4294 mqd->compute_static_thread_mgmt_se2 = se_mask[2]; 4295 mqd->compute_static_thread_mgmt_se3 = se_mask[3]; 4296 } 4297 4298 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, 4299 struct amdgpu_mqd_prop *prop) 4300 { 4301 struct v11_compute_mqd *mqd = m; 4302 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4303 uint32_t tmp; 4304 4305 mqd->header = 0xC0310800; 4306 mqd->compute_pipelinestat_enable = 0x00000001; 4307 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4308 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4309 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4310 
mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4311 mqd->compute_misc_reserved = 0x00000007; 4312 4313 eop_base_addr = prop->eop_gpu_addr >> 8; 4314 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4315 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4316 4317 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4318 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 4319 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4320 (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); 4321 4322 mqd->cp_hqd_eop_control = tmp; 4323 4324 /* enable doorbell? */ 4325 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4326 4327 if (prop->use_doorbell) { 4328 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4329 DOORBELL_OFFSET, prop->doorbell_index); 4330 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4331 DOORBELL_EN, 1); 4332 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4333 DOORBELL_SOURCE, 0); 4334 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4335 DOORBELL_HIT, 0); 4336 } else { 4337 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4338 DOORBELL_EN, 0); 4339 } 4340 4341 mqd->cp_hqd_pq_doorbell_control = tmp; 4342 4343 /* disable the queue if it's active */ 4344 mqd->cp_hqd_dequeue_request = 0; 4345 mqd->cp_hqd_pq_rptr = 0; 4346 mqd->cp_hqd_pq_wptr_lo = 0; 4347 mqd->cp_hqd_pq_wptr_hi = 0; 4348 4349 /* set the pointer to the MQD */ 4350 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 4351 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 4352 4353 /* set MQD vmid to 0 */ 4354 tmp = regCP_MQD_CONTROL_DEFAULT; 4355 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4356 mqd->cp_mqd_control = tmp; 4357 4358 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4359 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 4360 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4361 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4362 4363 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4364 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 
4365 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4366 (order_base_2(prop->queue_size / 4) - 1)); 4367 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4368 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 4369 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); 4370 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 4371 prop->allow_tunneling); 4372 if (prop->kernel_queue) { 4373 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4374 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4375 } 4376 if (prop->tmz_queue) 4377 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1); 4378 mqd->cp_hqd_pq_control = tmp; 4379 4380 /* set the wb address whether it's enabled or not */ 4381 wb_gpu_addr = prop->rptr_gpu_addr; 4382 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4383 mqd->cp_hqd_pq_rptr_report_addr_hi = 4384 upper_32_bits(wb_gpu_addr) & 0xffff; 4385 4386 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4387 wb_gpu_addr = prop->wptr_gpu_addr; 4388 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4389 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4390 4391 tmp = 0; 4392 /* enable the doorbell if requested */ 4393 if (prop->use_doorbell) { 4394 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 4395 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4396 DOORBELL_OFFSET, prop->doorbell_index); 4397 4398 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4399 DOORBELL_EN, 1); 4400 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4401 DOORBELL_SOURCE, 0); 4402 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4403 DOORBELL_HIT, 0); 4404 } 4405 4406 mqd->cp_hqd_pq_doorbell_control = tmp; 4407 4408 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4409 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 4410 4411 /* set the vmid for the queue */ 4412 mqd->cp_hqd_vmid = 0; 4413 4414 tmp = 
regCP_HQD_PERSISTENT_STATE_DEFAULT; 4415 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); 4416 mqd->cp_hqd_persistent_state = tmp; 4417 4418 /* set MIN_IB_AVAIL_SIZE */ 4419 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 4420 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4421 mqd->cp_hqd_ib_control = tmp; 4422 4423 /* set static priority for a compute queue/ring */ 4424 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 4425 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 4426 4427 tmp = REG_SET_FIELD(0, CP_HQD_QUANTUM, QUANTUM_EN, 1); 4428 tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_SCALE, 1); 4429 tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_DURATION, 1); 4430 mqd->cp_hqd_quantum = tmp; 4431 4432 mqd->cp_hqd_active = prop->hqd_active; 4433 4434 /* set UQ fenceaddress */ 4435 mqd->fence_address_lo = lower_32_bits(prop->fence_address); 4436 mqd->fence_address_hi = upper_32_bits(prop->fence_address); 4437 /* set CU mask */ 4438 gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop); 4439 4440 return 0; 4441 } 4442 4443 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring) 4444 { 4445 struct amdgpu_device *adev = ring->adev; 4446 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4447 int j; 4448 4449 /* inactivate the queue */ 4450 if (amdgpu_sriov_vf(adev)) 4451 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0); 4452 4453 /* disable wptr polling */ 4454 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4455 4456 /* write the EOP addr */ 4457 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR, 4458 mqd->cp_hqd_eop_base_addr_lo); 4459 WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI, 4460 mqd->cp_hqd_eop_base_addr_hi); 4461 4462 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4463 WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL, 4464 mqd->cp_hqd_eop_control); 4465 4466 /* enable doorbell? 
*/ 4467 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4468 mqd->cp_hqd_pq_doorbell_control); 4469 4470 /* disable the queue if it's active */ 4471 if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) { 4472 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1); 4473 for (j = 0; j < adev->usec_timeout; j++) { 4474 if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) 4475 break; 4476 udelay(1); 4477 } 4478 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 4479 mqd->cp_hqd_dequeue_request); 4480 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 4481 mqd->cp_hqd_pq_rptr); 4482 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4483 mqd->cp_hqd_pq_wptr_lo); 4484 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4485 mqd->cp_hqd_pq_wptr_hi); 4486 } 4487 4488 /* set the pointer to the MQD */ 4489 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR, 4490 mqd->cp_mqd_base_addr_lo); 4491 WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI, 4492 mqd->cp_mqd_base_addr_hi); 4493 4494 /* set MQD vmid to 0 */ 4495 WREG32_SOC15(GC, 0, regCP_MQD_CONTROL, 4496 mqd->cp_mqd_control); 4497 4498 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4499 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE, 4500 mqd->cp_hqd_pq_base_lo); 4501 WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI, 4502 mqd->cp_hqd_pq_base_hi); 4503 4504 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4505 WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL, 4506 mqd->cp_hqd_pq_control); 4507 4508 /* set the wb address whether it's enabled or not */ 4509 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR, 4510 mqd->cp_hqd_pq_rptr_report_addr_lo); 4511 WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4512 mqd->cp_hqd_pq_rptr_report_addr_hi); 4513 4514 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4515 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR, 4516 mqd->cp_hqd_pq_wptr_poll_addr_lo); 4517 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4518 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4519 4520 /* enable the doorbell if requested */ 4521 if 
(ring->use_doorbell) { 4522 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER, 4523 (adev->doorbell_index.kiq * 2) << 2); 4524 WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER, 4525 (adev->doorbell_index.userqueue_end * 2) << 2); 4526 } 4527 4528 WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 4529 mqd->cp_hqd_pq_doorbell_control); 4530 4531 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4532 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 4533 mqd->cp_hqd_pq_wptr_lo); 4534 WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 4535 mqd->cp_hqd_pq_wptr_hi); 4536 4537 /* set the vmid for the queue */ 4538 WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid); 4539 4540 WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE, 4541 mqd->cp_hqd_persistent_state); 4542 4543 /* activate the queue */ 4544 WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 4545 mqd->cp_hqd_active); 4546 4547 if (ring->use_doorbell) 4548 WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4549 4550 return 0; 4551 } 4552 4553 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) 4554 { 4555 struct amdgpu_device *adev = ring->adev; 4556 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4557 4558 gfx_v11_0_kiq_setting(ring); 4559 4560 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 4561 /* reset MQD to a clean status */ 4562 if (adev->gfx.kiq[0].mqd_backup) 4563 memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); 4564 4565 /* reset ring buffer */ 4566 ring->wptr = 0; 4567 amdgpu_ring_clear_ring(ring); 4568 4569 mutex_lock(&adev->srbm_mutex); 4570 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4571 gfx_v11_0_kiq_init_register(ring); 4572 soc21_grbm_select(adev, 0, 0, 0, 0); 4573 mutex_unlock(&adev->srbm_mutex); 4574 } else { 4575 memset((void *)mqd, 0, sizeof(*mqd)); 4576 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 4577 amdgpu_ring_clear_ring(ring); 4578 mutex_lock(&adev->srbm_mutex); 4579 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4580 
amdgpu_ring_init_mqd(ring); 4581 gfx_v11_0_kiq_init_register(ring); 4582 soc21_grbm_select(adev, 0, 0, 0, 0); 4583 mutex_unlock(&adev->srbm_mutex); 4584 4585 if (adev->gfx.kiq[0].mqd_backup) 4586 memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); 4587 } 4588 4589 return 0; 4590 } 4591 4592 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset) 4593 { 4594 struct amdgpu_device *adev = ring->adev; 4595 struct v11_compute_mqd *mqd = ring->mqd_ptr; 4596 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4597 4598 if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) { 4599 memset((void *)mqd, 0, sizeof(*mqd)); 4600 mutex_lock(&adev->srbm_mutex); 4601 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4602 amdgpu_ring_init_mqd(ring); 4603 soc21_grbm_select(adev, 0, 0, 0, 0); 4604 mutex_unlock(&adev->srbm_mutex); 4605 4606 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4607 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4608 } else { 4609 /* restore MQD to a clean status */ 4610 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4611 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4612 /* reset ring buffer */ 4613 ring->wptr = 0; 4614 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4615 amdgpu_ring_clear_ring(ring); 4616 } 4617 4618 return 0; 4619 } 4620 4621 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev) 4622 { 4623 gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4624 return 0; 4625 } 4626 4627 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev) 4628 { 4629 int i, r; 4630 4631 if (!amdgpu_async_gfx_ring) 4632 gfx_v11_0_cp_compute_enable(adev, true); 4633 4634 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4635 r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false); 4636 if (r) 4637 return r; 4638 } 4639 4640 return amdgpu_gfx_enable_kcq(adev, 0); 4641 } 4642 4643 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev) 4644 { 4645 int r, i; 4646 struct 
amdgpu_ring *ring; 4647 4648 if (!(adev->flags & AMD_IS_APU)) 4649 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 4650 4651 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4652 /* legacy firmware loading */ 4653 r = gfx_v11_0_cp_gfx_load_microcode(adev); 4654 if (r) 4655 return r; 4656 4657 if (adev->gfx.rs64_enable) 4658 r = gfx_v11_0_cp_compute_load_microcode_rs64(adev); 4659 else 4660 r = gfx_v11_0_cp_compute_load_microcode(adev); 4661 if (r) 4662 return r; 4663 } 4664 4665 gfx_v11_0_cp_set_doorbell_range(adev); 4666 4667 if (amdgpu_async_gfx_ring) { 4668 gfx_v11_0_cp_compute_enable(adev, true); 4669 gfx_v11_0_cp_gfx_enable(adev, true); 4670 } 4671 4672 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 4673 r = amdgpu_mes_kiq_hw_init(adev, 0); 4674 else 4675 r = gfx_v11_0_kiq_resume(adev); 4676 if (r) 4677 return r; 4678 4679 r = gfx_v11_0_kcq_resume(adev); 4680 if (r) 4681 return r; 4682 4683 if (!amdgpu_async_gfx_ring) { 4684 r = gfx_v11_0_cp_gfx_resume(adev); 4685 if (r) 4686 return r; 4687 } else { 4688 r = gfx_v11_0_cp_async_gfx_ring_resume(adev); 4689 if (r) 4690 return r; 4691 } 4692 4693 if (adev->gfx.disable_kq) { 4694 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4695 ring = &adev->gfx.gfx_ring[i]; 4696 /* we don't want to set ring->ready */ 4697 r = amdgpu_ring_test_ring(ring); 4698 if (r) 4699 return r; 4700 } 4701 if (amdgpu_async_gfx_ring) 4702 amdgpu_gfx_disable_kgq(adev, 0); 4703 } else { 4704 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 4705 ring = &adev->gfx.gfx_ring[i]; 4706 r = amdgpu_ring_test_helper(ring); 4707 if (r) 4708 return r; 4709 } 4710 } 4711 4712 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4713 ring = &adev->gfx.compute_ring[i]; 4714 r = amdgpu_ring_test_helper(ring); 4715 if (r) 4716 return r; 4717 } 4718 4719 return 0; 4720 } 4721 4722 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable) 4723 { 4724 gfx_v11_0_cp_gfx_enable(adev, enable); 4725 gfx_v11_0_cp_compute_enable(adev, 
enable); 4726 } 4727 4728 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) 4729 { 4730 int r; 4731 bool value; 4732 4733 r = adev->gfxhub.funcs->gart_enable(adev); 4734 if (r) 4735 return r; 4736 4737 amdgpu_device_flush_hdp(adev, NULL); 4738 4739 value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS; 4740 4741 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 4742 /* TODO investigate why this and the hdp flush above is needed, 4743 * are we missing a flush somewhere else? */ 4744 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 4745 4746 return 0; 4747 } 4748 4749 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev) 4750 { 4751 u32 tmp; 4752 4753 /* select RS64 */ 4754 if (adev->gfx.rs64_enable) { 4755 tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL); 4756 tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1); 4757 WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp); 4758 4759 tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL); 4760 tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1); 4761 WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp); 4762 } 4763 4764 if (amdgpu_emu_mode == 1) 4765 msleep(100); 4766 } 4767 4768 static int get_gb_addr_config(struct amdgpu_device * adev) 4769 { 4770 u32 gb_addr_config; 4771 4772 gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); 4773 if (gb_addr_config == 0) 4774 return -EINVAL; 4775 4776 adev->gfx.config.gb_addr_config_fields.num_pkrs = 4777 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); 4778 4779 adev->gfx.config.gb_addr_config = gb_addr_config; 4780 4781 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 4782 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4783 GB_ADDR_CONFIG, NUM_PIPES); 4784 4785 adev->gfx.config.max_tile_pipes = 4786 adev->gfx.config.gb_addr_config_fields.num_pipes; 4787 4788 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 4789 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4790 GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS); 4791 
adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 4792 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4793 GB_ADDR_CONFIG, NUM_RB_PER_SE); 4794 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 4795 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4796 GB_ADDR_CONFIG, NUM_SHADER_ENGINES); 4797 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 4798 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 4799 GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE)); 4800 4801 return 0; 4802 } 4803 4804 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev) 4805 { 4806 uint32_t data; 4807 4808 data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG); 4809 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 4810 WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data); 4811 4812 data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG); 4813 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 4814 WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); 4815 } 4816 4817 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev, 4818 bool enable) 4819 { 4820 unsigned int irq_type; 4821 int m, p, r; 4822 4823 if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) { 4824 for (m = 0; m < adev->gfx.me.num_me; m++) { 4825 for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) { 4826 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4827 if (enable) 4828 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type); 4829 else 4830 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4831 if (r) { 4832 if (!enable) 4833 return r; 4834 goto err_gfx; 4835 } 4836 } 4837 } 4838 } 4839 4840 if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) { 4841 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 4842 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 4843 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4844 + (m * adev->gfx.mec.num_pipe_per_mec) 4845 + p; 4846 if (enable) 4847 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type); 4848 else 4849 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4850 if (r) { 4851 if (!enable) 4852 return 
r; 4853 goto err_compute; 4854 } 4855 } 4856 } 4857 } 4858 4859 return 0; 4860 4861 err_compute: 4862 for (p--; p >= 0; p--) { 4863 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4864 + (m * adev->gfx.mec.num_pipe_per_mec) + p; 4865 amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4866 } 4867 for (m--; m >= 0; m--) { 4868 for (p = adev->gfx.mec.num_pipe_per_mec - 1; p >= 0; p--) { 4869 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 4870 + (m * adev->gfx.mec.num_pipe_per_mec) + p; 4871 amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4872 } 4873 } 4874 m = adev->gfx.me.num_me; 4875 err_gfx: 4876 for (p--; p >= 0; p--) { 4877 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4878 amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4879 } 4880 for (m--; m >= 0; m--) { 4881 for (p = adev->gfx.me.num_pipe_per_me - 1; p >= 0; p--) { 4882 irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p; 4883 amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type); 4884 } 4885 } 4886 return r; 4887 } 4888 4889 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block) 4890 { 4891 int r; 4892 struct amdgpu_device *adev = ip_block->adev; 4893 4894 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size, 4895 adev->gfx.cleaner_shader_ptr); 4896 4897 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 4898 if (adev->gfx.imu.funcs) { 4899 /* RLC autoload sequence 1: Program rlc ram */ 4900 if (adev->gfx.imu.funcs->program_rlc_ram) 4901 adev->gfx.imu.funcs->program_rlc_ram(adev); 4902 /* rlc autoload firmware */ 4903 r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); 4904 if (r) 4905 return r; 4906 } 4907 } else { 4908 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4909 if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { 4910 if (adev->gfx.imu.funcs->load_microcode) 4911 adev->gfx.imu.funcs->load_microcode(adev); 4912 if (adev->gfx.imu.funcs->setup_imu) 4913 adev->gfx.imu.funcs->setup_imu(adev); 4914 if (adev->gfx.imu.funcs->start_imu) 4915 
adev->gfx.imu.funcs->start_imu(adev); 4916 } 4917 4918 /* disable gpa mode in backdoor loading */ 4919 gfx_v11_0_disable_gpa_mode(adev); 4920 } 4921 } 4922 4923 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 4924 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 4925 r = gfx_v11_0_wait_for_rlc_autoload_complete(adev); 4926 if (r) { 4927 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 4928 return r; 4929 } 4930 } 4931 4932 adev->gfx.is_poweron = true; 4933 4934 if(get_gb_addr_config(adev)) 4935 drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n"); 4936 4937 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && 4938 adev->gfx.rs64_enable) 4939 gfx_v11_0_config_gfx_rs64(adev); 4940 4941 r = gfx_v11_0_gfxhub_enable(adev); 4942 if (r) 4943 return r; 4944 4945 if (!amdgpu_emu_mode) 4946 gfx_v11_0_init_golden_registers(adev); 4947 4948 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || 4949 (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) { 4950 /** 4951 * For gfx 11, rlc firmware loading relies on smu firmware is 4952 * loaded firstly, so in direct type, it has to load smc ucode 4953 * here before rlc. 
4954 */ 4955 r = amdgpu_pm_load_smu_firmware(adev, NULL); 4956 if (r) 4957 return r; 4958 } 4959 4960 gfx_v11_0_constants_init(adev); 4961 4962 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) 4963 gfx_v11_0_select_cp_fw_arch(adev); 4964 4965 if (adev->nbio.funcs->gc_doorbell_init) 4966 adev->nbio.funcs->gc_doorbell_init(adev); 4967 4968 r = gfx_v11_0_rlc_resume(adev); 4969 if (r) 4970 return r; 4971 4972 /* 4973 * init golden registers and rlc resume may override some registers, 4974 * reconfig them here 4975 */ 4976 gfx_v11_0_tcp_harvest(adev); 4977 4978 r = gfx_v11_0_cp_resume(adev); 4979 if (r) 4980 return r; 4981 4982 /* get IMU version from HW if it's not set */ 4983 if (!adev->gfx.imu_fw_version) 4984 adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); 4985 4986 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 4987 if (r) 4988 return r; 4989 4990 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 4991 if (r) 4992 goto err_priv_inst; 4993 4994 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0); 4995 if (r) 4996 goto err_bad_op; 4997 4998 r = gfx_v11_0_set_userq_eop_interrupts(adev, true); 4999 if (r) 5000 goto err_userq_eop; 5001 5002 return 0; 5003 5004 err_userq_eop: 5005 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 5006 err_bad_op: 5007 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5008 err_priv_inst: 5009 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5010 return r; 5011 } 5012 5013 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) 5014 { 5015 struct amdgpu_device *adev = ip_block->adev; 5016 5017 cancel_delayed_work_sync(&adev->gfx.idle_work); 5018 5019 gfx_v11_0_set_userq_eop_interrupts(adev, false); 5020 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); 5021 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5022 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5023 5024 if (!adev->no_hw_access) { 5025 if (amdgpu_async_gfx_ring && 5026 !adev->gfx.disable_kq) { 5027 if (amdgpu_gfx_disable_kgq(adev, 
0)) 5028 DRM_ERROR("KGQ disable failed\n"); 5029 } 5030 5031 if (amdgpu_gfx_disable_kcq(adev, 0)) 5032 DRM_ERROR("KCQ disable failed\n"); 5033 5034 amdgpu_mes_kiq_hw_fini(adev, 0); 5035 } 5036 5037 if (amdgpu_sriov_vf(adev)) 5038 /* Remove the steps disabling CPG and clearing KIQ position, 5039 * so that CP could perform IDLE-SAVE during switch. Those 5040 * steps are necessary to avoid a DMAR error in gfx9 but it is 5041 * not reproduced on gfx11. 5042 */ 5043 return 0; 5044 5045 gfx_v11_0_cp_enable(adev, false); 5046 gfx_v11_0_enable_gui_idle_interrupt(adev, false); 5047 5048 adev->gfxhub.funcs->gart_disable(adev); 5049 5050 adev->gfx.is_poweron = false; 5051 5052 return 0; 5053 } 5054 5055 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block) 5056 { 5057 return gfx_v11_0_hw_fini(ip_block); 5058 } 5059 5060 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) 5061 { 5062 return gfx_v11_0_hw_init(ip_block); 5063 } 5064 5065 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) 5066 { 5067 struct amdgpu_device *adev = ip_block->adev; 5068 5069 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), 5070 GRBM_STATUS, GUI_ACTIVE)) 5071 return false; 5072 else 5073 return true; 5074 } 5075 5076 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) 5077 { 5078 unsigned i; 5079 u32 tmp; 5080 struct amdgpu_device *adev = ip_block->adev; 5081 5082 for (i = 0; i < adev->usec_timeout; i++) { 5083 /* read MC_STATUS */ 5084 tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) & 5085 GRBM_STATUS__GUI_ACTIVE_MASK; 5086 5087 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 5088 return 0; 5089 udelay(1); 5090 } 5091 return -ETIMEDOUT; 5092 } 5093 5094 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, 5095 bool req) 5096 { 5097 u32 i, tmp, val; 5098 5099 for (i = 0; i < adev->usec_timeout; i++) { 5100 /* Request with MeId=2, PipeId=0 */ 5101 tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); 5102 tmp = REG_SET_FIELD(tmp, 
CP_GFX_INDEX_MUTEX, CLIENTID, 4); 5103 WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); 5104 5105 val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); 5106 if (req) { 5107 if (val == tmp) 5108 break; 5109 } else { 5110 tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, 5111 REQUEST, 1); 5112 5113 /* unlocked or locked by firmware */ 5114 if (val != tmp) 5115 break; 5116 } 5117 udelay(1); 5118 } 5119 5120 if (i >= adev->usec_timeout) 5121 return -EINVAL; 5122 5123 return 0; 5124 } 5125 5126 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block) 5127 { 5128 u32 grbm_soft_reset = 0; 5129 u32 tmp; 5130 int r, i, j, k; 5131 struct amdgpu_device *adev = ip_block->adev; 5132 5133 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5134 5135 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5136 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); 5137 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); 5138 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); 5139 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); 5140 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5141 5142 mutex_lock(&adev->srbm_mutex); 5143 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 5144 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 5145 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 5146 soc21_grbm_select(adev, i, k, j, 0); 5147 5148 WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); 5149 WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); 5150 } 5151 } 5152 } 5153 for (i = 0; i < adev->gfx.me.num_me; ++i) { 5154 for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { 5155 for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { 5156 soc21_grbm_select(adev, i, k, j, 0); 5157 5158 WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); 5159 } 5160 } 5161 } 5162 soc21_grbm_select(adev, 0, 0, 0, 0); 5163 mutex_unlock(&adev->srbm_mutex); 5164 5165 /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ 5166 mutex_lock(&adev->gfx.reset_sem_mutex); 
5167 r = gfx_v11_0_request_gfx_index_mutex(adev, true); 5168 if (r) { 5169 mutex_unlock(&adev->gfx.reset_sem_mutex); 5170 DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); 5171 return r; 5172 } 5173 5174 WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); 5175 5176 // Read CP_VMID_RESET register three times. 5177 // to get sufficient time for GFX_HQD_ACTIVE reach 0 5178 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5179 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5180 RREG32_SOC15(GC, 0, regCP_VMID_RESET); 5181 5182 /* release the gfx mutex */ 5183 r = gfx_v11_0_request_gfx_index_mutex(adev, false); 5184 mutex_unlock(&adev->gfx.reset_sem_mutex); 5185 if (r) { 5186 DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); 5187 return r; 5188 } 5189 5190 for (i = 0; i < adev->usec_timeout; i++) { 5191 if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && 5192 !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) 5193 break; 5194 udelay(1); 5195 } 5196 if (i >= adev->usec_timeout) { 5197 printk("Failed to wait all pipes clean\n"); 5198 return -EINVAL; 5199 } 5200 5201 /********** trigger soft reset ***********/ 5202 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5203 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5204 SOFT_RESET_CP, 1); 5205 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5206 SOFT_RESET_GFX, 1); 5207 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5208 SOFT_RESET_CPF, 1); 5209 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5210 SOFT_RESET_CPC, 1); 5211 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5212 SOFT_RESET_CPG, 1); 5213 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5214 /********** exit soft reset ***********/ 5215 grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); 5216 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5217 SOFT_RESET_CP, 0); 5218 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 
GRBM_SOFT_RESET, 5219 SOFT_RESET_GFX, 0); 5220 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5221 SOFT_RESET_CPF, 0); 5222 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5223 SOFT_RESET_CPC, 0); 5224 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5225 SOFT_RESET_CPG, 0); 5226 WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); 5227 5228 tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); 5229 tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); 5230 WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); 5231 5232 WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); 5233 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); 5234 5235 for (i = 0; i < adev->usec_timeout; i++) { 5236 if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) 5237 break; 5238 udelay(1); 5239 } 5240 if (i >= adev->usec_timeout) { 5241 printk("Failed to wait CP_VMID_RESET to 0\n"); 5242 return -EINVAL; 5243 } 5244 5245 tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5246 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5247 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5248 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5249 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5250 WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); 5251 5252 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5253 5254 return gfx_v11_0_cp_resume(adev); 5255 } 5256 5257 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 5258 { 5259 int i, r; 5260 struct amdgpu_device *adev = ip_block->adev; 5261 struct amdgpu_ring *ring; 5262 long tmo = msecs_to_jiffies(1000); 5263 5264 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 5265 ring = &adev->gfx.gfx_ring[i]; 5266 r = amdgpu_ring_test_ib(ring, tmo); 5267 if (r) 5268 return true; 5269 } 5270 5271 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5272 ring = &adev->gfx.compute_ring[i]; 5273 r = amdgpu_ring_test_ib(ring, tmo); 5274 if (r) 5275 return true; 5276 } 5277 
5278 return false; 5279 } 5280 5281 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5282 { 5283 struct amdgpu_device *adev = ip_block->adev; 5284 /** 5285 * GFX soft reset will impact MES, need resume MES when do GFX soft reset 5286 */ 5287 return amdgpu_mes_resume(adev, 0); 5288 } 5289 5290 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5291 { 5292 uint64_t clock; 5293 uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; 5294 5295 if (amdgpu_sriov_vf(adev)) { 5296 amdgpu_gfx_off_ctrl(adev, false); 5297 mutex_lock(&adev->gfx.gpu_clock_mutex); 5298 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5299 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5300 clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI); 5301 if (clock_counter_hi_pre != clock_counter_hi_after) 5302 clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO); 5303 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5304 amdgpu_gfx_off_ctrl(adev, true); 5305 } else { 5306 preempt_disable(); 5307 if (amdgpu_ip_version(adev, SMUIO_HWIP, 0) < IP_VERSION(15, 0, 0)) { 5308 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, 5309 regGOLDEN_TSC_COUNT_UPPER); 5310 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5311 regGOLDEN_TSC_COUNT_LOWER); 5312 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, 5313 regGOLDEN_TSC_COUNT_UPPER); 5314 if (clock_counter_hi_pre != clock_counter_hi_after) 5315 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5316 regGOLDEN_TSC_COUNT_LOWER); 5317 } else { 5318 clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, 5319 regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); 5320 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5321 regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); 5322 clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, 5323 regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0); 5324 if (clock_counter_hi_pre != clock_counter_hi_after) 
5325 clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, 5326 regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0); 5327 } 5328 preempt_enable(); 5329 } 5330 clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); 5331 5332 return clock; 5333 } 5334 5335 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5336 uint32_t vmid, 5337 uint32_t gds_base, uint32_t gds_size, 5338 uint32_t gws_base, uint32_t gws_size, 5339 uint32_t oa_base, uint32_t oa_size) 5340 { 5341 struct amdgpu_device *adev = ring->adev; 5342 5343 /* GDS Base */ 5344 gfx_v11_0_write_data_to_reg(ring, 0, false, 5345 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid, 5346 gds_base); 5347 5348 /* GDS Size */ 5349 gfx_v11_0_write_data_to_reg(ring, 0, false, 5350 SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid, 5351 gds_size); 5352 5353 /* GWS */ 5354 gfx_v11_0_write_data_to_reg(ring, 0, false, 5355 SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid, 5356 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5357 5358 /* OA */ 5359 gfx_v11_0_write_data_to_reg(ring, 0, false, 5360 SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid, 5361 (1 << (oa_size + oa_base)) - (1 << oa_base)); 5362 } 5363 5364 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block) 5365 { 5366 struct amdgpu_device *adev = ip_block->adev; 5367 5368 switch (amdgpu_user_queue) { 5369 case -1: 5370 case 0: 5371 default: 5372 adev->gfx.disable_kq = false; 5373 adev->gfx.disable_uq = true; 5374 break; 5375 case 1: 5376 adev->gfx.disable_kq = false; 5377 adev->gfx.disable_uq = false; 5378 break; 5379 case 2: 5380 adev->gfx.disable_kq = true; 5381 adev->gfx.disable_uq = false; 5382 break; 5383 } 5384 5385 adev->gfx.funcs = &gfx_v11_0_gfx_funcs; 5386 5387 if (adev->gfx.disable_kq) { 5388 /* We need one GFX ring temporarily to set up 5389 * the clear state. 
5390 */ 5391 adev->gfx.num_gfx_rings = 1; 5392 adev->gfx.num_compute_rings = 0; 5393 } else { 5394 adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS; 5395 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5396 AMDGPU_MAX_COMPUTE_RINGS); 5397 } 5398 5399 gfx_v11_0_set_kiq_pm4_funcs(adev); 5400 gfx_v11_0_set_ring_funcs(adev); 5401 gfx_v11_0_set_irq_funcs(adev); 5402 gfx_v11_0_set_gds_init(adev); 5403 gfx_v11_0_set_rlc_funcs(adev); 5404 gfx_v11_0_set_mqd_funcs(adev); 5405 gfx_v11_0_set_imu_funcs(adev); 5406 5407 gfx_v11_0_init_rlcg_reg_access_ctrl(adev); 5408 5409 return gfx_v11_0_init_microcode(adev); 5410 } 5411 5412 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev) 5413 { 5414 uint32_t rlc_cntl; 5415 5416 /* if RLC is not enabled, do nothing */ 5417 rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL); 5418 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 5419 } 5420 5421 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5422 { 5423 uint32_t data; 5424 unsigned i; 5425 5426 data = RLC_SAFE_MODE__CMD_MASK; 5427 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5428 5429 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data); 5430 5431 /* wait for RLC_SAFE_MODE */ 5432 for (i = 0; i < adev->usec_timeout; i++) { 5433 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE), 5434 RLC_SAFE_MODE, CMD)) 5435 break; 5436 udelay(1); 5437 } 5438 } 5439 5440 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5441 { 5442 WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 5443 } 5444 5445 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, 5446 bool enable) 5447 { 5448 uint32_t def, data; 5449 5450 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 5451 return; 5452 5453 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5454 5455 if (enable) 5456 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5457 else 5458 data |= 
RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 5459 5460 if (def != data) 5461 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5462 } 5463 5464 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev, 5465 bool enable) 5466 { 5467 uint32_t def, data; 5468 5469 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 5470 return; 5471 5472 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5473 5474 if (enable) 5475 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5476 else 5477 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 5478 5479 if (def != data) 5480 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5481 } 5482 5483 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev, 5484 bool enable) 5485 { 5486 uint32_t def, data; 5487 5488 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 5489 return; 5490 5491 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5492 5493 if (enable) 5494 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5495 else 5496 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK; 5497 5498 if (def != data) 5499 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5500 } 5501 5502 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5503 bool enable) 5504 { 5505 uint32_t data, def; 5506 5507 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 5508 return; 5509 5510 /* It is disabled by HW by default */ 5511 if (enable) { 5512 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5513 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 5514 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5515 5516 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5517 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5518 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5519 5520 if (def != data) 5521 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5522 } 5523 } else { 5524 if 
(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5525 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5526 5527 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 5528 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 5529 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 5530 5531 if (def != data) 5532 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5533 } 5534 } 5535 } 5536 5537 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5538 bool enable) 5539 { 5540 uint32_t def, data; 5541 5542 if (!(adev->cg_flags & 5543 (AMD_CG_SUPPORT_GFX_CGCG | 5544 AMD_CG_SUPPORT_GFX_CGLS | 5545 AMD_CG_SUPPORT_GFX_3D_CGCG | 5546 AMD_CG_SUPPORT_GFX_3D_CGLS))) 5547 return; 5548 5549 if (enable) { 5550 def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5551 5552 /* unset CGCG override */ 5553 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5554 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 5555 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5556 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 5557 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 5558 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5559 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 5560 5561 /* update CGCG override bits */ 5562 if (def != data) 5563 WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data); 5564 5565 /* enable cgcg FSM(0x0000363F) */ 5566 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5567 5568 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5569 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 5570 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5571 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5572 } 5573 5574 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5575 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 5576 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5577 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5578 } 5579 5580 if (def != 
data) 5581 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5582 5583 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5584 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5585 5586 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5587 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK; 5588 data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 5589 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5590 } 5591 5592 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5593 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK; 5594 data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 5595 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5596 } 5597 5598 if (def != data) 5599 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5600 5601 /* set IDLE_POLL_COUNT(0x00900100) */ 5602 def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL); 5603 5604 data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK); 5605 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 5606 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 5607 5608 if (def != data) 5609 WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data); 5610 5611 data = RREG32_SOC15(GC, 0, regCP_INT_CNTL); 5612 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 5613 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 5614 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 5615 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 5616 WREG32_SOC15(GC, 0, regCP_INT_CNTL, data); 5617 5618 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5619 data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5620 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5621 5622 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5623 if (adev->sdma.num_instances > 1) { 5624 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5625 data = REG_SET_FIELD(data, 
SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); 5626 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5627 } 5628 } else { 5629 /* Program RLC_CGCG_CGLS_CTRL */ 5630 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5631 5632 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 5633 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5634 5635 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 5636 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5637 5638 if (def != data) 5639 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data); 5640 5641 /* Program RLC_CGCG_CGLS_CTRL_3D */ 5642 def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5643 5644 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) 5645 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 5646 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 5647 data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 5648 5649 if (def != data) 5650 WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); 5651 5652 data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); 5653 data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5654 WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); 5655 5656 /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ 5657 if (adev->sdma.num_instances > 1) { 5658 data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); 5659 data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; 5660 WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); 5661 } 5662 } 5663 } 5664 5665 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5666 bool enable) 5667 { 5668 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5669 5670 gfx_v11_0_update_coarse_grain_clock_gating(adev, enable); 5671 5672 gfx_v11_0_update_medium_grain_clock_gating(adev, enable); 5673 5674 gfx_v11_0_update_repeater_fgcg(adev, enable); 5675 5676 gfx_v11_0_update_sram_fgcg(adev, enable); 5677 5678 gfx_v11_0_update_perf_clk(adev, enable); 5679 5680 if (adev->cg_flags & 5681 (AMD_CG_SUPPORT_GFX_MGCG | 5682 AMD_CG_SUPPORT_GFX_CGLS | 5683 AMD_CG_SUPPORT_GFX_CGCG | 5684 
AMD_CG_SUPPORT_GFX_3D_CGCG | 5685 AMD_CG_SUPPORT_GFX_3D_CGLS)) 5686 gfx_v11_0_enable_gui_idle_interrupt(adev, enable); 5687 5688 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5689 5690 return 0; 5691 } 5692 5693 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id, 5694 struct amdgpu_ring *ring, unsigned vmid) 5695 { 5696 u32 reg, pre_data, data; 5697 5698 amdgpu_gfx_off_ctrl(adev, false); 5699 reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); 5700 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) 5701 pre_data = RREG32_NO_KIQ(reg); 5702 else 5703 pre_data = RREG32(reg); 5704 5705 data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK); 5706 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 5707 5708 if (pre_data != data) { 5709 if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) { 5710 WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); 5711 } else 5712 WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); 5713 } 5714 amdgpu_gfx_off_ctrl(adev, true); 5715 5716 if (ring 5717 && amdgpu_sriov_is_pp_one_vf(adev) 5718 && (pre_data != data) 5719 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 5720 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 5721 amdgpu_ring_emit_wreg(ring, reg, data); 5722 } 5723 } 5724 5725 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { 5726 .is_rlc_enabled = gfx_v11_0_is_rlc_enabled, 5727 .set_safe_mode = gfx_v11_0_set_safe_mode, 5728 .unset_safe_mode = gfx_v11_0_unset_safe_mode, 5729 .init = gfx_v11_0_rlc_init, 5730 .get_csb_size = gfx_v11_0_get_csb_size, 5731 .get_csb_buffer = gfx_v11_0_get_csb_buffer, 5732 .resume = gfx_v11_0_rlc_resume, 5733 .stop = gfx_v11_0_rlc_stop, 5734 .reset = gfx_v11_0_rlc_reset, 5735 .start = gfx_v11_0_rlc_start, 5736 .update_spm_vmid = gfx_v11_0_update_spm_vmid, 5737 }; 5738 5739 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) 5740 { 5741 u32 data = RREG32_SOC15(GC, 0, 
regRLC_PG_CNTL); 5742 5743 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5744 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5745 else 5746 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; 5747 5748 WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); 5749 5750 // Program RLC_PG_DELAY3 for CGPG hysteresis 5751 if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { 5752 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5753 case IP_VERSION(11, 0, 1): 5754 case IP_VERSION(11, 0, 4): 5755 case IP_VERSION(11, 5, 0): 5756 case IP_VERSION(11, 5, 1): 5757 case IP_VERSION(11, 5, 2): 5758 case IP_VERSION(11, 5, 3): 5759 case IP_VERSION(11, 5, 4): 5760 case IP_VERSION(11, 5, 6): 5761 WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); 5762 break; 5763 default: 5764 break; 5765 } 5766 } 5767 } 5768 5769 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) 5770 { 5771 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5772 5773 gfx_v11_cntl_power_gating(adev, enable); 5774 5775 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5776 } 5777 5778 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5779 enum amd_powergating_state state) 5780 { 5781 struct amdgpu_device *adev = ip_block->adev; 5782 bool enable = (state == AMD_PG_STATE_GATE); 5783 5784 if (amdgpu_sriov_vf(adev)) 5785 return 0; 5786 5787 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5788 case IP_VERSION(11, 0, 0): 5789 case IP_VERSION(11, 0, 2): 5790 case IP_VERSION(11, 0, 3): 5791 amdgpu_gfx_off_ctrl(adev, enable); 5792 break; 5793 case IP_VERSION(11, 0, 1): 5794 case IP_VERSION(11, 0, 4): 5795 case IP_VERSION(11, 5, 0): 5796 case IP_VERSION(11, 5, 1): 5797 case IP_VERSION(11, 5, 2): 5798 case IP_VERSION(11, 5, 3): 5799 case IP_VERSION(11, 5, 4): 5800 case IP_VERSION(11, 5, 6): 5801 if (!enable) 5802 amdgpu_gfx_off_ctrl(adev, false); 5803 5804 gfx_v11_cntl_pg(adev, enable); 5805 5806 if (enable) 5807 amdgpu_gfx_off_ctrl(adev, true); 5808 5809 break; 5810 default: 
5811 break; 5812 } 5813 5814 return 0; 5815 } 5816 5817 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5818 enum amd_clockgating_state state) 5819 { 5820 struct amdgpu_device *adev = ip_block->adev; 5821 5822 if (amdgpu_sriov_vf(adev)) 5823 return 0; 5824 5825 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 5826 case IP_VERSION(11, 0, 0): 5827 case IP_VERSION(11, 0, 1): 5828 case IP_VERSION(11, 0, 2): 5829 case IP_VERSION(11, 0, 3): 5830 case IP_VERSION(11, 0, 4): 5831 case IP_VERSION(11, 5, 0): 5832 case IP_VERSION(11, 5, 1): 5833 case IP_VERSION(11, 5, 2): 5834 case IP_VERSION(11, 5, 3): 5835 case IP_VERSION(11, 5, 4): 5836 case IP_VERSION(11, 5, 6): 5837 gfx_v11_0_update_gfx_clock_gating(adev, 5838 state == AMD_CG_STATE_GATE); 5839 break; 5840 default: 5841 break; 5842 } 5843 5844 return 0; 5845 } 5846 5847 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 5848 { 5849 struct amdgpu_device *adev = ip_block->adev; 5850 int data; 5851 5852 /* AMD_CG_SUPPORT_GFX_MGCG */ 5853 data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE); 5854 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 5855 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5856 5857 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 5858 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 5859 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 5860 5861 /* AMD_CG_SUPPORT_GFX_FGCG */ 5862 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 5863 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 5864 5865 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 5866 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 5867 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 5868 5869 /* AMD_CG_SUPPORT_GFX_CGCG */ 5870 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); 5871 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5872 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5873 5874 /* AMD_CG_SUPPORT_GFX_CGLS */ 5875 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5876 *flags |= 
AMD_CG_SUPPORT_GFX_CGLS; 5877 5878 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 5879 data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D); 5880 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 5881 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 5882 5883 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 5884 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 5885 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 5886 } 5887 5888 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5889 { 5890 /* gfx11 is 32bit rptr*/ 5891 return *(uint32_t *)ring->rptr_cpu_addr; 5892 } 5893 5894 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5895 { 5896 struct amdgpu_device *adev = ring->adev; 5897 u64 wptr; 5898 5899 /* XXX check if swapping is necessary on BE */ 5900 if (ring->use_doorbell) { 5901 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5902 } else { 5903 wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR); 5904 wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32; 5905 } 5906 5907 return wptr; 5908 } 5909 5910 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5911 { 5912 struct amdgpu_device *adev = ring->adev; 5913 5914 if (ring->use_doorbell) { 5915 /* XXX check if swapping is necessary on BE */ 5916 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5917 ring->wptr); 5918 WDOORBELL64(ring->doorbell_index, ring->wptr); 5919 } else { 5920 WREG32_SOC15(GC, 0, regCP_RB0_WPTR, 5921 lower_32_bits(ring->wptr)); 5922 WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, 5923 upper_32_bits(ring->wptr)); 5924 } 5925 } 5926 5927 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5928 { 5929 /* gfx11 hardware is 32bit rptr */ 5930 return *(uint32_t *)ring->rptr_cpu_addr; 5931 } 5932 5933 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5934 { 5935 u64 wptr; 5936 5937 /* XXX check if swapping is necessary on BE */ 5938 if (ring->use_doorbell) 5939 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 5940 else 5941 BUG(); 5942 return wptr; 5943 } 5944 5945 
static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5946 { 5947 struct amdgpu_device *adev = ring->adev; 5948 5949 /* XXX check if swapping is necessary on BE */ 5950 if (ring->use_doorbell) { 5951 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 5952 ring->wptr); 5953 WDOORBELL64(ring->doorbell_index, ring->wptr); 5954 } else { 5955 BUG(); /* only DOORBELL method supported on gfx11 now */ 5956 } 5957 } 5958 5959 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5960 { 5961 struct amdgpu_device *adev = ring->adev; 5962 u32 ref_and_mask, reg_mem_engine; 5963 5964 if (!adev->gfx.funcs->get_hdp_flush_mask) { 5965 dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__); 5966 return; 5967 } 5968 5969 adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, ®_mem_engine); 5970 gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 5971 adev->nbio.funcs->get_hdp_flush_req_offset(adev), 5972 adev->nbio.funcs->get_hdp_flush_done_offset(adev), 5973 ref_and_mask, ref_and_mask, 0x20); 5974 } 5975 5976 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5977 struct amdgpu_job *job, 5978 struct amdgpu_ib *ib, 5979 uint32_t flags) 5980 { 5981 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 5982 u32 header, control = 0; 5983 5984 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5985 5986 control |= ib->length_dw | (vmid << 24); 5987 5988 if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 5989 control |= INDIRECT_BUFFER_PRE_ENB(1); 5990 5991 if (flags & AMDGPU_IB_PREEMPTED) 5992 control |= INDIRECT_BUFFER_PRE_RESUME(1); 5993 5994 if (vmid && !ring->adev->gfx.rs64_enable) 5995 gfx_v11_0_ring_emit_de_meta(ring, 5996 !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED)); 5997 } 5998 5999 amdgpu_ring_write(ring, header); 6000 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 6001 amdgpu_ring_write(ring, 6002 #ifdef __BIG_ENDIAN 6003 (2 << 0) | 6004 #endif 6005 lower_32_bits(ib->gpu_addr)); 6006 
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 6007 amdgpu_ring_write(ring, control); 6008 } 6009 6010 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6011 struct amdgpu_job *job, 6012 struct amdgpu_ib *ib, 6013 uint32_t flags) 6014 { 6015 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 6016 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6017 6018 /* Currently, there is a high possibility to get wave ID mismatch 6019 * between ME and GDS, leading to a hw deadlock, because ME generates 6020 * different wave IDs than the GDS expects. This situation happens 6021 * randomly when at least 5 compute pipes use GDS ordered append. 6022 * The wave IDs generated by ME are also wrong after suspend/resume. 6023 * Those are probably bugs somewhere else in the kernel driver. 6024 * 6025 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 6026 * GDS to 0 for this ring (me/pipe). 6027 */ 6028 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 6029 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 6030 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 6031 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 6032 } 6033 6034 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6035 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 6036 amdgpu_ring_write(ring, 6037 #ifdef __BIG_ENDIAN 6038 (2 << 0) | 6039 #endif 6040 lower_32_bits(ib->gpu_addr)); 6041 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 6042 amdgpu_ring_write(ring, control); 6043 } 6044 6045 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 6046 u64 seq, unsigned flags) 6047 { 6048 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6049 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6050 6051 /* RELEASE_MEM - flush caches, send int */ 6052 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 6053 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ | 6054 PACKET3_RELEASE_MEM_GCR_GL2_WB | 6055 
PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */ 6056 PACKET3_RELEASE_MEM_GCR_GLM_WB | 6057 PACKET3_RELEASE_MEM_CACHE_POLICY(3) | 6058 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6059 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 6060 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 6061 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 6062 6063 /* 6064 * the address should be Qword aligned if 64bit write, Dword 6065 * aligned if only send 32bit data low (discard data high) 6066 */ 6067 if (write64bit) 6068 BUG_ON(addr & 0x7); 6069 else 6070 BUG_ON(addr & 0x3); 6071 amdgpu_ring_write(ring, lower_32_bits(addr)); 6072 amdgpu_ring_write(ring, upper_32_bits(addr)); 6073 amdgpu_ring_write(ring, lower_32_bits(seq)); 6074 amdgpu_ring_write(ring, upper_32_bits(seq)); 6075 amdgpu_ring_write(ring, 0); 6076 } 6077 6078 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6079 { 6080 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6081 uint32_t seq = ring->fence_drv.sync_seq; 6082 uint64_t addr = ring->fence_drv.gpu_addr; 6083 6084 gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr), 6085 upper_32_bits(addr), seq, 0xffffffff, 4); 6086 } 6087 6088 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, 6089 uint16_t pasid, uint32_t flush_type, 6090 bool all_hub, uint8_t dst_sel) 6091 { 6092 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 6093 amdgpu_ring_write(ring, 6094 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 6095 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 6096 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 6097 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 6098 } 6099 6100 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6101 unsigned vmid, uint64_t pd_addr) 6102 { 6103 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6104 6105 /* compute doesn't have PFP */ 6106 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 6107 /* sync PFP to ME, 
otherwise we might get invalid PFP reads */ 6108 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6109 amdgpu_ring_write(ring, 0x0); 6110 } 6111 6112 /* Make sure that we can't skip the SET_Q_MODE packets when the VM 6113 * changed in any way. 6114 */ 6115 ring->set_q_mode_offs = 0; 6116 ring->set_q_mode_ptr = NULL; 6117 } 6118 6119 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6120 u64 seq, unsigned int flags) 6121 { 6122 struct amdgpu_device *adev = ring->adev; 6123 6124 /* we only allocate 32bit for each seq wb address */ 6125 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6126 6127 /* write fence seq to the "addr" */ 6128 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6129 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6130 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6131 amdgpu_ring_write(ring, lower_32_bits(addr)); 6132 amdgpu_ring_write(ring, upper_32_bits(addr)); 6133 amdgpu_ring_write(ring, lower_32_bits(seq)); 6134 6135 if (flags & AMDGPU_FENCE_FLAG_INT) { 6136 /* set register to trigger INT */ 6137 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6138 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6139 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6140 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS)); 6141 amdgpu_ring_write(ring, 0); 6142 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6143 } 6144 } 6145 6146 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, 6147 uint32_t flags) 6148 { 6149 uint32_t dw2 = 0; 6150 6151 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6152 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6153 /* set load_global_config & load_global_uconfig */ 6154 dw2 |= 0x8001; 6155 /* set load_cs_sh_regs */ 6156 dw2 |= 0x01000000; 6157 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6158 dw2 |= 0x10002; 6159 } 6160 6161 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6162 amdgpu_ring_write(ring, dw2); 
6163 amdgpu_ring_write(ring, 0); 6164 } 6165 6166 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, 6167 uint64_t addr) 6168 { 6169 unsigned ret; 6170 6171 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6172 amdgpu_ring_write(ring, lower_32_bits(addr)); 6173 amdgpu_ring_write(ring, upper_32_bits(addr)); 6174 /* discard following DWs if *cond_exec_gpu_addr==0 */ 6175 amdgpu_ring_write(ring, 0); 6176 ret = ring->wptr & ring->buf_mask; 6177 /* patch dummy value later */ 6178 amdgpu_ring_write(ring, 0); 6179 6180 return ret; 6181 } 6182 6183 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, 6184 u64 shadow_va, u64 csa_va, 6185 u64 gds_va, bool init_shadow, 6186 int vmid) 6187 { 6188 struct amdgpu_device *adev = ring->adev; 6189 unsigned int offs, end; 6190 6191 if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) 6192 return; 6193 6194 /* 6195 * The logic here isn't easy to understand because we need to keep state 6196 * accross multiple executions of the function as well as between the 6197 * CPU and GPU. The general idea is that the newly written GPU command 6198 * has a condition on the previous one and only executed if really 6199 * necessary. 6200 */ 6201 6202 /* 6203 * The dw in the NOP controls if the next SET_Q_MODE packet should be 6204 * executed or not. Reserve 64bits just to be on the save side. 6205 */ 6206 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); 6207 offs = ring->wptr & ring->buf_mask; 6208 6209 /* 6210 * We start with skipping the prefix SET_Q_MODE and always executing 6211 * the postfix SET_Q_MODE packet. This is changed below with a 6212 * WRITE_DATA command when the postfix executed. 6213 */ 6214 amdgpu_ring_write(ring, shadow_va ? 
1 : 0); 6215 amdgpu_ring_write(ring, 0); 6216 6217 if (ring->set_q_mode_offs) { 6218 uint64_t addr; 6219 6220 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6221 addr += ring->set_q_mode_offs << 2; 6222 end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); 6223 } 6224 6225 /* 6226 * When the postfix SET_Q_MODE packet executes we need to make sure that the 6227 * next prefix SET_Q_MODE packet executes as well. 6228 */ 6229 if (!shadow_va) { 6230 uint64_t addr; 6231 6232 addr = amdgpu_bo_gpu_offset(ring->ring_obj); 6233 addr += offs << 2; 6234 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6235 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 6236 amdgpu_ring_write(ring, lower_32_bits(addr)); 6237 amdgpu_ring_write(ring, upper_32_bits(addr)); 6238 amdgpu_ring_write(ring, 0x1); 6239 } 6240 6241 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); 6242 amdgpu_ring_write(ring, lower_32_bits(shadow_va)); 6243 amdgpu_ring_write(ring, upper_32_bits(shadow_va)); 6244 amdgpu_ring_write(ring, lower_32_bits(gds_va)); 6245 amdgpu_ring_write(ring, upper_32_bits(gds_va)); 6246 amdgpu_ring_write(ring, lower_32_bits(csa_va)); 6247 amdgpu_ring_write(ring, upper_32_bits(csa_va)); 6248 amdgpu_ring_write(ring, shadow_va ? 6249 PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); 6250 amdgpu_ring_write(ring, init_shadow ? 6251 PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); 6252 6253 if (ring->set_q_mode_offs) 6254 amdgpu_ring_patch_cond_exec(ring, end); 6255 6256 if (shadow_va) { 6257 uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; 6258 6259 /* 6260 * If the tokens match try to skip the last postfix SET_Q_MODE 6261 * packet to avoid saving/restoring the state all the time. 
6262 */ 6263 if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) 6264 *ring->set_q_mode_ptr = 0; 6265 6266 ring->set_q_mode_token = token; 6267 } else { 6268 ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; 6269 } 6270 6271 ring->set_q_mode_offs = offs; 6272 } 6273 6274 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) 6275 { 6276 struct amdgpu_device *adev = ring->adev; 6277 struct v10_de_ib_state de_payload = {0}; 6278 uint64_t offset, gds_addr, de_payload_gpu_addr; 6279 void *de_payload_cpu_addr; 6280 int cnt; 6281 6282 offset = offsetof(struct v10_gfx_meta_data, de_payload); 6283 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; 6284 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; 6285 6286 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + 6287 AMDGPU_CSA_SIZE - adev->gds.gds_size, 6288 PAGE_SIZE); 6289 6290 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); 6291 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); 6292 6293 cnt = (sizeof(de_payload) >> 2) + 4 - 2; 6294 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 6295 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 6296 WRITE_DATA_DST_SEL(8) | 6297 WR_CONFIRM) | 6298 WRITE_DATA_CACHE_POLICY(0)); 6299 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); 6300 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); 6301 6302 if (resume) 6303 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, 6304 sizeof(de_payload) >> 2); 6305 else 6306 amdgpu_ring_write_multiple(ring, (void *)&de_payload, 6307 sizeof(de_payload) >> 2); 6308 } 6309 6310 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, 6311 bool secure) 6312 { 6313 uint32_t v = secure ? FRAME_TMZ : 0; 6314 6315 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); 6316 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 
0 : 1)); 6317 } 6318 6319 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 6320 uint32_t reg_val_offs) 6321 { 6322 struct amdgpu_device *adev = ring->adev; 6323 6324 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6325 amdgpu_ring_write(ring, 0 | /* src: register*/ 6326 (5 << 8) | /* dst: memory */ 6327 (1 << 20)); /* write confirm */ 6328 amdgpu_ring_write(ring, reg); 6329 amdgpu_ring_write(ring, 0); 6330 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6331 reg_val_offs * 4)); 6332 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6333 reg_val_offs * 4)); 6334 } 6335 6336 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6337 uint32_t val) 6338 { 6339 uint32_t cmd = 0; 6340 6341 switch (ring->funcs->type) { 6342 case AMDGPU_RING_TYPE_GFX: 6343 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6344 break; 6345 case AMDGPU_RING_TYPE_KIQ: 6346 cmd = (1 << 16); /* no inc addr */ 6347 break; 6348 default: 6349 cmd = WR_CONFIRM; 6350 break; 6351 } 6352 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6353 amdgpu_ring_write(ring, cmd); 6354 amdgpu_ring_write(ring, reg); 6355 amdgpu_ring_write(ring, 0); 6356 amdgpu_ring_write(ring, val); 6357 } 6358 6359 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 6360 uint32_t val, uint32_t mask) 6361 { 6362 gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 6363 } 6364 6365 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 6366 uint32_t reg0, uint32_t reg1, 6367 uint32_t ref, uint32_t mask) 6368 { 6369 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6370 6371 gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 6372 ref, mask, 0x20); 6373 } 6374 6375 static void 6376 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6377 uint32_t me, uint32_t pipe, 6378 enum amdgpu_interrupt_state state) 6379 { 6380 uint32_t cp_int_cntl, cp_int_cntl_reg; 6381 6382 if 
(!me) { 6383 switch (pipe) { 6384 case 0: 6385 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0); 6386 break; 6387 case 1: 6388 cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1); 6389 break; 6390 default: 6391 DRM_DEBUG("invalid pipe %d\n", pipe); 6392 return; 6393 } 6394 } else { 6395 DRM_DEBUG("invalid me %d\n", me); 6396 return; 6397 } 6398 6399 switch (state) { 6400 case AMDGPU_IRQ_STATE_DISABLE: 6401 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6402 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6403 TIME_STAMP_INT_ENABLE, 0); 6404 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6405 GENERIC0_INT_ENABLE, 0); 6406 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6407 break; 6408 case AMDGPU_IRQ_STATE_ENABLE: 6409 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6410 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6411 TIME_STAMP_INT_ENABLE, 1); 6412 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6413 GENERIC0_INT_ENABLE, 1); 6414 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6415 break; 6416 default: 6417 break; 6418 } 6419 } 6420 6421 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6422 int me, int pipe, 6423 enum amdgpu_interrupt_state state) 6424 { 6425 u32 mec_int_cntl, mec_int_cntl_reg; 6426 6427 /* 6428 * amdgpu controls only the first MEC. That's why this function only 6429 * handles the setting of interrupts for this specific MEC. All other 6430 * pipes' interrupts are set by amdkfd. 
6431 */ 6432 6433 if (me == 1) { 6434 switch (pipe) { 6435 case 0: 6436 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6437 break; 6438 case 1: 6439 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL); 6440 break; 6441 case 2: 6442 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL); 6443 break; 6444 case 3: 6445 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL); 6446 break; 6447 default: 6448 DRM_DEBUG("invalid pipe %d\n", pipe); 6449 return; 6450 } 6451 } else { 6452 DRM_DEBUG("invalid me %d\n", me); 6453 return; 6454 } 6455 6456 switch (state) { 6457 case AMDGPU_IRQ_STATE_DISABLE: 6458 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6459 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6460 TIME_STAMP_INT_ENABLE, 0); 6461 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6462 GENERIC0_INT_ENABLE, 0); 6463 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6464 break; 6465 case AMDGPU_IRQ_STATE_ENABLE: 6466 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg); 6467 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6468 TIME_STAMP_INT_ENABLE, 1); 6469 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6470 GENERIC0_INT_ENABLE, 1); 6471 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl); 6472 break; 6473 default: 6474 break; 6475 } 6476 } 6477 6478 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6479 struct amdgpu_irq_src *src, 6480 unsigned type, 6481 enum amdgpu_interrupt_state state) 6482 { 6483 switch (type) { 6484 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP: 6485 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state); 6486 break; 6487 case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP: 6488 gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state); 6489 break; 6490 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6491 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6492 break; 6493 case 
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6494 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6495 break; 6496 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6497 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6498 break; 6499 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6500 gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6501 break; 6502 default: 6503 break; 6504 } 6505 return 0; 6506 } 6507 6508 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev, 6509 struct amdgpu_irq_src *source, 6510 struct amdgpu_iv_entry *entry) 6511 { 6512 u32 doorbell_offset = entry->src_data[0]; 6513 u8 me_id, pipe_id, queue_id; 6514 struct amdgpu_ring *ring; 6515 int i; 6516 6517 DRM_DEBUG("IH: CP EOP\n"); 6518 6519 if (adev->enable_mes && doorbell_offset) { 6520 amdgpu_userq_process_fence_irq(adev, doorbell_offset); 6521 } else { 6522 me_id = (entry->ring_id & 0x0c) >> 2; 6523 pipe_id = (entry->ring_id & 0x03) >> 0; 6524 queue_id = (entry->ring_id & 0x70) >> 4; 6525 6526 switch (me_id) { 6527 case 0: 6528 if (pipe_id == 0) 6529 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6530 else 6531 amdgpu_fence_process(&adev->gfx.gfx_ring[1]); 6532 break; 6533 case 1: 6534 case 2: 6535 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6536 ring = &adev->gfx.compute_ring[i]; 6537 /* Per-queue interrupt is supported for MEC starting from VI. 6538 * The interrupt can only be enabled/disabled per pipe instead 6539 * of per queue. 
6540 */ 6541 if ((ring->me == me_id) && 6542 (ring->pipe == pipe_id) && 6543 (ring->queue == queue_id)) 6544 amdgpu_fence_process(ring); 6545 } 6546 break; 6547 } 6548 } 6549 6550 return 0; 6551 } 6552 6553 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6554 struct amdgpu_irq_src *source, 6555 unsigned int type, 6556 enum amdgpu_interrupt_state state) 6557 { 6558 u32 cp_int_cntl_reg, cp_int_cntl; 6559 int i, j; 6560 6561 switch (state) { 6562 case AMDGPU_IRQ_STATE_DISABLE: 6563 case AMDGPU_IRQ_STATE_ENABLE: 6564 for (i = 0; i < adev->gfx.me.num_me; i++) { 6565 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6566 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6567 6568 if (cp_int_cntl_reg) { 6569 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6570 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6571 PRIV_REG_INT_ENABLE, 6572 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6573 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6574 } 6575 } 6576 } 6577 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6578 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6579 /* MECs start at 1 */ 6580 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6581 6582 if (cp_int_cntl_reg) { 6583 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6584 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6585 PRIV_REG_INT_ENABLE, 6586 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6587 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6588 } 6589 } 6590 } 6591 break; 6592 default: 6593 break; 6594 } 6595 6596 return 0; 6597 } 6598 6599 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev, 6600 struct amdgpu_irq_src *source, 6601 unsigned type, 6602 enum amdgpu_interrupt_state state) 6603 { 6604 u32 cp_int_cntl_reg, cp_int_cntl; 6605 int i, j; 6606 6607 switch (state) { 6608 case AMDGPU_IRQ_STATE_DISABLE: 6609 case AMDGPU_IRQ_STATE_ENABLE: 6610 for (i = 0; i < adev->gfx.me.num_me; i++) { 6611 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6612 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6613 6614 if (cp_int_cntl_reg) { 6615 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6616 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6617 OPCODE_ERROR_INT_ENABLE, 6618 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6619 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6620 } 6621 } 6622 } 6623 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 6624 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 6625 /* MECs start at 1 */ 6626 cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j); 6627 6628 if (cp_int_cntl_reg) { 6629 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6630 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL, 6631 OPCODE_ERROR_INT_ENABLE, 6632 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 6633 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6634 } 6635 } 6636 } 6637 break; 6638 default: 6639 break; 6640 } 6641 return 0; 6642 } 6643 6644 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6645 struct amdgpu_irq_src *source, 6646 unsigned int type, 6647 enum amdgpu_interrupt_state state) 6648 { 6649 u32 cp_int_cntl_reg, cp_int_cntl; 6650 int i, j; 6651 6652 switch (state) { 6653 case AMDGPU_IRQ_STATE_DISABLE: 6654 case AMDGPU_IRQ_STATE_ENABLE: 6655 for (i = 0; i < adev->gfx.me.num_me; i++) { 6656 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 6657 cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j); 6658 6659 if (cp_int_cntl_reg) { 6660 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg); 6661 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 6662 PRIV_INSTR_INT_ENABLE, 6663 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 6664 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl); 6665 } 6666 } 6667 } 6668 break; 6669 default: 6670 break; 6671 } 6672 6673 return 0; 6674 } 6675 6676 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev, 6677 struct amdgpu_iv_entry *entry) 6678 { 6679 u8 me_id, pipe_id, queue_id; 6680 struct amdgpu_ring *ring; 6681 int i; 6682 6683 me_id = (entry->ring_id & 0x0c) >> 2; 6684 pipe_id = (entry->ring_id & 0x03) >> 0; 6685 queue_id = (entry->ring_id & 0x70) >> 4; 6686 6687 if (!adev->gfx.disable_kq) { 6688 switch (me_id) { 6689 case 0: 6690 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 6691 ring = &adev->gfx.gfx_ring[i]; 6692 if (ring->me == me_id && ring->pipe == pipe_id && 6693 ring->queue == queue_id) 6694 drm_sched_fault(&ring->sched); 6695 } 6696 break; 6697 case 1: 6698 case 2: 6699 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6700 ring = &adev->gfx.compute_ring[i]; 6701 if (ring->me == me_id && ring->pipe == pipe_id && 6702 ring->queue == queue_id) 6703 drm_sched_fault(&ring->sched); 6704 } 6705 break; 6706 default: 6707 BUG(); 6708 break; 6709 } 6710 
} 6711 } 6712 6713 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev, 6714 struct amdgpu_irq_src *source, 6715 struct amdgpu_iv_entry *entry) 6716 { 6717 DRM_ERROR("Illegal register access in command stream\n"); 6718 gfx_v11_0_handle_priv_fault(adev, entry); 6719 return 0; 6720 } 6721 6722 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev, 6723 struct amdgpu_irq_src *source, 6724 struct amdgpu_iv_entry *entry) 6725 { 6726 DRM_ERROR("Illegal opcode in command stream\n"); 6727 gfx_v11_0_handle_priv_fault(adev, entry); 6728 return 0; 6729 } 6730 6731 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev, 6732 struct amdgpu_irq_src *source, 6733 struct amdgpu_iv_entry *entry) 6734 { 6735 DRM_ERROR("Illegal instruction in command stream\n"); 6736 gfx_v11_0_handle_priv_fault(adev, entry); 6737 return 0; 6738 } 6739 6740 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev, 6741 struct amdgpu_irq_src *source, 6742 struct amdgpu_iv_entry *entry) 6743 { 6744 if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq) 6745 return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry); 6746 6747 return 0; 6748 } 6749 6750 #if 0 6751 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 6752 struct amdgpu_irq_src *src, 6753 unsigned int type, 6754 enum amdgpu_interrupt_state state) 6755 { 6756 uint32_t tmp, target; 6757 struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring); 6758 6759 target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL); 6760 target += ring->pipe; 6761 6762 switch (type) { 6763 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 6764 if (state == AMDGPU_IRQ_STATE_DISABLE) { 6765 tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6766 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6767 GENERIC2_INT_ENABLE, 0); 6768 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6769 6770 tmp = RREG32_SOC15_IP(GC, target); 6771 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6772 GENERIC2_INT_ENABLE, 0); 6773 WREG32_SOC15_IP(GC, target, tmp); 6774 } else { 6775 tmp = 
RREG32_SOC15(GC, 0, regCPC_INT_CNTL); 6776 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, 6777 GENERIC2_INT_ENABLE, 1); 6778 WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp); 6779 6780 tmp = RREG32_SOC15_IP(GC, target); 6781 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL, 6782 GENERIC2_INT_ENABLE, 1); 6783 WREG32_SOC15_IP(GC, target, tmp); 6784 } 6785 break; 6786 default: 6787 BUG(); /* kiq only support GENERIC2_INT now */ 6788 break; 6789 } 6790 return 0; 6791 } 6792 #endif 6793 6794 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring) 6795 { 6796 const unsigned int gcr_cntl = 6797 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 6798 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 6799 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) | 6800 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) | 6801 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) | 6802 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 6803 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 6804 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1); 6805 6806 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 6807 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 6808 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 6809 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 6810 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 6811 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 6812 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 6813 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 6814 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 6815 } 6816 6817 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) 6818 { 6819 /* Disable the pipe reset until the CPFW fully support it.*/ 6820 dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n"); 6821 return false; 6822 } 6823 6824 6825 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) 6826 { 6827 struct amdgpu_device *adev = ring->adev; 6828 uint32_t reset_pipe = 0, clean_pipe = 0; 6829 int r; 6830 6831 if 
(!gfx_v11_pipe_reset_support(adev)) 6832 return -EOPNOTSUPP; 6833 6834 gfx_v11_0_set_safe_mode(adev, 0); 6835 mutex_lock(&adev->srbm_mutex); 6836 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6837 6838 switch (ring->pipe) { 6839 case 0: 6840 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6841 PFP_PIPE0_RESET, 1); 6842 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6843 ME_PIPE0_RESET, 1); 6844 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6845 PFP_PIPE0_RESET, 0); 6846 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6847 ME_PIPE0_RESET, 0); 6848 break; 6849 case 1: 6850 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6851 PFP_PIPE1_RESET, 1); 6852 reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL, 6853 ME_PIPE1_RESET, 1); 6854 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6855 PFP_PIPE1_RESET, 0); 6856 clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL, 6857 ME_PIPE1_RESET, 0); 6858 break; 6859 default: 6860 break; 6861 } 6862 6863 WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe); 6864 WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe); 6865 6866 r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - 6867 RS64_FW_UC_START_ADDR_LO; 6868 soc21_grbm_select(adev, 0, 0, 0, 0); 6869 mutex_unlock(&adev->srbm_mutex); 6870 gfx_v11_0_unset_safe_mode(adev, 0); 6871 6872 dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name, 6873 r == 0 ? "successfully" : "failed"); 6874 /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, 6875 * so the pipe reset status relies on the later gfx ring test result. 
6876 */ 6877 return 0; 6878 } 6879 6880 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, 6881 unsigned int vmid, 6882 struct amdgpu_fence *timedout_fence) 6883 { 6884 struct amdgpu_device *adev = ring->adev; 6885 bool use_mmio = false; 6886 int r; 6887 6888 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 6889 6890 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0); 6891 if (r) { 6892 6893 dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r); 6894 r = gfx_v11_reset_gfx_pipe(ring); 6895 if (r) 6896 return r; 6897 } 6898 6899 if (use_mmio) { 6900 r = gfx_v11_0_kgq_init_queue(ring, true); 6901 if (r) { 6902 dev_err(adev->dev, "failed to init kgq\n"); 6903 return r; 6904 } 6905 6906 r = amdgpu_mes_map_legacy_queue(adev, ring, 0); 6907 if (r) { 6908 dev_err(adev->dev, "failed to remap kgq\n"); 6909 return r; 6910 } 6911 } 6912 6913 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 6914 } 6915 6916 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) 6917 { 6918 6919 struct amdgpu_device *adev = ring->adev; 6920 uint32_t reset_pipe = 0, clean_pipe = 0; 6921 int r; 6922 6923 if (!gfx_v11_pipe_reset_support(adev)) 6924 return -EOPNOTSUPP; 6925 6926 gfx_v11_0_set_safe_mode(adev, 0); 6927 mutex_lock(&adev->srbm_mutex); 6928 soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6929 6930 reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL); 6931 clean_pipe = reset_pipe; 6932 6933 if (adev->gfx.rs64_enable) { 6934 6935 switch (ring->pipe) { 6936 case 0: 6937 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6938 MEC_PIPE0_RESET, 1); 6939 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6940 MEC_PIPE0_RESET, 0); 6941 break; 6942 case 1: 6943 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6944 MEC_PIPE1_RESET, 1); 6945 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6946 MEC_PIPE1_RESET, 0); 6947 break; 6948 case 2: 6949 reset_pipe = REG_SET_FIELD(reset_pipe, 
CP_MEC_RS64_CNTL, 6950 MEC_PIPE2_RESET, 1); 6951 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6952 MEC_PIPE2_RESET, 0); 6953 break; 6954 case 3: 6955 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL, 6956 MEC_PIPE3_RESET, 1); 6957 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL, 6958 MEC_PIPE3_RESET, 0); 6959 break; 6960 default: 6961 break; 6962 } 6963 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe); 6964 WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe); 6965 r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) - 6966 RS64_FW_UC_START_ADDR_LO; 6967 } else { 6968 if (ring->me == 1) { 6969 switch (ring->pipe) { 6970 case 0: 6971 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6972 MEC_ME1_PIPE0_RESET, 1); 6973 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6974 MEC_ME1_PIPE0_RESET, 0); 6975 break; 6976 case 1: 6977 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6978 MEC_ME1_PIPE1_RESET, 1); 6979 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6980 MEC_ME1_PIPE1_RESET, 0); 6981 break; 6982 case 2: 6983 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6984 MEC_ME1_PIPE2_RESET, 1); 6985 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6986 MEC_ME1_PIPE2_RESET, 0); 6987 break; 6988 case 3: 6989 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 6990 MEC_ME1_PIPE3_RESET, 1); 6991 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 6992 MEC_ME1_PIPE3_RESET, 0); 6993 break; 6994 default: 6995 break; 6996 } 6997 /* mec1 fw pc: CP_MEC1_INSTR_PNTR */ 6998 } else { 6999 switch (ring->pipe) { 7000 case 0: 7001 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 7002 MEC_ME2_PIPE0_RESET, 1); 7003 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 7004 MEC_ME2_PIPE0_RESET, 0); 7005 break; 7006 case 1: 7007 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 7008 MEC_ME2_PIPE1_RESET, 1); 7009 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 7010 MEC_ME2_PIPE1_RESET, 0); 7011 break; 7012 case 2: 7013 reset_pipe = 
REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 7014 MEC_ME2_PIPE2_RESET, 1); 7015 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 7016 MEC_ME2_PIPE2_RESET, 0); 7017 break; 7018 case 3: 7019 reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL, 7020 MEC_ME2_PIPE3_RESET, 1); 7021 clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL, 7022 MEC_ME2_PIPE3_RESET, 0); 7023 break; 7024 default: 7025 break; 7026 } 7027 /* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */ 7028 } 7029 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe); 7030 WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe); 7031 r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR)); 7032 } 7033 7034 soc21_grbm_select(adev, 0, 0, 0, 0); 7035 mutex_unlock(&adev->srbm_mutex); 7036 gfx_v11_0_unset_safe_mode(adev, 0); 7037 7038 dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name, 7039 r == 0 ? "successfully" : "failed"); 7040 /*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe 7041 * reset status relies on the compute ring test result. 
7042 */ 7043 return 0; 7044 } 7045 7046 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, 7047 unsigned int vmid, 7048 struct amdgpu_fence *timedout_fence) 7049 { 7050 struct amdgpu_device *adev = ring->adev; 7051 int r = 0; 7052 7053 amdgpu_ring_reset_helper_begin(ring, timedout_fence); 7054 7055 r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0); 7056 if (r) { 7057 dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r); 7058 r = gfx_v11_0_reset_compute_pipe(ring); 7059 if (r) 7060 return r; 7061 } 7062 7063 r = gfx_v11_0_kcq_init_queue(ring, true); 7064 if (r) { 7065 dev_err(adev->dev, "fail to init kcq\n"); 7066 return r; 7067 } 7068 r = amdgpu_mes_map_legacy_queue(adev, ring, 0); 7069 if (r) { 7070 dev_err(adev->dev, "failed to remap kcq\n"); 7071 return r; 7072 } 7073 7074 return amdgpu_ring_reset_helper_end(ring, timedout_fence); 7075 } 7076 7077 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 7078 { 7079 struct amdgpu_device *adev = ip_block->adev; 7080 uint32_t i, j, k, reg, index = 0; 7081 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7082 7083 if (!adev->gfx.ip_dump_core) 7084 return; 7085 7086 for (i = 0; i < reg_count; i++) 7087 drm_printf(p, "%-50s \t 0x%08x\n", 7088 gc_reg_list_11_0[i].reg_name, 7089 adev->gfx.ip_dump_core[i]); 7090 7091 /* print compute queue registers for all instances */ 7092 if (!adev->gfx.ip_dump_compute_queues) 7093 return; 7094 7095 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7096 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n", 7097 adev->gfx.mec.num_mec, 7098 adev->gfx.mec.num_pipe_per_mec, 7099 adev->gfx.mec.num_queue_per_pipe); 7100 7101 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7102 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7103 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7104 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); 7105 for (reg = 0; reg < reg_count; reg++) { 7106 if (i && 
gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP) 7107 drm_printf(p, "%-50s \t 0x%08x\n", 7108 "regCP_MEC_ME2_HEADER_DUMP", 7109 adev->gfx.ip_dump_compute_queues[index + reg]); 7110 else 7111 drm_printf(p, "%-50s \t 0x%08x\n", 7112 gc_cp_reg_list_11[reg].reg_name, 7113 adev->gfx.ip_dump_compute_queues[index + reg]); 7114 } 7115 index += reg_count; 7116 } 7117 } 7118 } 7119 7120 /* print gfx queue registers for all instances */ 7121 if (!adev->gfx.ip_dump_gfx_queues) 7122 return; 7123 7124 index = 0; 7125 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7126 drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n", 7127 adev->gfx.me.num_me, 7128 adev->gfx.me.num_pipe_per_me, 7129 adev->gfx.me.num_queue_per_pipe); 7130 7131 for (i = 0; i < adev->gfx.me.num_me; i++) { 7132 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7133 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7134 drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k); 7135 for (reg = 0; reg < reg_count; reg++) { 7136 drm_printf(p, "%-50s \t 0x%08x\n", 7137 gc_gfx_queue_reg_list_11[reg].reg_name, 7138 adev->gfx.ip_dump_gfx_queues[index + reg]); 7139 } 7140 index += reg_count; 7141 } 7142 } 7143 } 7144 } 7145 7146 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block) 7147 { 7148 struct amdgpu_device *adev = ip_block->adev; 7149 uint32_t i, j, k, reg, index = 0; 7150 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0); 7151 7152 if (!adev->gfx.ip_dump_core) 7153 return; 7154 7155 amdgpu_gfx_off_ctrl(adev, false); 7156 for (i = 0; i < reg_count; i++) 7157 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i])); 7158 amdgpu_gfx_off_ctrl(adev, true); 7159 7160 /* dump compute queue registers for all instances */ 7161 if (!adev->gfx.ip_dump_compute_queues) 7162 return; 7163 7164 reg_count = ARRAY_SIZE(gc_cp_reg_list_11); 7165 amdgpu_gfx_off_ctrl(adev, false); 7166 mutex_lock(&adev->srbm_mutex); 7167 for (i = 0; i < adev->gfx.mec.num_mec; i++) { 7168 for 
(j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { 7169 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { 7170 /* ME0 is for GFX so start from 1 for CP */ 7171 soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); 7172 for (reg = 0; reg < reg_count; reg++) { 7173 if (i && 7174 gc_cp_reg_list_11[reg].reg_offset == 7175 regCP_MEC_ME1_HEADER_DUMP) 7176 adev->gfx.ip_dump_compute_queues[index + reg] = 7177 RREG32(SOC15_REG_OFFSET(GC, 0, 7178 regCP_MEC_ME2_HEADER_DUMP)); 7179 else 7180 adev->gfx.ip_dump_compute_queues[index + reg] = 7181 RREG32(SOC15_REG_ENTRY_OFFSET( 7182 gc_cp_reg_list_11[reg])); 7183 } 7184 index += reg_count; 7185 } 7186 } 7187 } 7188 soc21_grbm_select(adev, 0, 0, 0, 0); 7189 mutex_unlock(&adev->srbm_mutex); 7190 amdgpu_gfx_off_ctrl(adev, true); 7191 7192 /* dump gfx queue registers for all instances */ 7193 if (!adev->gfx.ip_dump_gfx_queues) 7194 return; 7195 7196 index = 0; 7197 reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11); 7198 amdgpu_gfx_off_ctrl(adev, false); 7199 mutex_lock(&adev->srbm_mutex); 7200 for (i = 0; i < adev->gfx.me.num_me; i++) { 7201 for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) { 7202 for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) { 7203 soc21_grbm_select(adev, i, j, k, 0); 7204 7205 for (reg = 0; reg < reg_count; reg++) { 7206 adev->gfx.ip_dump_gfx_queues[index + reg] = 7207 RREG32(SOC15_REG_ENTRY_OFFSET( 7208 gc_gfx_queue_reg_list_11[reg])); 7209 } 7210 index += reg_count; 7211 } 7212 } 7213 } 7214 soc21_grbm_select(adev, 0, 0, 0, 0); 7215 mutex_unlock(&adev->srbm_mutex); 7216 amdgpu_gfx_off_ctrl(adev, true); 7217 } 7218 7219 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) 7220 { 7221 /* Emit the cleaner shader */ 7222 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); 7223 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ 7224 } 7225 7226 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) 7227 { 7228 
amdgpu_gfx_profile_ring_begin_use(ring); 7229 7230 amdgpu_gfx_enforce_isolation_ring_begin_use(ring); 7231 } 7232 7233 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) 7234 { 7235 amdgpu_gfx_profile_ring_end_use(ring); 7236 7237 amdgpu_gfx_enforce_isolation_ring_end_use(ring); 7238 } 7239 7240 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { 7241 .name = "gfx_v11_0", 7242 .early_init = gfx_v11_0_early_init, 7243 .sw_init = gfx_v11_0_sw_init, 7244 .sw_fini = gfx_v11_0_sw_fini, 7245 .hw_init = gfx_v11_0_hw_init, 7246 .hw_fini = gfx_v11_0_hw_fini, 7247 .suspend = gfx_v11_0_suspend, 7248 .resume = gfx_v11_0_resume, 7249 .is_idle = gfx_v11_0_is_idle, 7250 .wait_for_idle = gfx_v11_0_wait_for_idle, 7251 .soft_reset = gfx_v11_0_soft_reset, 7252 .check_soft_reset = gfx_v11_0_check_soft_reset, 7253 .post_soft_reset = gfx_v11_0_post_soft_reset, 7254 .set_clockgating_state = gfx_v11_0_set_clockgating_state, 7255 .set_powergating_state = gfx_v11_0_set_powergating_state, 7256 .get_clockgating_state = gfx_v11_0_get_clockgating_state, 7257 .dump_ip_state = gfx_v11_ip_dump, 7258 .print_ip_state = gfx_v11_ip_print, 7259 }; 7260 7261 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 7262 .type = AMDGPU_RING_TYPE_GFX, 7263 .align_mask = 0xff, 7264 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7265 .support_64bit_ptrs = true, 7266 .secure_submission_supported = true, 7267 .get_rptr = gfx_v11_0_ring_get_rptr_gfx, 7268 .get_wptr = gfx_v11_0_ring_get_wptr_gfx, 7269 .set_wptr = gfx_v11_0_ring_set_wptr_gfx, 7270 .emit_frame_size = /* totally 247 maximum if 16 IBs */ 7271 5 + /* update_spm_vmid */ 7272 5 + /* COND_EXEC */ 7273 22 + /* SET_Q_PREEMPTION_MODE */ 7274 7 + /* PIPELINE_SYNC */ 7275 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7276 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7277 4 + /* VM_FLUSH */ 7278 8 + /* FENCE for VM_FLUSH */ 7279 20 + /* GDS switch */ 7280 5 + /* COND_EXEC */ 7281 7 + /* HDP_flush */ 7282 4 + /* VGT_flush */ 7283 31 + /* DE_META */ 7284 3 + /* 
CNTX_CTRL */ 7285 5 + /* HDP_INVL */ 7286 22 + /* SET_Q_PREEMPTION_MODE */ 7287 8 + 8 + /* FENCE x2 */ 7288 8 + /* gfx_v11_0_emit_mem_sync */ 7289 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7290 .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ 7291 .emit_ib = gfx_v11_0_ring_emit_ib_gfx, 7292 .emit_fence = gfx_v11_0_ring_emit_fence, 7293 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7294 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7295 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7296 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7297 .test_ring = gfx_v11_0_ring_test_ring, 7298 .test_ib = gfx_v11_0_ring_test_ib, 7299 .insert_nop = gfx_v11_ring_insert_nop, 7300 .pad_ib = amdgpu_ring_generic_pad_ib, 7301 .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, 7302 .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, 7303 .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, 7304 .preempt_ib = amdgpu_gfx_ring_preempt_ib, 7305 .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, 7306 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7307 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7308 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7309 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7310 .reset = gfx_v11_0_reset_kgq, 7311 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7312 .begin_use = gfx_v11_0_ring_begin_use, 7313 .end_use = gfx_v11_0_ring_end_use, 7314 }; 7315 7316 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { 7317 .type = AMDGPU_RING_TYPE_COMPUTE, 7318 .align_mask = 0xff, 7319 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7320 .support_64bit_ptrs = true, 7321 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7322 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7323 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7324 .emit_frame_size = 7325 5 + /* update_spm_vmid */ 7326 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7327 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7328 5 + /* hdp invalidate */ 7329 7 + /* 
gfx_v11_0_ring_emit_pipeline_sync */ 7330 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7331 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7332 2 + /* gfx_v11_0_ring_emit_vm_flush */ 7333 8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */ 7334 8 + /* gfx_v11_0_emit_mem_sync */ 7335 2, /* gfx_v11_0_ring_emit_cleaner_shader */ 7336 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7337 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7338 .emit_fence = gfx_v11_0_ring_emit_fence, 7339 .emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync, 7340 .emit_vm_flush = gfx_v11_0_ring_emit_vm_flush, 7341 .emit_gds_switch = gfx_v11_0_ring_emit_gds_switch, 7342 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7343 .test_ring = gfx_v11_0_ring_test_ring, 7344 .test_ib = gfx_v11_0_ring_test_ib, 7345 .insert_nop = gfx_v11_ring_insert_nop, 7346 .pad_ib = amdgpu_ring_generic_pad_ib, 7347 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7348 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7349 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7350 .emit_mem_sync = gfx_v11_0_emit_mem_sync, 7351 .reset = gfx_v11_0_reset_kcq, 7352 .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, 7353 .begin_use = gfx_v11_0_ring_begin_use, 7354 .end_use = gfx_v11_0_ring_end_use, 7355 }; 7356 7357 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7358 .type = AMDGPU_RING_TYPE_KIQ, 7359 .align_mask = 0xff, 7360 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7361 .support_64bit_ptrs = true, 7362 .get_rptr = gfx_v11_0_ring_get_rptr_compute, 7363 .get_wptr = gfx_v11_0_ring_get_wptr_compute, 7364 .set_wptr = gfx_v11_0_ring_set_wptr_compute, 7365 .emit_frame_size = 7366 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7367 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 7368 5 + /*hdp invalidate */ 7369 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ 7370 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 7371 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 7372 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm 
fence */ 7373 .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ 7374 .emit_ib = gfx_v11_0_ring_emit_ib_compute, 7375 .emit_fence = gfx_v11_0_ring_emit_fence_kiq, 7376 .test_ring = gfx_v11_0_ring_test_ring, 7377 .test_ib = gfx_v11_0_ring_test_ib, 7378 .insert_nop = amdgpu_ring_insert_nop, 7379 .pad_ib = amdgpu_ring_generic_pad_ib, 7380 .emit_rreg = gfx_v11_0_ring_emit_rreg, 7381 .emit_wreg = gfx_v11_0_ring_emit_wreg, 7382 .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, 7383 .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, 7384 .emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush, 7385 }; 7386 7387 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev) 7388 { 7389 int i; 7390 7391 adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq; 7392 7393 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7394 adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx; 7395 7396 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7397 adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute; 7398 } 7399 7400 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = { 7401 .set = gfx_v11_0_set_eop_interrupt_state, 7402 .process = gfx_v11_0_eop_irq, 7403 }; 7404 7405 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = { 7406 .set = gfx_v11_0_set_priv_reg_fault_state, 7407 .process = gfx_v11_0_priv_reg_irq, 7408 }; 7409 7410 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = { 7411 .set = gfx_v11_0_set_bad_op_fault_state, 7412 .process = gfx_v11_0_bad_op_irq, 7413 }; 7414 7415 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = { 7416 .set = gfx_v11_0_set_priv_inst_fault_state, 7417 .process = gfx_v11_0_priv_inst_irq, 7418 }; 7419 7420 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = { 7421 .process = gfx_v11_0_rlc_gc_fed_irq, 7422 }; 7423 7424 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) 7425 { 7426 adev->gfx.eop_irq.num_types = 
AMDGPU_CP_IRQ_LAST; 7427 adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs; 7428 7429 adev->gfx.priv_reg_irq.num_types = 1; 7430 adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs; 7431 7432 adev->gfx.bad_op_irq.num_types = 1; 7433 adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs; 7434 7435 adev->gfx.priv_inst_irq.num_types = 1; 7436 adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs; 7437 7438 adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */ 7439 adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs; 7440 7441 } 7442 7443 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) 7444 { 7445 if (adev->flags & AMD_IS_APU) 7446 adev->gfx.imu.mode = MISSION_MODE; 7447 else 7448 adev->gfx.imu.mode = DEBUG_MODE; 7449 7450 adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; 7451 } 7452 7453 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev) 7454 { 7455 adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs; 7456 } 7457 7458 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev) 7459 { 7460 unsigned total_cu = adev->gfx.config.max_cu_per_sh * 7461 adev->gfx.config.max_sh_per_se * 7462 adev->gfx.config.max_shader_engines; 7463 7464 adev->gds.gds_size = 0x1000; 7465 adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1; 7466 adev->gds.gws_size = 64; 7467 adev->gds.oa_size = 16; 7468 } 7469 7470 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev) 7471 { 7472 /* set gfx eng mqd */ 7473 adev->mqds[AMDGPU_HW_IP_GFX].mqd_size = 7474 sizeof(struct v11_gfx_mqd); 7475 adev->mqds[AMDGPU_HW_IP_GFX].init_mqd = 7476 gfx_v11_0_gfx_mqd_init; 7477 /* set compute eng mqd */ 7478 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 7479 sizeof(struct v11_compute_mqd); 7480 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 7481 gfx_v11_0_compute_mqd_init; 7482 } 7483 7484 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev, 7485 u32 bitmap) 7486 { 7487 u32 data; 7488 7489 if (!bitmap) 7490 return; 7491 
7492 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7493 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7494 7495 WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data); 7496 } 7497 7498 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev) 7499 { 7500 u32 data, wgp_bitmask; 7501 data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG); 7502 data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG); 7503 7504 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 7505 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 7506 7507 wgp_bitmask = 7508 amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1); 7509 7510 return (~data) & wgp_bitmask; 7511 } 7512 7513 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev) 7514 { 7515 u32 wgp_idx, wgp_active_bitmap; 7516 u32 cu_bitmap_per_wgp, cu_active_bitmap; 7517 7518 wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev); 7519 cu_active_bitmap = 0; 7520 7521 for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) { 7522 /* if there is one WGP enabled, it means 2 CUs will be enabled */ 7523 cu_bitmap_per_wgp = 3 << (2 * wgp_idx); 7524 if (wgp_active_bitmap & (1 << wgp_idx)) 7525 cu_active_bitmap |= cu_bitmap_per_wgp; 7526 } 7527 7528 return cu_active_bitmap; 7529 } 7530 7531 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, 7532 struct amdgpu_cu_info *cu_info) 7533 { 7534 int i, j, k, counter, active_cu_number = 0; 7535 u32 mask, bitmap; 7536 unsigned disable_masks[8 * 2]; 7537 7538 if (!adev || !cu_info) 7539 return -EINVAL; 7540 7541 amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2); 7542 7543 mutex_lock(&adev->grbm_idx_mutex); 7544 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7545 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7546 bitmap = i * adev->gfx.config.max_sh_per_se + j; 7547 if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) 7548 continue; 7549 mask = 1; 7550 counter = 
0; 7551 gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); 7552 if (i < 8 && j < 2) 7553 gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh( 7554 adev, disable_masks[i * 2 + j]); 7555 bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev); 7556 7557 /** 7558 * GFX11 could support more than 4 SEs, while the bitmap 7559 * in cu_info struct is 4x4 and ioctl interface struct 7560 * drm_amdgpu_info_device should keep stable. 7561 * So we use last two columns of bitmap to store cu mask for 7562 * SEs 4 to 7, the layout of the bitmap is as below: 7563 * SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]} 7564 * SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]} 7565 * SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]} 7566 * SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]} 7567 * SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]} 7568 * SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]} 7569 * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} 7570 * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} 7571 */ 7572 cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; 7573 7574 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 7575 if (bitmap & mask) 7576 counter++; 7577 7578 mask <<= 1; 7579 } 7580 active_cu_number += counter; 7581 } 7582 } 7583 gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); 7584 mutex_unlock(&adev->grbm_idx_mutex); 7585 7586 cu_info->number = active_cu_number; 7587 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7588 7589 return 0; 7590 } 7591 7592 const struct amdgpu_ip_block_version gfx_v11_0_ip_block = 7593 { 7594 .type = AMD_IP_BLOCK_TYPE_GFX, 7595 .major = 11, 7596 .minor = 0, 7597 .rev = 0, 7598 .funcs = &gfx_v11_0_ip_funcs, 7599 }; 7600