1 /* 2 * Copyright 2025 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/delay.h> 24 #include <linux/kernel.h> 25 #include <linux/firmware.h> 26 #include <linux/module.h> 27 #include <linux/pci.h> 28 #include "amdgpu.h" 29 #include "amdgpu_gfx.h" 30 #include "amdgpu_psp.h" 31 #include "amdgpu_smu.h" 32 #include "amdgpu_atomfirmware.h" 33 #include "amdgpu_userq_fence.h" 34 #include "imu_v12_1.h" 35 #include "soc_v1_0.h" 36 #include "gfx_v12_1_pkt.h" 37 38 #include "gc/gc_12_1_0_offset.h" 39 #include "gc/gc_12_1_0_sh_mask.h" 40 #include "soc24_enum.h" 41 #include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h" 42 43 #include "soc15.h" 44 #include "clearstate_gfx12.h" 45 #include "v12_structs.h" 46 #include "gfx_v12_1.h" 47 #include "mes_v12_1.h" 48 #include "amdgpu_ras_mgr.h" 49 50 #define GFX12_MEC_HPD_SIZE 2048 51 #define NUM_SIMD_PER_CU_GFX12_1 4 52 53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L 54 55 #define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000000 56 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 57 #define regCP_MQD_CONTROL_DEFAULT 0x00000100 58 #define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 59 #define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 60 #define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0ae06301 61 #define regCP_HQD_IB_CONTROL_DEFAULT 0x00100000 62 63 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mec.bin"); 64 MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin"); 65 66 #define SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 0x00000001 67 #define DEFAULT_SH_MEM_CONFIG \ 68 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ 69 (SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ 70 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT)) 71 72 static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id); 73 static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev); 74 static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev); 75 static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev); 76 static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev); 77 static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev); 78 static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev, 79 struct amdgpu_cu_info *cu_info); 80 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev); 81 static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, 82 u32 sh_num, u32 instance, int xcc_id); 83 static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 84 uint32_t val); 85 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); 86 static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring, 87 uint16_t pasid, uint32_t flush_type, 88 bool all_hub, uint8_t dst_sel); 89 static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id); 90 static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id); 91 static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev, 92 bool enable); 93 static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev, 94 bool enable, int xcc_id); 95 static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev); 96 97 static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring, 98 uint64_t queue_mask) 99 { 100 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 101 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 102 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 103 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 104 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 105 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 106 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 107 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 108 amdgpu_ring_write(kiq_ring, 0); 109 } 110 111 static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring, 112 struct amdgpu_ring *ring) 113 { 114 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 115 uint64_t wptr_addr = ring->wptr_gpu_addr; 116 uint32_t me = 0, eng_sel = 0; 117 118 switch (ring->funcs->type) { 119 case AMDGPU_RING_TYPE_COMPUTE: 120 me = 1; 121 eng_sel = 0; 122 break; 123 case AMDGPU_RING_TYPE_MES: 124 me = 2; 125 eng_sel = 5; 126 break; 127 default: 128 WARN_ON(1); 129 } 130 131 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 132 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 133 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 134 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 135 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 136 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 137 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 138 PACKET3_MAP_QUEUES_ME((me)) | 139 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 140 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) | 141 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 142 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 143 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 144 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 145 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 146 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 147 } 148 149 static void gfx_v12_1_kiq_unmap_queues(struct amdgpu_ring *kiq_ring, 150 struct amdgpu_ring *ring, 151 enum amdgpu_unmap_queues_action action, 152 u64 gpu_addr, u64 seq) 153 { 154 struct amdgpu_device *adev = kiq_ring->adev; 155 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 156 157 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) { 158 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, 159 seq, kiq_ring->xcc_id); 160 return; 161 } 162 163 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 164 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 165 PACKET3_UNMAP_QUEUES_ACTION(action) | 166 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 167 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) | 168 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 169 amdgpu_ring_write(kiq_ring, 170 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 171 172 if (action == PREEMPT_QUEUES_NO_UNMAP) { 173 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); 174 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); 175 amdgpu_ring_write(kiq_ring, seq); 176 } else { 177 amdgpu_ring_write(kiq_ring, 0); 178 amdgpu_ring_write(kiq_ring, 0); 179 amdgpu_ring_write(kiq_ring, 0); 180 } 181 } 182 183 static void gfx_v12_1_kiq_query_status(struct amdgpu_ring *kiq_ring, 184 struct amdgpu_ring *ring, 185 u64 addr, u64 seq) 186 { 187 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0; 188 189 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5)); 190 amdgpu_ring_write(kiq_ring, 191 PACKET3_QUERY_STATUS_CONTEXT_ID(0) | 192 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) | 193 PACKET3_QUERY_STATUS_COMMAND(2)); 194 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 195 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) | 196 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel)); 197 amdgpu_ring_write(kiq_ring, lower_32_bits(addr)); 198 amdgpu_ring_write(kiq_ring, upper_32_bits(addr)); 199 amdgpu_ring_write(kiq_ring, lower_32_bits(seq)); 200 amdgpu_ring_write(kiq_ring, upper_32_bits(seq)); 201 } 202 203 static void gfx_v12_1_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, 204 uint16_t pasid, 205 uint32_t flush_type, 206 bool all_hub) 207 { 208 gfx_v12_1_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); 209 } 210 211 static const struct kiq_pm4_funcs gfx_v12_1_kiq_pm4_funcs = { 212 .kiq_set_resources = gfx_v12_1_kiq_set_resources, 213 .kiq_map_queues = gfx_v12_1_kiq_map_queues, 214 .kiq_unmap_queues = gfx_v12_1_kiq_unmap_queues, 215 .kiq_query_status = gfx_v12_1_kiq_query_status, 216 .kiq_invalidate_tlbs = gfx_v12_1_kiq_invalidate_tlbs, 217 .set_resources_size = 8, 218 .map_queues_size = 7, 219 .unmap_queues_size = 6, 220 .query_status_size = 7, 221 .invalidate_tlbs_size = 2, 222 }; 223 224 static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev) 225 { 226 int i, num_xcc; 227 228 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 229 for (i =0; i < num_xcc; i++) 230 adev->gfx.kiq[i].pmf = &gfx_v12_1_kiq_pm4_funcs; 231 } 232 233 static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, 234 int mem_space, int opt, uint32_t addr0, 235 uint32_t addr1, uint32_t ref, 236 uint32_t mask, uint32_t inv) 237 { 238 if (mem_space == 0) { 239 addr0 = soc_v1_0_normalize_xcc_reg_offset(addr0); 240 addr1 = soc_v1_0_normalize_xcc_reg_offset(addr1); 241 } 242 243 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 244 amdgpu_ring_write(ring, 245 /* memory (1) or register (0) */ 246 (WAIT_REG_MEM_MEM_SPACE(mem_space) | 247 WAIT_REG_MEM_OPERATION(opt) | /* wait */ 248 WAIT_REG_MEM_FUNCTION(3))); /* equal */ 249 250 if (mem_space) 251 BUG_ON(addr0 & 0x3); /* Dword align */ 252 amdgpu_ring_write(ring, addr0); 253 amdgpu_ring_write(ring, addr1); 254 amdgpu_ring_write(ring, ref); 255 amdgpu_ring_write(ring, mask); 256 amdgpu_ring_write(ring, inv); /* poll interval */ 257 } 258 259 static int gfx_v12_1_ring_test_ring(struct amdgpu_ring *ring) 260 { 261 struct amdgpu_device *adev = ring->adev; 262 uint32_t scratch_reg0_offset, xcc_offset; 263 uint32_t tmp = 0; 264 unsigned i; 265 int r; 266 267 /* Use register offset which is local to XCC in the packet */ 268 xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); 269 scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); 270 WREG32(scratch_reg0_offset, 0xCAFEDEAD); 271 tmp = RREG32(scratch_reg0_offset); 272 273 r = amdgpu_ring_alloc(ring, 5); 274 if (r) { 275 dev_err(adev->dev, 276 "amdgpu: cp failed to lock ring %d (%d).\n", 277 ring->idx, r); 278 return r; 279 } 280 281 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) { 282 gfx_v12_1_ring_emit_wreg(ring, xcc_offset, 0xDEADBEEF); 283 } else { 284 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 285 amdgpu_ring_write(ring, xcc_offset - 286 PACKET3_SET_UCONFIG_REG_START); 287 amdgpu_ring_write(ring, 0xDEADBEEF); 288 } 289 amdgpu_ring_commit(ring); 290 291 for (i = 0; i < adev->usec_timeout; i++) { 292 tmp = RREG32(scratch_reg0_offset); 293 if (tmp == 0xDEADBEEF) 294 break; 295 if (amdgpu_emu_mode == 1) 296 msleep(1); 297 else 298 udelay(1); 299 } 300 301 if (i >= adev->usec_timeout) 302 r = -ETIMEDOUT; 303 return r; 304 } 305 306 static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout) 307 { 308 struct amdgpu_device *adev = ring->adev; 309 struct amdgpu_ib ib; 310 struct dma_fence *f = NULL; 311 unsigned index; 312 uint64_t gpu_addr; 313 volatile uint32_t *cpu_ptr; 314 long r; 315 316 /* MES KIQ fw hasn't indirect buffer support for now */ 317 if (adev->enable_mes_kiq && 318 ring->funcs->type == AMDGPU_RING_TYPE_KIQ) 319 return 0; 320 321 memset(&ib, 0, sizeof(ib)); 322 323 r = amdgpu_device_wb_get(adev, &index); 324 if (r) 325 return r; 326 327 gpu_addr = adev->wb.gpu_addr + (index * 4); 328 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 329 cpu_ptr = &adev->wb.wb[index]; 330 331 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); 332 if (r) { 333 dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r); 334 goto err1; 335 } 336 337 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 338 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 339 ib.ptr[2] = lower_32_bits(gpu_addr); 340 ib.ptr[3] = upper_32_bits(gpu_addr); 341 ib.ptr[4] = 0xDEADBEEF; 342 ib.length_dw = 5; 343 344 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 345 if (r) 346 goto err2; 347 348 r = dma_fence_wait_timeout(f, false, timeout); 349 if (r == 0) { 350 r = -ETIMEDOUT; 351 goto err2; 352 } else if (r < 0) { 353 goto err2; 354 } 355 356 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF) 357 r = 0; 358 else 359 r = -EINVAL; 360 err2: 361 amdgpu_ib_free(&ib, NULL); 362 dma_fence_put(f); 363 err1: 364 amdgpu_device_wb_free(adev, index); 365 return r; 366 } 367 368 static void gfx_v12_1_free_microcode(struct amdgpu_device *adev) 369 { 370 amdgpu_ucode_release(&adev->gfx.rlc_fw); 371 amdgpu_ucode_release(&adev->gfx.mec_fw); 372 373 kfree(adev->gfx.rlc.register_list_format); 374 } 375 376 static int gfx_v12_1_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix) 377 { 378 const struct psp_firmware_header_v1_0 *toc_hdr; 379 int err = 0; 380 381 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, 382 AMDGPU_UCODE_REQUIRED, 383 "amdgpu/%s_toc.bin", ucode_prefix); 384 if (err) 385 goto out; 386 387 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; 388 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version); 389 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version); 390 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes); 391 adev->psp.toc.start_addr = (uint8_t *)toc_hdr + 392 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); 393 return 0; 394 out: 395 amdgpu_ucode_release(&adev->psp.toc_fw); 396 return err; 397 } 398 399 static int gfx_v12_1_init_microcode(struct amdgpu_device *adev) 400 { 401 char ucode_prefix[15]; 402 int err; 403 const struct rlc_firmware_header_v2_0 *rlc_hdr; 404 uint16_t version_major; 405 uint16_t version_minor; 406 407 DRM_DEBUG("\n"); 408 409 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); 410 411 if (!amdgpu_sriov_vf(adev)) { 412 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, 413 AMDGPU_UCODE_REQUIRED, 414 "amdgpu/%s_rlc.bin", ucode_prefix); 415 if (err) 416 goto out; 417 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 418 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 419 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 420 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor); 421 if (err) 422 goto out; 423 } 424 425 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, 426 AMDGPU_UCODE_REQUIRED, 427 "amdgpu/%s_mec.bin", ucode_prefix); 428 if (err) 429 goto out; 430 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC); 431 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK); 432 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK); 433 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK); 434 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK); 435 436 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 437 err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix); 438 439 /* only one MEC for gfx 12 */ 440 adev->gfx.mec2_fw = NULL; 441 442 if (adev->gfx.imu.funcs) { 443 if (adev->gfx.imu.funcs->init_microcode) { 444 err = adev->gfx.imu.funcs->init_microcode(adev); 445 if (err) 446 dev_err(adev->dev, "Failed to load imu firmware!\n"); 447 } 448 } 449 450 out: 451 if (err) { 452 amdgpu_ucode_release(&adev->gfx.rlc_fw); 453 amdgpu_ucode_release(&adev->gfx.mec_fw); 454 } 455 456 return err; 457 } 458 459 static u32 gfx_v12_1_get_csb_size(struct amdgpu_device *adev) 460 { 461 u32 count = 0; 462 const struct cs_section_def *sect = NULL; 463 const struct cs_extent_def *ext = NULL; 464 465 count += 1; 466 467 for (sect = gfx12_cs_data; sect->section != NULL; ++sect) { 468 if (sect->id == SECT_CONTEXT) { 469 for (ext = sect->section; ext->extent != NULL; ++ext) 470 count += 2 + ext->reg_count; 471 } else 472 return 0; 473 } 474 475 return count; 476 } 477 478 static void gfx_v12_1_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer) 479 { 480 u32 count = 0, clustercount = 0, i; 481 const struct cs_section_def *sect = NULL; 482 const struct cs_extent_def *ext = NULL; 483 484 if (adev->gfx.rlc.cs_data == NULL) 485 return; 486 if (buffer == NULL) 487 return; 488 489 count += 1; 490 491 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 492 if (sect->id == SECT_CONTEXT) { 493 for (ext = sect->section; ext->extent != NULL; ++ext) { 494 clustercount++; 495 buffer[count++] = ext->reg_count; 496 buffer[count++] = ext->reg_index; 497 498 for (i = 0; i < ext->reg_count; i++) 499 buffer[count++] = cpu_to_le32(ext->extent[i]); 500 } 501 } else 502 return; 503 } 504 505 buffer[0] = clustercount; 506 } 507 508 static void gfx_v12_1_rlc_fini(struct amdgpu_device *adev) 509 { 510 /* clear state block */ 511 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 512 &adev->gfx.rlc.clear_state_gpu_addr, 513 (void **)&adev->gfx.rlc.cs_ptr); 514 515 /* jump table block */ 516 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 517 &adev->gfx.rlc.cp_table_gpu_addr, 518 (void **)&adev->gfx.rlc.cp_table_ptr); 519 } 520 521 static void gfx_v12_1_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) 522 { 523 int xcc_id, num_xcc; 524 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; 525 526 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 527 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 528 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)]; 529 530 reg_access_ctrl->grbm_cntl = 531 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL); 532 reg_access_ctrl->grbm_idx = 533 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); 534 535 reg_access_ctrl->vfi_cmd = 536 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_CMD); 537 reg_access_ctrl->vfi_stat = 538 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_STAT); 539 reg_access_ctrl->vfi_addr = 540 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_ADDR); 541 reg_access_ctrl->vfi_data = 542 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_DATA); 543 reg_access_ctrl->vfi_grbm_cntl = 544 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_CNTL); 545 reg_access_ctrl->vfi_grbm_idx = 546 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_INDEX); 547 reg_access_ctrl->vfi_grbm_cntl_data = 0; 548 reg_access_ctrl->vfi_grbm_idx_data = 0; 549 } 550 adev->gfx.rlc.rlcg_reg_access_supported = true; 551 } 552 553 static int gfx_v12_1_rlc_init(struct amdgpu_device *adev) 554 { 555 const struct cs_section_def *cs_data; 556 int r, i, num_xcc; 557 558 adev->gfx.rlc.cs_data = gfx12_cs_data; 559 560 cs_data = adev->gfx.rlc.cs_data; 561 562 if (cs_data) { 563 /* init clear state block */ 564 r = amdgpu_gfx_rlc_init_csb(adev); 565 if (r) 566 return r; 567 } 568 569 /* init spm vmid with 0xf */ 570 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 571 for (i = 0; i < num_xcc; i++) { 572 if (adev->gfx.rlc.funcs->update_spm_vmid) 573 adev->gfx.rlc.funcs->update_spm_vmid(adev, i, NULL, 0xf); 574 } 575 576 return 0; 577 } 578 579 static void gfx_v12_1_mec_fini(struct amdgpu_device *adev) 580 { 581 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 582 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL); 583 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL); 584 } 585 586 static int gfx_v12_1_mec_init(struct amdgpu_device *adev) 587 { 588 int r, i, num_xcc; 589 u32 *hpd; 590 size_t mec_hpd_size; 591 592 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 593 594 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 595 for (i = 0; i < num_xcc; i++) 596 bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, 597 AMDGPU_MAX_COMPUTE_QUEUES); 598 599 /* take ownership of the relevant compute queues */ 600 amdgpu_gfx_compute_queue_acquire(adev); 601 mec_hpd_size = adev->gfx.num_compute_rings * 602 GFX12_MEC_HPD_SIZE * num_xcc; 603 604 if (mec_hpd_size) { 605 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 606 AMDGPU_GEM_DOMAIN_GTT, 607 &adev->gfx.mec.hpd_eop_obj, 608 &adev->gfx.mec.hpd_eop_gpu_addr, 609 (void **)&hpd); 610 if (r) { 611 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 612 gfx_v12_1_mec_fini(adev); 613 return r; 614 } 615 616 memset(hpd, 0, mec_hpd_size); 617 618 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 619 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 620 } 621 622 return 0; 623 } 624 625 static uint32_t wave_read_ind(struct amdgpu_device *adev, 626 uint32_t xcc_id, uint32_t wave, 627 uint32_t address) 628 { 629 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, 630 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 631 (address << SQ_IND_INDEX__INDEX__SHIFT)); 632 return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); 633 } 634 635 static void wave_read_regs(struct amdgpu_device *adev, 636 uint32_t xcc_id, uint32_t wave, 637 uint32_t thread, uint32_t regno, 638 uint32_t num, uint32_t *out) 639 { 640 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX, 641 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 642 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 643 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) | 644 (SQ_IND_INDEX__AUTO_INCR_MASK)); 645 while (num--) 646 *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA); 647 } 648 649 static void gfx_v12_1_read_wave_data(struct amdgpu_device *adev, 650 uint32_t xcc_id, 651 uint32_t simd, uint32_t wave, 652 uint32_t *dst, int *no_fields) 653 { 654 /* in gfx12 the SIMD_ID is specified as part of the INSTANCE 655 * field when performing a select_se_sh so it should be 656 * zero here */ 657 WARN_ON(simd != 0); 658 659 /* type 4 wave data */ 660 dst[(*no_fields)++] = 4; 661 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATUS); 662 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_LO); 663 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_HI); 664 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_LO); 665 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_HI); 666 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID1); 667 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID2); 668 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_GPR_ALLOC); 669 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_LDS_ALLOC); 670 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS); 671 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS2); 672 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_DBG1); 673 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_M0); 674 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_MODE); 675 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATE_PRIV); 676 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_PRIV); 677 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_USER); 678 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_TRAP_CTRL); 679 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_ACTIVE); 680 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_VALID_AND_IDLE); 681 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_LO); 682 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_HI); 683 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_SCHED_MODE); 684 } 685 686 static void gfx_v12_1_read_wave_sgprs(struct amdgpu_device *adev, 687 uint32_t xcc_id, uint32_t simd, 688 uint32_t wave, uint32_t start, 689 uint32_t size, uint32_t *dst) 690 { 691 WARN_ON(simd != 0); 692 693 wave_read_regs(adev, xcc_id, wave, 0, 694 start + SQIND_WAVE_SGPRS_OFFSET, 695 size, dst); 696 } 697 698 static void gfx_v12_1_read_wave_vgprs(struct amdgpu_device *adev, 699 uint32_t xcc_id, uint32_t simd, 700 uint32_t wave, uint32_t thread, 701 uint32_t start, uint32_t size, 702 uint32_t *dst) 703 { 704 wave_read_regs(adev, xcc_id, wave, thread, 705 start + SQIND_WAVE_VGPRS_OFFSET, 706 size, dst); 707 } 708 709 static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev, 710 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) 711 { 712 soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); 713 } 714 715 static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev) 716 { 717 /* Fill this in when the interface is ready */ 718 return 1; 719 } 720 721 static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) 722 { 723 int logic_xcc; 724 int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4; 725 726 for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) { 727 if (xcc == GET_INST(GC, logic_xcc)) 728 return logic_xcc; 729 } 730 731 dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); 732 return -EINVAL; 733 } 734 735 static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = { 736 .get_gpu_clock_counter = &gfx_v12_1_get_gpu_clock_counter, 737 .select_se_sh = &gfx_v12_1_xcc_select_se_sh, 738 .read_wave_data = &gfx_v12_1_read_wave_data, 739 .read_wave_sgprs = &gfx_v12_1_read_wave_sgprs, 740 .read_wave_vgprs = &gfx_v12_1_read_wave_vgprs, 741 .select_me_pipe_q = &gfx_v12_1_select_me_pipe_q, 742 .update_perfmon_mgcg = &gfx_v12_1_update_perf_clk, 743 .get_xccs_per_xcp = &gfx_v12_1_get_xccs_per_xcp, 744 .ih_node_to_logical_xcc = &gfx_v12_1_ih_to_xcc_inst, 745 }; 746 747 static int gfx_v12_1_gpu_early_init(struct amdgpu_device *adev) 748 { 749 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 750 case IP_VERSION(12, 1, 0): 751 adev->gfx.config.max_hw_contexts = 8; 752 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 753 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 754 adev->gfx.config.sc_hiz_tile_fifo_size = 0; 755 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 756 break; 757 default: 758 BUG(); 759 break; 760 } 761 762 return 0; 763 } 764 765 static int gfx_v12_1_compute_ring_init(struct amdgpu_device *adev, int ring_id, 766 int xcc_id, int mec, int pipe, int queue) 767 { 768 int r; 769 unsigned irq_type; 770 struct amdgpu_ring *ring; 771 unsigned int hw_prio; 772 uint32_t xcc_doorbell_start; 773 774 ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings + 775 ring_id]; 776 777 /* mec0 is me1 */ 778 ring->xcc_id = xcc_id; 779 ring->me = mec + 1; 780 ring->pipe = pipe; 781 ring->queue = queue; 782 783 ring->ring_obj = NULL; 784 ring->use_doorbell = true; 785 xcc_doorbell_start = adev->doorbell_index.mec_ring0 + 786 xcc_id * adev->doorbell_index.xcc_doorbell_range; 787 ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1; 788 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + 789 (ring_id + xcc_id * adev->gfx.num_compute_rings) * 790 GFX12_MEC_HPD_SIZE; 791 ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 792 sprintf(ring->name, "comp_%d.%d.%d.%d", 793 ring->xcc_id, ring->me, ring->pipe, ring->queue); 794 795 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 796 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 797 + ring->pipe; 798 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 799 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; 800 /* type-2 packets are deprecated on MEC, use type-3 instead */ 801 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 802 hw_prio, NULL); 803 if (r) 804 return r; 805 806 return 0; 807 } 808 809 static struct { 810 SOC24_FIRMWARE_ID id; 811 unsigned int offset; 812 unsigned int size; 813 unsigned int size_x16; 814 unsigned int num_inst; 815 } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX]; 816 817 #define RLC_TOC_OFFSET_DWUNIT 8 818 #define RLC_SIZE_MULTIPLE 1024 819 #define RLC_TOC_UMF_SIZE_inM 23ULL 820 #define RLC_TOC_FORMAT_API 165ULL 821 822 #define RLC_NUM_INS_CODE0 1 823 #define RLC_NUM_INS_CODE1 8 824 #define RLC_NUM_INS_CODE2 2 825 #define RLC_NUM_INS_CODE3 16 826 827 static void gfx_v12_1_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc) 828 { 829 RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc; 830 831 while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) { 832 rlc_autoload_info[ucode->id].id = ucode->id; 833 rlc_autoload_info[ucode->id].offset = 834 ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4; 835 rlc_autoload_info[ucode->id].size = 836 ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 : 837 ucode->size * 4; 838 switch (ucode->vfflr_image_code) { 839 case 0: 840 rlc_autoload_info[ucode->id].num_inst = 841 RLC_NUM_INS_CODE0; 842 break; 843 case 1: 844 rlc_autoload_info[ucode->id].num_inst = 845 RLC_NUM_INS_CODE1; 846 break; 847 case 2: 848 rlc_autoload_info[ucode->id].num_inst = 849 RLC_NUM_INS_CODE2; 850 break; 851 case 3: 852 rlc_autoload_info[ucode->id].num_inst = 853 RLC_NUM_INS_CODE3; 854 break; 855 default: 856 dev_err(adev->dev, 857 "Invalid Instance number detected\n"); 858 break; 859 } 860 ucode++; 861 } 862 } 863 864 static uint32_t gfx_v12_1_calc_toc_total_size(struct amdgpu_device *adev) 865 { 866 uint32_t total_size = 0; 867 SOC24_FIRMWARE_ID id; 868 869 gfx_v12_1_parse_rlc_toc(adev, adev->psp.toc.start_addr); 870 871 for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++) 872 total_size += rlc_autoload_info[id].size; 873 874 /* In case the offset in rlc toc ucode is aligned */ 875 if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset) 876 total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset + 877 rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size; 878 if (total_size < (RLC_TOC_UMF_SIZE_inM << 20)) 879 total_size = RLC_TOC_UMF_SIZE_inM << 20; 880 881 return total_size; 882 } 883 884 static int gfx_v12_1_rlc_autoload_buffer_init(struct amdgpu_device *adev) 885 { 886 int r; 887 uint32_t total_size; 888 889 total_size = gfx_v12_1_calc_toc_total_size(adev); 890 891 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024, 892 AMDGPU_GEM_DOMAIN_VRAM, 893 &adev->gfx.rlc.rlc_autoload_bo, 894 &adev->gfx.rlc.rlc_autoload_gpu_addr, 895 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 896 897 if (r) { 898 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r); 899 return r; 900 } 901 902 return 0; 903 } 904 905 static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev, 906 SOC24_FIRMWARE_ID id, 907 const void *fw_data, 908 uint32_t fw_size) 909 { 910 uint32_t toc_offset; 911 uint32_t toc_fw_size, toc_fw_inst_size; 912 char *ptr = adev->gfx.rlc.rlc_autoload_ptr; 913 int i, num_inst; 914 915 if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX) 916 return; 917 918 toc_offset = rlc_autoload_info[id].offset; 919 toc_fw_size = rlc_autoload_info[id].size; 920 num_inst = rlc_autoload_info[id].num_inst; 921 toc_fw_inst_size = toc_fw_size / num_inst; 922 923 if (fw_size == 0) 924 fw_size = toc_fw_inst_size; 925 926 if (fw_size > toc_fw_inst_size) 927 fw_size = toc_fw_inst_size; 928 929 for (i = 0; i < num_inst; i++) { 930 if ((num_inst == RLC_NUM_INS_CODE0) || 931 ((1 << (i / 2)) & adev->gfx.xcc_mask)) { 932 memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size); 933 934 if (fw_size < toc_fw_inst_size) 935 memset(ptr + toc_offset + fw_size + i * toc_fw_inst_size, 936 0, toc_fw_inst_size - fw_size); 937 } 938 } 939 } 940 941 static void 942 gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev) 943 { 944 void *data; 945 uint32_t size; 946 uint32_t *toc_ptr; 947 948 data = adev->psp.toc.start_addr; 949 size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size; 950 951 toc_ptr = (uint32_t *)data + size / 4 - 2; 952 *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1; 953 954 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC, 955 data, size); 956 } 957 958 static void 959 gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev) 960 { 961 const __le32 *fw_data; 962 uint32_t fw_size; 963 const struct gfx_firmware_header_v2_0 *cpv2_hdr; 964 const struct rlc_firmware_header_v2_0 *rlc_hdr; 965 const struct rlc_firmware_header_v2_1 *rlcv21_hdr; 966 const struct rlc_firmware_header_v2_2 *rlcv22_hdr; 967 uint16_t version_major, version_minor; 968 969 /* mec ucode */ 970 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *) 971 adev->gfx.mec_fw->data; 972 /* instruction */ 973 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 974 le32_to_cpu(cpv2_hdr->ucode_offset_bytes)); 975 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes); 976 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC, 977 fw_data, fw_size); 978 /* data */ 979 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 980 le32_to_cpu(cpv2_hdr->data_offset_bytes)); 981 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes); 982 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK, 983 fw_data, fw_size); 984 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK, 985 fw_data, fw_size); 986 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK, 987 fw_data, fw_size); 988 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK, 989 fw_data, fw_size); 990 991 /* rlc ucode */ 992 rlc_hdr = (const struct rlc_firmware_header_v2_0 *) 993 adev->gfx.rlc_fw->data; 994 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 995 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes)); 996 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes); 997 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE, 998 fw_data, fw_size); 999 1000 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 1001 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 1002 if (version_major == 2) { 1003 if (version_minor >= 1) { 1004 rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data; 1005 1006 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1007 le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes)); 1008 fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes); 1009 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH, 1010 fw_data, fw_size); 1011 1012 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1013 le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes)); 1014 fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes); 1015 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM, 1016 fw_data, fw_size); 1017 } 1018 if (version_minor >= 2) { 1019 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1020 1021 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1022 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes)); 1023 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes); 1024 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE, 1025 fw_data, fw_size); 1026 1027 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1028 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes)); 1029 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes); 1030 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT, 1031 fw_data, fw_size); 1032 } 1033 } 1034 } 1035 1036 static void 1037 gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev) 1038 { 1039 const __le32 *fw_data; 1040 uint32_t fw_size; 1041 const struct sdma_firmware_header_v3_0 *sdma_hdr; 1042 1043 if (adev->sdma.instance[0].fw) { 1044 sdma_hdr = (const struct sdma_firmware_header_v3_0 *) 1045 adev->sdma.instance[0].fw->data; 1046 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data + 1047 le32_to_cpu(sdma_hdr->ucode_offset_bytes)); 1048 fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes); 1049 1050 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0, 1051 fw_data, fw_size); 1052 } 1053 } 1054 1055 static void 1056 gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev) 1057 { 1058 const __le32 *fw_data; 1059 unsigned fw_size; 1060 const struct mes_firmware_header_v1_0 *mes_hdr; 1061 int pipe, ucode_id, data_id; 1062 1063 for (pipe = 0; pipe < 2; pipe++) { 1064 if (pipe == 0) { 1065 ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0; 1066 data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK; 1067 } else { 1068 ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1; 1069 data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK; 1070 } 1071 1072 mes_hdr = (const struct mes_firmware_header_v1_0 *) 1073 adev->mes.fw[pipe]->data; 1074 1075 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1076 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes)); 1077 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); 1078 1079 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size); 1080 1081 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data + 1082 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); 1083 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); 1084 1085 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size); 1086 } 1087 } 1088 1089 static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) 1090 { 1091 uint32_t rlc_g_offset, rlc_g_size; 1092 uint64_t gpu_addr; 1093 uint32_t data; 1094 int i, num_xcc; 1095 1096 /* RLC autoload sequence 2: copy ucode */ 1097 gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(adev); 1098 gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(adev); 1099 gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(adev); 1100 gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(adev); 1101 1102 rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset; 1103 rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size; 1104 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start; 1105 1106 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1107 for (i = 0; i < num_xcc; i++) { 1108 WREG32_SOC15(GC, GET_INST(GC, i), 1109 regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, 1110 upper_32_bits(gpu_addr)); 1111 WREG32_SOC15(GC, GET_INST(GC, i), 1112 regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, 1113 lower_32_bits(gpu_addr)); 1114 WREG32_SOC15(GC, GET_INST(GC, i), 1115 regGFX_IMU_RLC_BOOTLOADER_SIZE, 1116 rlc_g_size); 1117 } 1118 1119 if (adev->gfx.imu.funcs) { 1120 /* RLC autoload sequence 3: load IMU fw */ 1121 if (adev->gfx.imu.funcs->load_microcode) 1122 adev->gfx.imu.funcs->load_microcode(adev); 1123 } 1124 1125 /* unhalt rlc to start autoload */ 1126 for (i = 0; i < num_xcc; i++) { 1127 data = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE); 1128 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1); 1129 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1); 1130 WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE, data); 1131 WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK); 1132 } 1133 1134 return 0; 1135 } 1136 1137 static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block) 1138 { 1139 int i, j, k, r, ring_id = 0; 1140 unsigned num_compute_rings; 1141 int xcc_id, num_xcc; 1142 struct amdgpu_device *adev = ip_block->adev; 1143 1144 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 1145 case IP_VERSION(12, 1, 0): 1146 adev->gfx.mec.num_mec = 1; 1147 adev->gfx.mec.num_pipe_per_mec = 4; 1148 adev->gfx.mec.num_queue_per_pipe = 8; 1149 break; 1150 default: 1151 adev->gfx.mec.num_mec = 2; 1152 adev->gfx.mec.num_pipe_per_mec = 2; 1153 adev->gfx.mec.num_queue_per_pipe = 4; 1154 break; 1155 } 1156 1157 if (adev->gfx.num_compute_rings) { 1158 /* recalculate compute rings to use based on hardware configuration */ 1159 num_compute_rings = (adev->gfx.mec.num_pipe_per_mec * 1160 adev->gfx.mec.num_queue_per_pipe) / 2; 1161 adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings, 1162 num_compute_rings); 1163 } 1164 1165 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1166 1167 /* EOP Event */ 1168 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, 1169 GFX_12_1_0__SRCID__CP_EOP_INTERRUPT, 1170 &adev->gfx.eop_irq); 1171 if (r) 1172 return r; 1173 1174 /* Privileged reg */ 1175 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, 1176 GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT, 1177 &adev->gfx.priv_reg_irq); 1178 if (r) 1179 return r; 1180 1181 /* Privileged inst */ 1182 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP, 1183 GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT, 1184 &adev->gfx.priv_inst_irq); 1185 if (r) 1186 return r; 1187 1188 /* RLC POISON Error */ 1189 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC, 1190 GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT, 1191 &adev->gfx.rlc_poison_irq); 1192 if (r) 1193 return r; 1194 1195 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1196 1197 r = gfx_v12_1_rlc_init(adev); 1198 if (r) { 1199 dev_err(adev->dev, "Failed to init rlc BOs!\n"); 1200 return r; 1201 } 1202 1203 r = gfx_v12_1_mec_init(adev); 1204 if (r) { 1205 dev_err(adev->dev, "Failed to init MEC BOs!\n"); 1206 return r; 1207 } 1208 1209 /* set up the compute queues - allocate horizontally across pipes */ 1210 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { 1211 ring_id = 0; 1212 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1213 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1214 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1215 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 1216 xcc_id, i, k, j)) 1217 continue; 1218 1219 r = gfx_v12_1_compute_ring_init(adev, ring_id, 1220 xcc_id, i, k, j); 1221 if (r) 1222 return r; 1223 1224 ring_id++; 1225 } 1226 } 1227 } 1228 1229 if (!adev->enable_mes_kiq) { 1230 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, xcc_id); 1231 if (r) { 1232 dev_err(adev->dev, "Failed to init KIQ BOs!\n"); 1233 return r; 1234 } 1235 1236 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 1237 if (r) 1238 return r; 1239 } 1240 1241 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_1_compute_mqd), xcc_id); 1242 if (r) 1243 return r; 1244 } 1245 1246 /* allocate visible FB for rlc auto-loading fw */ 1247 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1248 r = gfx_v12_1_rlc_autoload_buffer_init(adev); 1249 if (r) 1250 return r; 1251 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1252 r = gfx_v12_1_init_cp_compute_microcode_bo(adev); 1253 if (r) 1254 return r; 1255 } 1256 1257 r = gfx_v12_1_gpu_early_init(adev); 1258 if (r) 1259 return r; 1260 1261 r = amdgpu_gfx_sysfs_init(adev); 1262 if (r) 1263 return r; 1264 1265 return 0; 1266 } 1267 1268 static void gfx_v12_1_rlc_autoload_buffer_fini(struct amdgpu_device *adev) 1269 { 1270 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo, 1271 &adev->gfx.rlc.rlc_autoload_gpu_addr, 1272 (void **)&adev->gfx.rlc.rlc_autoload_ptr); 1273 } 1274 1275 static int gfx_v12_1_sw_fini(struct amdgpu_ip_block *ip_block) 1276 { 1277 int i, num_xcc; 1278 struct amdgpu_device *adev = ip_block->adev; 1279 1280 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1281 for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) 1282 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1283 1284 for (i = 0; i < num_xcc; i++) { 1285 amdgpu_gfx_mqd_sw_fini(adev, i); 1286 1287 if (!adev->enable_mes_kiq) { 1288 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); 1289 amdgpu_gfx_kiq_fini(adev, i); 1290 } 1291 } 1292 1293 gfx_v12_1_rlc_fini(adev); 1294 gfx_v12_1_mec_fini(adev); 1295 1296 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) 1297 gfx_v12_1_rlc_autoload_buffer_fini(adev); 1298 1299 gfx_v12_1_free_microcode(adev); 1300 amdgpu_gfx_sysfs_fini(adev); 1301 1302 return 0; 1303 } 1304 1305 static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num, 1306 u32 sh_num, u32 instance, int xcc_id) 1307 { 1308 u32 data; 1309 1310 if (instance == 0xffffffff) 1311 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, 1312 INSTANCE_BROADCAST_WRITES, 1); 1313 else 1314 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, 1315 instance); 1316 1317 if (se_num == 0xffffffff) 1318 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1319 1); 1320 else 1321 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1322 1323 if (sh_num == 0xffffffff) 1324 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES, 1325 1); 1326 else 1327 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num); 1328 1329 WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data); 1330 } 1331 1332 static u32 gfx_v12_1_get_sa_active_bitmap(struct amdgpu_device *adev, 1333 int xcc_id) 1334 { 1335 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask; 1336 1337 gc_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SA_UNIT_DISABLE); 1338 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask, 1339 CC_GC_SA_UNIT_DISABLE, 1340 SA_DISABLE); 1341 gc_user_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SA_UNIT_DISABLE); 1342 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask, 1343 GC_USER_SA_UNIT_DISABLE, 1344 SA_DISABLE); 1345 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se * 1346 adev->gfx.config.max_shader_engines); 1347 1348 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask)); 1349 } 1350 1351 static u32 gfx_v12_1_get_rb_active_bitmap(struct amdgpu_device *adev, 1352 int xcc_id) 1353 { 1354 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask; 1355 u32 rb_mask; 1356 1357 gc_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 1358 regCC_RB_BACKEND_DISABLE); 1359 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask, 1360 CC_RB_BACKEND_DISABLE, 1361 BACKEND_DISABLE); 1362 gc_user_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 1363 regGC_USER_RB_BACKEND_DISABLE); 1364 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask, 1365 GC_USER_RB_BACKEND_DISABLE, 1366 BACKEND_DISABLE); 1367 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se * 1368 adev->gfx.config.max_shader_engines); 1369 1370 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask)); 1371 } 1372 1373 static void gfx_v12_1_setup_rb(struct amdgpu_device *adev) 1374 { 1375 u32 rb_bitmap_width_per_sa; 1376 u32 max_sa; 1377 u32 active_sa_bitmap; 1378 u32 global_active_rb_bitmap; 1379 u32 active_rb_bitmap = 0; 1380 u32 i; 1381 int xcc_id; 1382 1383 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { 1384 /* query sa bitmap from SA_UNIT_DISABLE registers */ 1385 active_sa_bitmap = gfx_v12_1_get_sa_active_bitmap(adev, xcc_id); 1386 /* query rb bitmap from RB_BACKEND_DISABLE registers */ 1387 global_active_rb_bitmap = gfx_v12_1_get_rb_active_bitmap(adev, xcc_id); 1388 1389 /* generate active rb bitmap according to active sa bitmap */ 1390 max_sa = adev->gfx.config.max_shader_engines * 1391 adev->gfx.config.max_sh_per_se; 1392 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / 1393 adev->gfx.config.max_sh_per_se; 1394 for (i = 0; i < max_sa; i++) { 1395 if (active_sa_bitmap & (1 << i)) 1396 active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); 1397 } 1398 1399 active_rb_bitmap |= global_active_rb_bitmap; 1400 } 1401 1402 adev->gfx.config.backend_enable_mask = active_rb_bitmap; 1403 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); 1404 } 1405 1406 static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev, 1407 int xcc_id) 1408 { 1409 int i; 1410 uint32_t sh_mem_bases; 1411 uint32_t data; 1412 1413 /* 1414 * Configure apertures: 1415 * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB) 1416 * Scratch: 0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit) 1417 */ 1418 sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1419 (adev->gmc.private_aperture_start >> 58)); 1420 sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE, 1421 (adev->gmc.shared_aperture_start >> 48)); 1422 1423 mutex_lock(&adev->srbm_mutex); 1424 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { 1425 soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); 1426 /* CP and shaders */ 1427 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1428 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases); 1429 1430 /* Enable trap for each kfd vmid. */ 1431 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL); 1432 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 1433 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data); 1434 1435 /* Disable VGPR deallocation instruction for each KFD vmid. */ 1436 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG); 1437 data = REG_SET_FIELD(data, SQ_DEBUG, DISABLE_VGPR_DEALLOC, 1); 1438 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG, data); 1439 } 1440 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1441 mutex_unlock(&adev->srbm_mutex); 1442 } 1443 1444 static void gfx_v12_1_tcp_harvest(struct amdgpu_device *adev) 1445 { 1446 /* TODO: harvest feature to be added later. */ 1447 } 1448 1449 static void gfx_v12_1_get_tcc_info(struct amdgpu_device *adev) 1450 { 1451 } 1452 1453 static void gfx_v12_1_xcc_constants_init(struct amdgpu_device *adev, 1454 int xcc_id) 1455 { 1456 u32 tmp; 1457 int i; 1458 1459 /* XXX SH_MEM regs */ 1460 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1461 mutex_lock(&adev->srbm_mutex); 1462 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { 1463 soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id)); 1464 /* CP and shaders */ 1465 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1466 regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); 1467 if (i != 0) { 1468 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1469 (adev->gmc.private_aperture_start >> 58)); 1470 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1471 (adev->gmc.shared_aperture_start >> 48)); 1472 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, tmp); 1473 } 1474 } 1475 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1476 1477 mutex_unlock(&adev->srbm_mutex); 1478 1479 gfx_v12_1_xcc_init_compute_vmid(adev, xcc_id); 1480 } 1481 1482 static void gfx_v12_1_constants_init(struct amdgpu_device *adev) 1483 { 1484 int i, num_xcc; 1485 1486 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1487 1488 gfx_v12_1_setup_rb(adev); 1489 gfx_v12_1_get_cu_info(adev, &adev->gfx.cu_info); 1490 gfx_v12_1_get_tcc_info(adev); 1491 adev->gfx.config.pa_sc_tile_steering_override = 0; 1492 1493 for (i = 0; i < num_xcc; i++) 1494 gfx_v12_1_xcc_constants_init(adev, i); 1495 } 1496 1497 static void gfx_v12_1_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev, 1498 bool enable, int xcc_id) 1499 { 1500 u32 tmp; 1501 1502 if (amdgpu_sriov_vf(adev)) 1503 return; 1504 1505 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0); 1506 1507 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1508 enable ? 1 : 0); 1509 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1510 enable ? 1 : 0); 1511 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1512 enable ? 1 : 0); 1513 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1514 enable ? 1 : 0); 1515 1516 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp); 1517 } 1518 1519 static int gfx_v12_1_xcc_init_csb(struct amdgpu_device *adev, 1520 int xcc_id) 1521 { 1522 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); 1523 1524 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI, 1525 adev->gfx.rlc.clear_state_gpu_addr >> 32); 1526 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO, 1527 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 1528 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1529 regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); 1530 1531 return 0; 1532 } 1533 1534 static void gfx_v12_1_xcc_rlc_stop(struct amdgpu_device *adev, 1535 int xcc_id) 1536 { 1537 u32 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL); 1538 1539 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 1540 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL, tmp); 1541 } 1542 1543 static void gfx_v12_1_rlc_stop(struct amdgpu_device *adev) 1544 { 1545 int i, num_xcc; 1546 1547 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1548 for (i = 0; i < num_xcc; i++) 1549 gfx_v12_1_xcc_rlc_stop(adev, i); 1550 } 1551 1552 static void gfx_v12_1_xcc_rlc_reset(struct amdgpu_device *adev, 1553 int xcc_id) 1554 { 1555 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), 1556 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 1557 udelay(50); 1558 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), 1559 GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 1560 udelay(50); 1561 } 1562 1563 static void gfx_v12_1_rlc_reset(struct amdgpu_device *adev) 1564 { 1565 int i, num_xcc; 1566 1567 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1568 for (i = 0; i < num_xcc; i++) 1569 gfx_v12_1_xcc_rlc_reset(adev, i); 1570 } 1571 1572 static void gfx_v12_1_xcc_rlc_smu_handshake_cntl(struct amdgpu_device *adev, 1573 bool enable, int xcc_id) 1574 { 1575 uint32_t rlc_pg_cntl; 1576 1577 rlc_pg_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL); 1578 1579 if (!enable) { 1580 /* RLC_PG_CNTL[23] = 0 (default) 1581 * RLC will wait for handshake acks with SMU 1582 * GFXOFF will be enabled 1583 * RLC_PG_CNTL[23] = 1 1584 * RLC will not issue any message to SMU 1585 * hence no handshake between SMU & RLC 1586 * GFXOFF will be disabled 1587 */ 1588 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1589 } else 1590 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK; 1591 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, rlc_pg_cntl); 1592 } 1593 1594 static void gfx_v12_1_xcc_rlc_start(struct amdgpu_device *adev, 1595 int xcc_id) 1596 { 1597 /* TODO: enable rlc & smu handshake until smu 1598 * and gfxoff feature works as expected */ 1599 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) 1600 gfx_v12_1_xcc_rlc_smu_handshake_cntl(adev, false, xcc_id); 1601 1602 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, RLC_ENABLE_F32, 1); 1603 udelay(50); 1604 } 1605 1606 static void gfx_v12_1_rlc_start(struct amdgpu_device *adev) 1607 { 1608 int i, num_xcc; 1609 1610 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1611 for (i = 0; i < num_xcc; i++) { 1612 gfx_v12_1_xcc_rlc_start(adev, i); 1613 } 1614 } 1615 1616 static void gfx_v12_1_xcc_rlc_enable_srm(struct amdgpu_device *adev, 1617 int xcc_id) 1618 { 1619 uint32_t tmp; 1620 1621 /* enable Save Restore Machine */ 1622 tmp = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL)); 1623 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 1624 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 1625 WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL), tmp); 1626 } 1627 1628 static void gfx_v12_1_xcc_load_rlcg_microcode(struct amdgpu_device *adev, 1629 int xcc_id) 1630 { 1631 const struct rlc_firmware_header_v2_0 *hdr; 1632 const __le32 *fw_data; 1633 unsigned i, fw_size; 1634 1635 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1636 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1637 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1638 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 1639 1640 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR, 1641 RLCG_UCODE_LOADING_START_ADDRESS); 1642 1643 for (i = 0; i < fw_size; i++) 1644 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1645 regRLC_GPM_UCODE_DATA, 1646 le32_to_cpup(fw_data++)); 1647 1648 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1649 regRLC_GPM_UCODE_ADDR, 1650 adev->gfx.rlc_fw_version); 1651 } 1652 1653 static void gfx_v12_1_xcc_load_rlc_iram_dram_microcode(struct amdgpu_device *adev, 1654 int xcc_id) 1655 { 1656 const struct rlc_firmware_header_v2_2 *hdr; 1657 const __le32 *fw_data; 1658 unsigned i, fw_size; 1659 u32 tmp; 1660 1661 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data; 1662 1663 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1664 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes)); 1665 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4; 1666 1667 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_IRAM_ADDR, 0); 1668 1669 for (i = 0; i < fw_size; i++) { 1670 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1671 msleep(1); 1672 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1673 regRLC_LX6_IRAM_DATA, 1674 le32_to_cpup(fw_data++)); 1675 } 1676 1677 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1678 regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1679 1680 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 1681 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes)); 1682 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4; 1683 1684 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1685 regRLC_LX6_DRAM_ADDR, 0); 1686 for (i = 0; i < fw_size; i++) { 1687 if ((amdgpu_emu_mode == 1) && (i % 100 == 99)) 1688 msleep(1); 1689 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1690 regRLC_LX6_DRAM_DATA, 1691 le32_to_cpup(fw_data++)); 1692 } 1693 1694 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 1695 regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version); 1696 1697 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL); 1698 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1); 1699 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0); 1700 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL, tmp); 1701 } 1702 1703 static int gfx_v12_1_xcc_rlc_load_microcode(struct amdgpu_device *adev, 1704 int xcc_id) 1705 { 1706 const struct rlc_firmware_header_v2_0 *hdr; 1707 uint16_t version_major; 1708 uint16_t version_minor; 1709 1710 if (!adev->gfx.rlc_fw) 1711 return -EINVAL; 1712 1713 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1714 amdgpu_ucode_print_rlc_hdr(&hdr->header); 1715 1716 version_major = le16_to_cpu(hdr->header.header_version_major); 1717 version_minor = le16_to_cpu(hdr->header.header_version_minor); 1718 1719 if (version_major == 2) { 1720 gfx_v12_1_xcc_load_rlcg_microcode(adev, xcc_id); 1721 if (amdgpu_dpm == 1) { 1722 if (version_minor >= 2) 1723 gfx_v12_1_xcc_load_rlc_iram_dram_microcode(adev, xcc_id); 1724 } 1725 1726 return 0; 1727 } 1728 1729 return -EINVAL; 1730 } 1731 1732 static int gfx_v12_1_xcc_rlc_resume(struct amdgpu_device *adev, 1733 int xcc_id) 1734 { 1735 int r; 1736 1737 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { 1738 gfx_v12_1_xcc_init_csb(adev, xcc_id); 1739 1740 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ 1741 gfx_v12_1_xcc_rlc_enable_srm(adev, xcc_id); 1742 } else { 1743 if (amdgpu_sriov_vf(adev)) { 1744 gfx_v12_1_xcc_init_csb(adev, xcc_id); 1745 return 0; 1746 } 1747 1748 gfx_v12_1_xcc_rlc_stop(adev, xcc_id); 1749 1750 /* disable CG */ 1751 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0); 1752 1753 /* disable PG */ 1754 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, 0); 1755 1756 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 1757 /* legacy rlc firmware loading */ 1758 r = gfx_v12_1_xcc_rlc_load_microcode(adev, xcc_id); 1759 if (r) 1760 return r; 1761 } 1762 1763 gfx_v12_1_xcc_init_csb(adev, xcc_id); 1764 1765 gfx_v12_1_xcc_rlc_start(adev, xcc_id); 1766 } 1767 1768 return 0; 1769 } 1770 1771 static int gfx_v12_1_rlc_resume(struct amdgpu_device *adev) 1772 { 1773 int r, i, num_xcc; 1774 1775 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1776 for (i = 0; i < num_xcc; i++) { 1777 r = gfx_v12_1_xcc_rlc_resume(adev, i); 1778 if (r) 1779 return r; 1780 } 1781 1782 return 0; 1783 } 1784 1785 static void gfx_v12_1_xcc_config_gfx_rs64(struct amdgpu_device *adev, 1786 int xcc_id) 1787 { 1788 const struct gfx_firmware_header_v2_0 *mec_hdr; 1789 uint32_t pipe_id, tmp; 1790 1791 mec_hdr = (const struct gfx_firmware_header_v2_0 *) 1792 adev->gfx.mec_fw->data; 1793 1794 /* config mec program start addr */ 1795 for (pipe_id = 0; pipe_id < 4; pipe_id++) { 1796 soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id)); 1797 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START, 1798 mec_hdr->ucode_start_addr_lo >> 2 | 1799 mec_hdr->ucode_start_addr_hi << 30); 1800 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI, 1801 mec_hdr->ucode_start_addr_hi >> 2); 1802 } 1803 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1804 1805 /* reset mec pipe */ 1806 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL); 1807 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1); 1808 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1); 1809 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1); 1810 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1); 1811 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp); 1812 1813 /* clear mec pipe reset */ 1814 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0); 1815 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0); 1816 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0); 1817 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0); 1818 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp); 1819 } 1820 1821 static void gfx_v12_1_config_gfx_rs64(struct amdgpu_device *adev) 1822 { 1823 int i, num_xcc; 1824 1825 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 1826 1827 for (i = 0; i < num_xcc; i++) 1828 gfx_v12_1_xcc_config_gfx_rs64(adev, i); 1829 } 1830 1831 static void gfx_v12_1_xcc_set_mec_ucode_start_addr(struct amdgpu_device *adev, 1832 int xcc_id) 1833 { 1834 const struct gfx_firmware_header_v2_0 *cp_hdr; 1835 unsigned pipe_id; 1836 1837 cp_hdr = (const struct gfx_firmware_header_v2_0 *) 1838 adev->gfx.mec_fw->data; 1839 mutex_lock(&adev->srbm_mutex); 1840 for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) { 1841 soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id)); 1842 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START, 1843 cp_hdr->ucode_start_addr_lo >> 2 | 1844 cp_hdr->ucode_start_addr_hi << 30); 1845 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI, 1846 cp_hdr->ucode_start_addr_hi >> 2); 1847 } 1848 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 1849 mutex_unlock(&adev->srbm_mutex); 1850 } 1851 1852 static int gfx_v12_1_xcc_wait_for_rlc_autoload_complete(struct amdgpu_device *adev, 1853 int xcc_id) 1854 { 1855 uint32_t cp_status; 1856 uint32_t bootload_status; 1857 int i; 1858 1859 for (i = 0; i < adev->usec_timeout; i++) { 1860 cp_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_STAT); 1861 bootload_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 1862 regRLC_RLCS_BOOTLOAD_STATUS); 1863 1864 if ((cp_status == 0) && 1865 (REG_GET_FIELD(bootload_status, 1866 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { 1867 break; 1868 } 1869 udelay(1); 1870 if (amdgpu_emu_mode) 1871 msleep(10); 1872 } 1873 1874 if (i >= adev->usec_timeout) { 1875 dev_err(adev->dev, 1876 "rlc autoload: xcc%d gc ucode autoload timeout\n", xcc_id); 1877 return -ETIMEDOUT; 1878 } 1879 1880 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 1881 gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); 1882 } 1883 1884 return 0; 1885 } 1886 1887 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) 1888 { 1889 int xcc_id; 1890 1891 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) 1892 gfx_v12_1_xcc_wait_for_rlc_autoload_complete(adev, xcc_id); 1893 1894 return 0; 1895 } 1896 1897 static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev, 1898 bool enable, int xcc_id) 1899 { 1900 u32 data; 1901 1902 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL); 1903 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE, 1904 enable ? 0 : 1); 1905 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1906 enable ? 0 : 1); 1907 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1908 enable ? 0 : 1); 1909 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1910 enable ? 0 : 1); 1911 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1912 enable ? 0 : 1); 1913 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE, 1914 enable ? 1 : 0); 1915 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE, 1916 enable ? 1 : 0); 1917 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE, 1918 enable ? 1 : 0); 1919 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE, 1920 enable ? 1 : 0); 1921 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT, 1922 enable ? 0 : 1); 1923 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, data); 1924 1925 adev->gfx.kiq[xcc_id].ring.sched.ready = enable; 1926 1927 udelay(50); 1928 } 1929 1930 static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev) 1931 { 1932 const struct gfx_firmware_header_v2_0 *mec_hdr; 1933 const __le32 *fw_ucode, *fw_data; 1934 u32 fw_ucode_size, fw_data_size; 1935 u32 *fw_ucode_ptr, *fw_data_ptr; 1936 int i, r, xcc_id; 1937 1938 if (!adev->gfx.mec_fw) 1939 return -EINVAL; 1940 1941 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 1942 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 1943 1944 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data + 1945 le32_to_cpu(mec_hdr->ucode_offset_bytes)); 1946 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes); 1947 1948 fw_data = (const __le32 *) (adev->gfx.mec_fw->data + 1949 le32_to_cpu(mec_hdr->data_offset_bytes)); 1950 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 1951 1952 if (adev->gfx.mec.mec_fw_obj == NULL) { 1953 r = amdgpu_bo_create_reserved(adev, fw_ucode_size, 1954 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1955 &adev->gfx.mec.mec_fw_obj, 1956 &adev->gfx.mec.mec_fw_gpu_addr, 1957 (void **)&fw_ucode_ptr); 1958 if (r) { 1959 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r); 1960 gfx_v12_1_mec_fini(adev); 1961 return r; 1962 } 1963 1964 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size); 1965 1966 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); 1967 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); 1968 } 1969 1970 if (adev->gfx.mec.mec_fw_data_obj == NULL) { 1971 r = amdgpu_bo_create_reserved(adev, 1972 ALIGN(fw_data_size, 64 * 1024) * 1973 adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask), 1974 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, 1975 &adev->gfx.mec.mec_fw_data_obj, 1976 &adev->gfx.mec.mec_fw_data_gpu_addr, 1977 (void **)&fw_data_ptr); 1978 if (r) { 1979 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r); 1980 gfx_v12_1_mec_fini(adev); 1981 return r; 1982 } 1983 1984 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { 1985 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 1986 u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 1987 ALIGN(fw_data_size, 64 * 1024) / 4; 1988 memcpy(fw_data_ptr + offset, fw_data, fw_data_size); 1989 } 1990 } 1991 1992 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj); 1993 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj); 1994 } 1995 1996 return 0; 1997 } 1998 1999 static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev, 2000 int xcc_id) 2001 { 2002 const struct gfx_firmware_header_v2_0 *mec_hdr; 2003 u32 fw_data_size; 2004 u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */ 2005 2006 if (!adev->gfx.mec_fw) 2007 return -EINVAL; 2008 2009 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data; 2010 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes); 2011 2012 gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); 2013 2014 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL); 2015 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2016 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0); 2017 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2018 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp); 2019 2020 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL); 2021 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0); 2022 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0); 2023 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp); 2024 2025 mutex_lock(&adev->srbm_mutex); 2026 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { 2027 soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id)); 2028 2029 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO, 2030 lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2031 (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2032 ALIGN(fw_data_size, 64 * 1024))); 2033 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI, 2034 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr + 2035 (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) * 2036 ALIGN(fw_data_size, 64 * 1024))); 2037 2038 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO, 2039 lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2040 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI, 2041 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2042 } 2043 mutex_unlock(&adev->srbm_mutex); 2044 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 2045 2046 /* Trigger an invalidation of the L1 instruction caches */ 2047 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); 2048 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1); 2049 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp); 2050 2051 /* Wait for invalidation complete */ 2052 for (i = 0; i < usec_timeout; i++) { 2053 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL); 2054 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL, 2055 INVALIDATE_DCACHE_COMPLETE)) 2056 break; 2057 udelay(1); 2058 } 2059 2060 if (i >= usec_timeout) { 2061 dev_err(adev->dev, "failed to invalidate data cache\n"); 2062 return -EINVAL; 2063 } 2064 2065 /* Trigger an invalidation of the L1 instruction caches */ 2066 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2067 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1); 2068 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp); 2069 2070 /* Wait for invalidation complete */ 2071 for (i = 0; i < usec_timeout; i++) { 2072 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL); 2073 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL, 2074 INVALIDATE_CACHE_COMPLETE)) 2075 break; 2076 udelay(1); 2077 } 2078 2079 if (i >= usec_timeout) { 2080 dev_err(adev->dev, "failed to invalidate instruction cache\n"); 2081 return -EINVAL; 2082 } 2083 2084 gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id); 2085 2086 return 0; 2087 } 2088 2089 static void gfx_v12_1_xcc_kiq_setting(struct amdgpu_ring *ring, 2090 int xcc_id) 2091 { 2092 uint32_t tmp; 2093 struct amdgpu_device *adev = ring->adev; 2094 2095 /* tell RLC which is KIQ queue */ 2096 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 2097 tmp &= 0xffffff00; 2098 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2099 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 2100 tmp |= 0x80; 2101 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 2102 } 2103 2104 static void gfx_v12_1_xcc_cp_set_doorbell_range(struct amdgpu_device *adev, 2105 int xcc_id) 2106 { 2107 /* disable gfx engine doorbell range */ 2108 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_LOWER, 0); 2109 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_UPPER, 0); 2110 2111 /* set compute engine doorbell range */ 2112 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER, 2113 ((adev->doorbell_index.kiq + 2114 xcc_id * adev->doorbell_index.xcc_doorbell_range) * 2115 2) << 2); 2116 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER, 2117 ((adev->doorbell_index.userqueue_end + 2118 xcc_id * adev->doorbell_index.xcc_doorbell_range) * 2119 2) << 2); 2120 } 2121 2122 static int gfx_v12_1_compute_mqd_init(struct amdgpu_device *adev, void *m, 2123 struct amdgpu_mqd_prop *prop) 2124 { 2125 struct v12_1_compute_mqd *mqd = m; 2126 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2127 uint32_t tmp; 2128 2129 mqd->header = 0xC0310800; 2130 mqd->compute_pipelinestat_enable = 0x00000001; 2131 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2132 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2133 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2134 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2135 mqd->compute_misc_reserved = 0x00000007; 2136 2137 eop_base_addr = prop->eop_gpu_addr >> 8; 2138 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2139 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2140 2141 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2142 tmp = regCP_HQD_EOP_CONTROL_DEFAULT; 2143 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2144 (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); 2145 2146 mqd->cp_hqd_eop_control = tmp; 2147 2148 /* enable doorbell? */ 2149 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 2150 2151 if (prop->use_doorbell) { 2152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2153 DOORBELL_OFFSET, prop->doorbell_index); 2154 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2155 DOORBELL_EN, 1); 2156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2157 DOORBELL_SOURCE, 0); 2158 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2159 DOORBELL_HIT, 0); 2160 } else { 2161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2162 DOORBELL_EN, 0); 2163 } 2164 2165 mqd->cp_hqd_pq_doorbell_control = tmp; 2166 2167 /* disable the queue if it's active */ 2168 mqd->cp_hqd_dequeue_request = 0; 2169 mqd->cp_hqd_pq_rptr = 0; 2170 mqd->cp_hqd_pq_wptr_lo = 0; 2171 mqd->cp_hqd_pq_wptr_hi = 0; 2172 2173 /* set the pointer to the MQD */ 2174 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc; 2175 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); 2176 2177 /* set MQD vmid to 0 */ 2178 tmp = regCP_MQD_CONTROL_DEFAULT; 2179 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2180 mqd->cp_mqd_control = tmp; 2181 2182 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2183 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8; 2184 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2185 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2186 2187 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2188 tmp = regCP_HQD_PQ_CONTROL_DEFAULT; 2189 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2190 (order_base_2(prop->queue_size / 4) - 1)); 2191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2192 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); 2193 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2194 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); 2195 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2196 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2197 mqd->cp_hqd_pq_control = tmp; 2198 2199 /* set the wb address whether it's enabled or not */ 2200 wb_gpu_addr = prop->rptr_gpu_addr; 2201 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2202 mqd->cp_hqd_pq_rptr_report_addr_hi = 2203 upper_32_bits(wb_gpu_addr) & 0xffff; 2204 2205 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2206 wb_gpu_addr = prop->wptr_gpu_addr; 2207 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2208 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2209 2210 tmp = 0; 2211 /* enable the doorbell if requested */ 2212 if (prop->use_doorbell) { 2213 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; 2214 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2215 DOORBELL_OFFSET, prop->doorbell_index); 2216 2217 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2218 DOORBELL_EN, 1); 2219 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2220 DOORBELL_SOURCE, 0); 2221 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2222 DOORBELL_HIT, 0); 2223 } 2224 2225 mqd->cp_hqd_pq_doorbell_control = tmp; 2226 2227 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2228 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; 2229 2230 /* set the vmid for the queue */ 2231 mqd->cp_hqd_vmid = 0; 2232 2233 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; 2234 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x63); 2235 mqd->cp_hqd_persistent_state = tmp; 2236 2237 /* set MIN_IB_AVAIL_SIZE */ 2238 tmp = regCP_HQD_IB_CONTROL_DEFAULT; 2239 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 1); 2240 mqd->cp_hqd_ib_control = tmp; 2241 2242 /* set static priority for a compute queue/ring */ 2243 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority; 2244 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority; 2245 2246 mqd->cp_mqd_stride_size = prop->mqd_stride_size ? prop->mqd_stride_size : 2247 AMDGPU_MQD_SIZE_ALIGN(adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size); 2248 2249 mqd->cp_hqd_active = prop->hqd_active; 2250 2251 return 0; 2252 } 2253 2254 static int gfx_v12_1_xcc_kiq_init_register(struct amdgpu_ring *ring, 2255 int xcc_id) 2256 { 2257 struct amdgpu_device *adev = ring->adev; 2258 struct v12_1_compute_mqd *mqd = ring->mqd_ptr; 2259 int j; 2260 2261 /* inactivate the queue */ 2262 if (amdgpu_sriov_vf(adev)) 2263 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0); 2264 2265 /* disable wptr polling */ 2266 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0); 2267 2268 /* write the EOP addr */ 2269 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR, 2270 mqd->cp_hqd_eop_base_addr_lo); 2271 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI, 2272 mqd->cp_hqd_eop_base_addr_hi); 2273 2274 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2275 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL, 2276 mqd->cp_hqd_eop_control); 2277 2278 /* enable doorbell? */ 2279 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 2280 mqd->cp_hqd_pq_doorbell_control); 2281 2282 /* disable the queue if it's active */ 2283 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) { 2284 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1); 2285 for (j = 0; j < adev->usec_timeout; j++) { 2286 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1)) 2287 break; 2288 udelay(1); 2289 } 2290 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 2291 mqd->cp_hqd_dequeue_request); 2292 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 2293 mqd->cp_hqd_pq_rptr); 2294 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 2295 mqd->cp_hqd_pq_wptr_lo); 2296 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 2297 mqd->cp_hqd_pq_wptr_hi); 2298 } 2299 2300 /* set the pointer to the MQD */ 2301 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR, 2302 mqd->cp_mqd_base_addr_lo); 2303 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI, 2304 mqd->cp_mqd_base_addr_hi); 2305 2306 /* set MQD vmid to 0 */ 2307 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL, 2308 mqd->cp_mqd_control); 2309 2310 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2311 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE, 2312 mqd->cp_hqd_pq_base_lo); 2313 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI, 2314 mqd->cp_hqd_pq_base_hi); 2315 2316 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2317 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL, 2318 mqd->cp_hqd_pq_control); 2319 2320 /* set the wb address whether it's enabled or not */ 2321 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR, 2322 mqd->cp_hqd_pq_rptr_report_addr_lo); 2323 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 2324 mqd->cp_hqd_pq_rptr_report_addr_hi); 2325 2326 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2327 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR, 2328 mqd->cp_hqd_pq_wptr_poll_addr_lo); 2329 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, 2330 mqd->cp_hqd_pq_wptr_poll_addr_hi); 2331 2332 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 2333 mqd->cp_hqd_pq_doorbell_control); 2334 2335 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2336 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO, 2337 mqd->cp_hqd_pq_wptr_lo); 2338 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI, 2339 mqd->cp_hqd_pq_wptr_hi); 2340 2341 /* set the vmid for the queue */ 2342 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid); 2343 2344 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 2345 mqd->cp_hqd_persistent_state); 2346 2347 /* activate the queue */ 2348 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 2349 mqd->cp_hqd_active); 2350 2351 if (ring->use_doorbell) 2352 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1); 2353 2354 return 0; 2355 } 2356 2357 static int gfx_v12_1_xcc_kiq_init_queue(struct amdgpu_ring *ring, 2358 int xcc_id) 2359 { 2360 struct amdgpu_device *adev = ring->adev; 2361 struct v12_1_compute_mqd *mqd = ring->mqd_ptr; 2362 2363 gfx_v12_1_xcc_kiq_setting(ring, xcc_id); 2364 2365 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ 2366 /* reset MQD to a clean status */ 2367 if (adev->gfx.kiq[xcc_id].mqd_backup) 2368 memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(*mqd)); 2369 2370 /* reset ring buffer */ 2371 ring->wptr = 0; 2372 amdgpu_ring_clear_ring(ring); 2373 2374 mutex_lock(&adev->srbm_mutex); 2375 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); 2376 gfx_v12_1_xcc_kiq_init_register(ring, xcc_id); 2377 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 2378 mutex_unlock(&adev->srbm_mutex); 2379 } else { 2380 memset((void *)mqd, 0, sizeof(*mqd)); 2381 if (amdgpu_sriov_vf(adev) && adev->in_suspend) 2382 amdgpu_ring_clear_ring(ring); 2383 mutex_lock(&adev->srbm_mutex); 2384 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); 2385 amdgpu_ring_init_mqd(ring); 2386 gfx_v12_1_xcc_kiq_init_register(ring, xcc_id); 2387 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 2388 mutex_unlock(&adev->srbm_mutex); 2389 2390 if (adev->gfx.kiq[xcc_id].mqd_backup) 2391 memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(*mqd)); 2392 } 2393 2394 return 0; 2395 } 2396 2397 static int gfx_v12_1_xcc_kcq_init_queue(struct amdgpu_ring *ring, 2398 int xcc_id) 2399 { 2400 struct amdgpu_device *adev = ring->adev; 2401 struct v12_1_compute_mqd *mqd = ring->mqd_ptr; 2402 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 2403 2404 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 2405 memset((void *)mqd, 0, sizeof(*mqd)); 2406 mutex_lock(&adev->srbm_mutex); 2407 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id)); 2408 amdgpu_ring_init_mqd(ring); 2409 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); 2410 mutex_unlock(&adev->srbm_mutex); 2411 2412 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2413 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 2414 } else { 2415 /* restore MQD to a clean status */ 2416 if (adev->gfx.mec.mqd_backup[mqd_idx]) 2417 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 2418 /* reset ring buffer */ 2419 ring->wptr = 0; 2420 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 2421 amdgpu_ring_clear_ring(ring); 2422 } 2423 2424 return 0; 2425 } 2426 2427 static int gfx_v12_1_xcc_kiq_resume(struct amdgpu_device *adev, 2428 int xcc_id) 2429 { 2430 struct amdgpu_ring *ring; 2431 int r; 2432 2433 ring = &adev->gfx.kiq[xcc_id].ring; 2434 2435 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2436 if (unlikely(r != 0)) 2437 return r; 2438 2439 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2440 if (unlikely(r != 0)) { 2441 amdgpu_bo_unreserve(ring->mqd_obj); 2442 return r; 2443 } 2444 2445 gfx_v12_1_xcc_kiq_init_queue(ring, xcc_id); 2446 amdgpu_bo_kunmap(ring->mqd_obj); 2447 ring->mqd_ptr = NULL; 2448 amdgpu_bo_unreserve(ring->mqd_obj); 2449 ring->sched.ready = true; 2450 return 0; 2451 } 2452 2453 static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev, 2454 int xcc_id) 2455 { 2456 struct amdgpu_ring *ring = NULL; 2457 int r = 0, i; 2458 2459 if (!amdgpu_async_gfx_ring) 2460 gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id); 2461 2462 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2463 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; 2464 2465 r = amdgpu_bo_reserve(ring->mqd_obj, false); 2466 if (unlikely(r != 0)) 2467 goto done; 2468 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 2469 if (!r) { 2470 r = gfx_v12_1_xcc_kcq_init_queue(ring, xcc_id); 2471 amdgpu_bo_kunmap(ring->mqd_obj); 2472 ring->mqd_ptr = NULL; 2473 } 2474 amdgpu_bo_unreserve(ring->mqd_obj); 2475 if (r) 2476 goto done; 2477 } 2478 2479 r = amdgpu_gfx_enable_kcq(adev, xcc_id); 2480 done: 2481 return r; 2482 } 2483 2484 static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask) 2485 { 2486 int r, i, xcc_id; 2487 struct amdgpu_ring *ring; 2488 2489 for_each_inst(xcc_id, xcc_mask) { 2490 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2491 /* legacy firmware loading */ 2492 r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id); 2493 if (r) 2494 return r; 2495 } 2496 2497 /* GFX CGCG and LS is set by default */ 2498 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) 2499 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, true, xcc_id); 2500 2501 gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id); 2502 2503 gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id); 2504 2505 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init) 2506 r = amdgpu_mes_kiq_hw_init(adev, xcc_id); 2507 else 2508 r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id); 2509 if (r) 2510 return r; 2511 2512 r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id); 2513 if (r) 2514 return r; 2515 2516 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2517 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings]; 2518 r = amdgpu_ring_test_helper(ring); 2519 if (r) 2520 return r; 2521 } 2522 } 2523 2524 return 0; 2525 } 2526 2527 static int gfx_v12_1_cp_resume(struct amdgpu_device *adev) 2528 { 2529 int num_xcc, num_xcp, num_xcc_per_xcp; 2530 uint16_t xcc_mask; 2531 int r = 0; 2532 2533 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2534 if (amdgpu_sriov_vf(adev)) { 2535 enum amdgpu_gfx_partition mode; 2536 2537 mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 2538 AMDGPU_XCP_FL_NONE); 2539 if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) 2540 return -EINVAL; 2541 if (adev->gfx.funcs && 2542 adev->gfx.funcs->get_xccs_per_xcp) { 2543 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); 2544 adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp; 2545 num_xcp = num_xcc / num_xcc_per_xcp; 2546 } else { 2547 return -EINVAL; 2548 } 2549 r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); 2550 2551 } else { 2552 if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 2553 AMDGPU_XCP_FL_NONE) == 2554 AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) 2555 r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, 2556 amdgpu_user_partt_mode); 2557 } 2558 2559 if (r) 2560 return r; 2561 2562 xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0); 2563 2564 return gfx_v12_1_xcc_cp_resume(adev, xcc_mask); 2565 } 2566 2567 static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev) 2568 { 2569 int r, i; 2570 bool value; 2571 2572 r = adev->gfxhub.funcs->gart_enable(adev); 2573 if (r) 2574 return r; 2575 2576 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? 2577 false : true; 2578 2579 adev->gfxhub.funcs->set_fault_enable_default(adev, value); 2580 /* TODO investigate why TLB flush is needed, 2581 * are we missing a flush somewhere else? */ 2582 for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { 2583 if (AMDGPU_IS_GFXHUB(i)) 2584 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(i), 0); 2585 } 2586 2587 return 0; 2588 } 2589 2590 static int get_gb_addr_config(struct amdgpu_device *adev) 2591 { 2592 u32 gb_addr_config; 2593 2594 gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG_READ); 2595 if (gb_addr_config == 0) 2596 return -EINVAL; 2597 2598 adev->gfx.config.gb_addr_config_fields.num_pkrs = 2599 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG_READ, NUM_PKRS); 2600 2601 adev->gfx.config.gb_addr_config = gb_addr_config; 2602 2603 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 2604 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 2605 GB_ADDR_CONFIG_READ, NUM_PIPES); 2606 2607 adev->gfx.config.max_tile_pipes = 2608 adev->gfx.config.gb_addr_config_fields.num_pipes; 2609 2610 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 2611 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 2612 GB_ADDR_CONFIG_READ, MAX_COMPRESSED_FRAGS); 2613 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 2614 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 2615 GB_ADDR_CONFIG_READ, NUM_RB_PER_SE); 2616 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 2617 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 2618 GB_ADDR_CONFIG_READ, NUM_SHADER_ENGINES); 2619 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 2620 REG_GET_FIELD(adev->gfx.config.gb_addr_config, 2621 GB_ADDR_CONFIG_READ, PIPE_INTERLEAVE_SIZE)); 2622 2623 return 0; 2624 } 2625 2626 static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, 2627 int xcc_id) 2628 { 2629 uint32_t data; 2630 2631 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG); 2632 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; 2633 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); 2634 2635 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG); 2636 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; 2637 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data); 2638 } 2639 2640 static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev, 2641 int xcc_id) 2642 { 2643 uint32_t data; 2644 2645 /* Set the TCP UTCL0 register to enable atomics */ 2646 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1); 2647 data = REG_SET_FIELD(data, TCP_UTCL0_CNTL1, ATOMIC_REQUESTER_EN, 0x1); 2648 2649 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1, data); 2650 } 2651 2652 static void gfx_v12_1_xcc_disable_burst(struct amdgpu_device *adev, 2653 int xcc_id) 2654 { 2655 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGL1_DRAM_BURST_CTRL, 0xf); 2656 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGLARB_DRAM_BURST_CTRL, 0xf); 2657 } 2658 2659 static void gfx_v12_1_xcc_disable_early_write_ack(struct amdgpu_device *adev, 2660 int xcc_id) 2661 { 2662 uint32_t data; 2663 2664 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3); 2665 data = REG_SET_FIELD(data, TCP_CNTL3, DISABLE_EARLY_WRITE_ACK, 0x1); 2666 2667 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3, data); 2668 } 2669 2670 static void gfx_v12_1_xcc_disable_tcp_spill_cache(struct amdgpu_device *adev, 2671 int xcc_id) 2672 { 2673 uint32_t data; 2674 2675 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL); 2676 data = REG_SET_FIELD(data, TCP_CNTL, TCP_SPILL_CACHE_DISABLE, 0x1); 2677 2678 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL, data); 2679 } 2680 2681 static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev) 2682 { 2683 int i; 2684 2685 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) { 2686 gfx_v12_1_xcc_disable_burst(adev, i); 2687 gfx_v12_1_xcc_enable_atomics(adev, i); 2688 gfx_v12_1_xcc_disable_early_write_ack(adev, i); 2689 gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i); 2690 } 2691 } 2692 2693 static int gfx_v12_1_hw_init(struct amdgpu_ip_block *ip_block) 2694 { 2695 int r, i, num_xcc; 2696 struct amdgpu_device *adev = ip_block->adev; 2697 2698 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { 2699 /* rlc autoload firmware */ 2700 r = gfx_v12_1_rlc_backdoor_autoload_enable(adev); 2701 if (r) 2702 return r; 2703 } else { 2704 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 2705 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2706 2707 if (adev->gfx.imu.funcs) { 2708 if (adev->gfx.imu.funcs->load_microcode) 2709 adev->gfx.imu.funcs->load_microcode(adev); 2710 } 2711 2712 for (i = 0; i < num_xcc; i++) { 2713 /* disable gpa mode in backdoor loading */ 2714 gfx_v12_1_xcc_disable_gpa_mode(adev, i); 2715 } 2716 } 2717 } 2718 2719 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) || 2720 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { 2721 r = gfx_v12_1_wait_for_rlc_autoload_complete(adev); 2722 if (r) { 2723 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r); 2724 return r; 2725 } 2726 } 2727 2728 adev->gfx.is_poweron = true; 2729 2730 if (get_gb_addr_config(adev)) 2731 DRM_WARN("Invalid gb_addr_config !\n"); 2732 2733 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) 2734 gfx_v12_1_config_gfx_rs64(adev); 2735 2736 r = gfx_v12_1_gfxhub_enable(adev); 2737 if (r) 2738 return r; 2739 2740 gfx_v12_1_init_golden_registers(adev); 2741 2742 gfx_v12_1_constants_init(adev); 2743 2744 if (adev->nbio.funcs->gc_doorbell_init) 2745 adev->nbio.funcs->gc_doorbell_init(adev); 2746 2747 r = gfx_v12_1_rlc_resume(adev); 2748 if (r) 2749 return r; 2750 2751 /* 2752 * init golden registers and rlc resume may override some registers, 2753 * reconfig them here 2754 */ 2755 gfx_v12_1_tcp_harvest(adev); 2756 2757 r = gfx_v12_1_cp_resume(adev); 2758 if (r) 2759 return r; 2760 2761 return r; 2762 } 2763 2764 static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev, 2765 int xcc_id) 2766 { 2767 uint32_t tmp; 2768 2769 if (!adev->no_hw_access) { 2770 if (amdgpu_gfx_disable_kcq(adev, xcc_id)) 2771 DRM_ERROR("KCQ disable failed\n"); 2772 2773 amdgpu_mes_kiq_hw_fini(adev, xcc_id); 2774 } 2775 2776 if (amdgpu_sriov_vf(adev)) { 2777 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ 2778 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); 2779 tmp &= 0xffffff00; 2780 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); 2781 } 2782 gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id); 2783 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id); 2784 } 2785 2786 static int gfx_v12_1_set_userq_eop_interrupts(struct amdgpu_device *adev, 2787 bool enable) 2788 { 2789 unsigned int irq_type; 2790 int m, p, r; 2791 2792 if (adev->gfx.disable_kq) { 2793 for (m = 0; m < adev->gfx.mec.num_mec; ++m) { 2794 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) { 2795 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2796 + (m * adev->gfx.mec.num_pipe_per_mec) 2797 + p; 2798 if (enable) 2799 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, 2800 irq_type); 2801 else 2802 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, 2803 irq_type); 2804 if (r) 2805 return r; 2806 } 2807 } 2808 } 2809 2810 return 0; 2811 } 2812 2813 static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block) 2814 { 2815 struct amdgpu_device *adev = ip_block->adev; 2816 int i, num_xcc; 2817 2818 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 2819 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 2820 gfx_v12_1_set_userq_eop_interrupts(adev, false); 2821 2822 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2823 for (i = 0; i < num_xcc; i++) { 2824 gfx_v12_1_xcc_fini(adev, i); 2825 } 2826 2827 adev->gfxhub.funcs->gart_disable(adev); 2828 2829 adev->gfx.is_poweron = false; 2830 2831 return 0; 2832 } 2833 2834 static int gfx_v12_1_suspend(struct amdgpu_ip_block *ip_block) 2835 { 2836 return gfx_v12_1_hw_fini(ip_block); 2837 } 2838 2839 static int gfx_v12_1_resume(struct amdgpu_ip_block *ip_block) 2840 { 2841 return gfx_v12_1_hw_init(ip_block); 2842 } 2843 2844 static bool gfx_v12_1_is_idle(struct amdgpu_ip_block *ip_block) 2845 { 2846 struct amdgpu_device *adev = ip_block->adev; 2847 int i, num_xcc; 2848 2849 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2850 for (i = 0; i < num_xcc; i++) { 2851 if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i), 2852 regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 2853 return false; 2854 } 2855 return true; 2856 } 2857 2858 static int gfx_v12_1_wait_for_idle(struct amdgpu_ip_block *ip_block) 2859 { 2860 unsigned i; 2861 struct amdgpu_device *adev = ip_block->adev; 2862 2863 for (i = 0; i < adev->usec_timeout; i++) { 2864 if (gfx_v12_1_is_idle(ip_block)) 2865 return 0; 2866 udelay(1); 2867 } 2868 return -ETIMEDOUT; 2869 } 2870 2871 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev) 2872 { 2873 uint64_t clock = 0; 2874 2875 if (adev->smuio.funcs && 2876 adev->smuio.funcs->get_gpu_clock_counter) 2877 clock = adev->smuio.funcs->get_gpu_clock_counter(adev); 2878 else 2879 dev_warn(adev->dev, "query gpu clock counter is not supported\n"); 2880 2881 return clock; 2882 } 2883 2884 static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block) 2885 { 2886 struct amdgpu_device *adev = ip_block->adev; 2887 2888 2889 switch (amdgpu_user_queue) { 2890 case -1: 2891 default: 2892 adev->gfx.disable_kq = true; 2893 adev->gfx.disable_uq = true; 2894 break; 2895 case 0: 2896 adev->gfx.disable_kq = false; 2897 adev->gfx.disable_uq = true; 2898 break; 2899 } 2900 2901 adev->gfx.funcs = &gfx_v12_1_gfx_funcs; 2902 2903 if (adev->gfx.disable_kq) 2904 adev->gfx.num_compute_rings = 0; 2905 else 2906 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 2907 AMDGPU_MAX_COMPUTE_RINGS); 2908 2909 gfx_v12_1_set_kiq_pm4_funcs(adev); 2910 gfx_v12_1_set_ring_funcs(adev); 2911 gfx_v12_1_set_irq_funcs(adev); 2912 gfx_v12_1_set_rlc_funcs(adev); 2913 gfx_v12_1_set_mqd_funcs(adev); 2914 gfx_v12_1_set_imu_funcs(adev); 2915 2916 gfx_v12_1_init_rlcg_reg_access_ctrl(adev); 2917 2918 return gfx_v12_1_init_microcode(adev); 2919 } 2920 2921 static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block) 2922 { 2923 struct amdgpu_device *adev = ip_block->adev; 2924 int r; 2925 2926 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 2927 if (r) 2928 return r; 2929 2930 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 2931 if (r) 2932 return r; 2933 2934 r = gfx_v12_1_set_userq_eop_interrupts(adev, true); 2935 if (r) 2936 return r; 2937 2938 return 0; 2939 } 2940 2941 static bool gfx_v12_1_is_rlc_enabled(struct amdgpu_device *adev) 2942 { 2943 uint32_t rlc_cntl; 2944 2945 /* if RLC is not enabled, do nothing */ 2946 rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL); 2947 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false; 2948 } 2949 2950 static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, 2951 int xcc_id) 2952 { 2953 uint32_t data; 2954 unsigned i; 2955 2956 data = RLC_SAFE_MODE__CMD_MASK; 2957 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 2958 2959 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); 2960 2961 /* wait for RLC_SAFE_MODE */ 2962 for (i = 0; i < adev->usec_timeout; i++) { 2963 if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id), 2964 regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 2965 break; 2966 udelay(1); 2967 } 2968 } 2969 2970 static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, 2971 int xcc_id) 2972 { 2973 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 2974 regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK); 2975 } 2976 2977 static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev, 2978 bool enable) 2979 { 2980 int i, num_xcc; 2981 2982 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 2983 for (i = 0; i < num_xcc; i++) 2984 gfx_v12_1_xcc_update_perf_clk(adev, enable, i); 2985 } 2986 2987 static void gfx_v12_1_update_spm_vmid(struct amdgpu_device *adev, 2988 int xcc_id, 2989 struct amdgpu_ring *ring, 2990 unsigned vmid) 2991 { 2992 u32 reg, data; 2993 2994 reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL); 2995 if (amdgpu_sriov_is_pp_one_vf(adev)) 2996 data = RREG32_NO_KIQ(reg); 2997 else 2998 data = RREG32(reg); 2999 3000 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; 3001 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; 3002 3003 if (amdgpu_sriov_is_pp_one_vf(adev)) 3004 WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data); 3005 else 3006 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data); 3007 3008 if (ring 3009 && amdgpu_sriov_is_pp_one_vf(adev) 3010 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) 3011 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { 3012 uint32_t reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL); 3013 amdgpu_ring_emit_wreg(ring, reg, data); 3014 } 3015 } 3016 3017 static const struct amdgpu_rlc_funcs gfx_v12_1_rlc_funcs = { 3018 .is_rlc_enabled = gfx_v12_1_is_rlc_enabled, 3019 .set_safe_mode = gfx_v12_1_xcc_set_safe_mode, 3020 .unset_safe_mode = gfx_v12_1_xcc_unset_safe_mode, 3021 .init = gfx_v12_1_rlc_init, 3022 .get_csb_size = gfx_v12_1_get_csb_size, 3023 .get_csb_buffer = gfx_v12_1_get_csb_buffer, 3024 .resume = gfx_v12_1_rlc_resume, 3025 .stop = gfx_v12_1_rlc_stop, 3026 .reset = gfx_v12_1_rlc_reset, 3027 .start = gfx_v12_1_rlc_start, 3028 .update_spm_vmid = gfx_v12_1_update_spm_vmid, 3029 }; 3030 3031 #if 0 3032 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable) 3033 { 3034 /* TODO */ 3035 } 3036 3037 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) 3038 { 3039 /* TODO */ 3040 } 3041 #endif 3042 3043 static int gfx_v12_1_set_powergating_state(struct amdgpu_ip_block *ip_block, 3044 enum amd_powergating_state state) 3045 { 3046 struct amdgpu_device *adev = ip_block->adev; 3047 bool enable = (state == AMD_PG_STATE_GATE); 3048 3049 if (amdgpu_sriov_vf(adev)) 3050 return 0; 3051 3052 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { 3053 case IP_VERSION(12, 1, 0): 3054 amdgpu_gfx_off_ctrl(adev, enable); 3055 break; 3056 default: 3057 break; 3058 } 3059 3060 return 0; 3061 } 3062 3063 static void gfx_v12_1_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 3064 bool enable, int xcc_id) 3065 { 3066 uint32_t def, data; 3067 3068 if (!(adev->cg_flags & 3069 (AMD_CG_SUPPORT_GFX_CGCG | 3070 AMD_CG_SUPPORT_GFX_CGLS | 3071 AMD_CG_SUPPORT_GFX_3D_CGCG | 3072 AMD_CG_SUPPORT_GFX_3D_CGLS))) 3073 return; 3074 3075 if (enable) { 3076 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), 3077 regRLC_CGTT_MGCG_OVERRIDE); 3078 3079 /* unset CGCG override */ 3080 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 3081 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 3082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3083 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 3084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG || 3085 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 3086 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 3087 3088 /* update CGCG override bits */ 3089 if (def != data) 3090 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 3091 regRLC_CGTT_MGCG_OVERRIDE, data); 3092 3093 /* enable cgcg FSM(0x0000363F) */ 3094 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); 3095 3096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 3097 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK; 3098 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 3099 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 3100 } 3101 3102 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 3103 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK; 3104 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 3105 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 3106 } 3107 3108 if (def != data) 3109 WREG32_SOC15(GC, GET_INST(GC, xcc_id), 3110 regRLC_CGCG_CGLS_CTRL, data); 3111 3112 /* set IDLE_POLL_COUNT(0x00900100) */ 3113 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); 3114 3115 data &= ~CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK; 3116 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 3117 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 3118 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3119 3120 if (def != data) 3121 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); 3122 3123 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL); 3124 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); 3125 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); 3126 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); 3127 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); 3128 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL, data); 3129 } else { 3130 /* Program RLC_CGCG_CGLS_CTRL */ 3131 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); 3132 3133 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) 3134 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 3135 3136 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 3137 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 3138 3139 if (def != data) 3140 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); 3141 } 3142 } 3143 3144 static void gfx_v12_1_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev, 3145 bool enable, int xcc_id) 3146 { 3147 uint32_t data, def; 3148 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS))) 3149 return; 3150 3151 /* It is disabled by HW by default */ 3152 if (enable) { 3153 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 3154 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 3155 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); 3156 3157 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3158 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 3159 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 3160 3161 if (def != data) 3162 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 3163 } 3164 } else { 3165 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 3166 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); 3167 3168 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 3169 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 3170 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK); 3171 3172 if (def != data) 3173 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 3174 } 3175 } 3176 } 3177 3178 static void gfx_v12_1_xcc_update_repeater_fgcg(struct amdgpu_device *adev, 3179 bool enable, int xcc_id) 3180 { 3181 uint32_t def, data; 3182 3183 if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG)) 3184 return; 3185 3186 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); 3187 3188 if (enable) 3189 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | 3190 RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK); 3191 else 3192 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK | 3193 RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK; 3194 3195 if (def != data) 3196 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 3197 } 3198 3199 static void gfx_v12_1_xcc_update_sram_fgcg(struct amdgpu_device *adev, 3200 bool enable, int xcc_id) 3201 { 3202 uint32_t def, data; 3203 3204 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) 3205 return; 3206 3207 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); 3208 3209 if (enable) 3210 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 3211 else 3212 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; 3213 3214 if (def != data) 3215 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 3216 } 3217 3218 static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev, 3219 bool enable, int xcc_id) 3220 { 3221 uint32_t def, data; 3222 3223 if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK)) 3224 return; 3225 3226 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE); 3227 3228 if (enable) 3229 data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 3230 else 3231 data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK; 3232 3233 if (def != data) 3234 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); 3235 } 3236 3237 static int gfx_v12_1_xcc_update_gfx_clock_gating(struct amdgpu_device *adev, 3238 bool enable, int xcc_id) 3239 { 3240 amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); 3241 3242 gfx_v12_1_xcc_update_coarse_grain_clock_gating(adev, enable, xcc_id); 3243 3244 gfx_v12_1_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id); 3245 3246 gfx_v12_1_xcc_update_repeater_fgcg(adev, enable, xcc_id); 3247 3248 gfx_v12_1_xcc_update_sram_fgcg(adev, enable, xcc_id); 3249 3250 gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc_id); 3251 3252 if (adev->cg_flags & 3253 (AMD_CG_SUPPORT_GFX_MGCG | 3254 AMD_CG_SUPPORT_GFX_CGLS | 3255 AMD_CG_SUPPORT_GFX_CGCG | 3256 AMD_CG_SUPPORT_GFX_3D_CGCG | 3257 AMD_CG_SUPPORT_GFX_3D_CGLS)) 3258 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, enable, xcc_id); 3259 3260 amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id); 3261 3262 return 0; 3263 } 3264 3265 static int gfx_v12_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, 3266 enum amd_clockgating_state state) 3267 { 3268 struct amdgpu_device *adev = ip_block->adev; 3269 int i, num_xcc; 3270 3271 if (amdgpu_sriov_vf(adev)) 3272 return 0; 3273 3274 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3275 switch (adev->ip_versions[GC_HWIP][0]) { 3276 case IP_VERSION(12, 1, 0): 3277 for (i = 0; i < num_xcc; i++) 3278 gfx_v12_1_xcc_update_gfx_clock_gating(adev, 3279 state == AMD_CG_STATE_GATE, i); 3280 break; 3281 default: 3282 break; 3283 } 3284 3285 return 0; 3286 } 3287 3288 static void gfx_v12_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) 3289 { 3290 struct amdgpu_device *adev = ip_block->adev; 3291 int data; 3292 3293 /* AMD_CG_SUPPORT_GFX_MGCG */ 3294 data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE); 3295 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 3296 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 3297 3298 /* AMD_CG_SUPPORT_REPEATER_FGCG */ 3299 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK)) 3300 *flags |= AMD_CG_SUPPORT_REPEATER_FGCG; 3301 3302 /* AMD_CG_SUPPORT_GFX_FGCG */ 3303 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) 3304 *flags |= AMD_CG_SUPPORT_GFX_FGCG; 3305 3306 /* AMD_CG_SUPPORT_GFX_PERF_CLK */ 3307 if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK)) 3308 *flags |= AMD_CG_SUPPORT_GFX_PERF_CLK; 3309 3310 /* AMD_CG_SUPPORT_GFX_CGCG */ 3311 data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL); 3312 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 3313 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 3314 3315 /* AMD_CG_SUPPORT_GFX_CGLS */ 3316 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 3317 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 3318 } 3319 3320 static u64 gfx_v12_1_ring_get_rptr_compute(struct amdgpu_ring *ring) 3321 { 3322 /* gfx12 hardware is 32bit rptr */ 3323 return *(uint32_t *)ring->rptr_cpu_addr; 3324 } 3325 3326 static u64 gfx_v12_1_ring_get_wptr_compute(struct amdgpu_ring *ring) 3327 { 3328 u64 wptr; 3329 3330 /* XXX check if swapping is necessary on BE */ 3331 if (ring->use_doorbell) 3332 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); 3333 else 3334 BUG(); 3335 return wptr; 3336 } 3337 3338 static void gfx_v12_1_ring_set_wptr_compute(struct amdgpu_ring *ring) 3339 { 3340 struct amdgpu_device *adev = ring->adev; 3341 3342 /* XXX check if swapping is necessary on BE */ 3343 if (ring->use_doorbell) { 3344 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 3345 ring->wptr); 3346 WDOORBELL64(ring->doorbell_index, ring->wptr); 3347 } else { 3348 BUG(); /* only DOORBELL method supported on gfx12 now */ 3349 } 3350 } 3351 3352 static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring, 3353 struct amdgpu_job *job, 3354 struct amdgpu_ib *ib, 3355 uint32_t flags) 3356 { 3357 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 3358 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 3359 3360 /* Currently, there is a high possibility to get wave ID mismatch 3361 * between ME and GDS, leading to a hw deadlock, because ME generates 3362 * different wave IDs than the GDS expects. This situation happens 3363 * randomly when at least 5 compute pipes use GDS ordered append. 3364 * The wave IDs generated by ME are also wrong after suspend/resume. 3365 * Those are probably bugs somewhere else in the kernel driver. 3366 * 3367 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 3368 * GDS to 0 for this ring (me/pipe). 3369 */ 3370 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 3371 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 3372 amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID); 3373 } 3374 3375 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 3376 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 3377 amdgpu_ring_write(ring, 3378 #ifdef __BIG_ENDIAN 3379 (2 << 0) | 3380 #endif 3381 lower_32_bits(ib->gpu_addr)); 3382 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 3383 amdgpu_ring_write(ring, control); 3384 } 3385 3386 static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 3387 u64 seq, unsigned flags) 3388 { 3389 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 3390 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 3391 3392 /* RELEASE_MEM - flush caches, send int */ 3393 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 3394 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ(1) | 3395 PACKET3_RELEASE_MEM_GCR_GLV_WB | 3396 PACKET3_RELEASE_MEM_GCR_GL2_WB | 3397 PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) | 3398 PACKET3_RELEASE_MEM_TEMPORAL(3) | 3399 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 3400 PACKET3_RELEASE_MEM_EVENT_INDEX(5))); 3401 amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) | 3402 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0))); 3403 3404 /* 3405 * the address should be Qword aligned if 64bit write, Dword 3406 * aligned if only send 32bit data low (discard data high) 3407 */ 3408 if (write64bit) 3409 BUG_ON(addr & 0x7); 3410 else 3411 BUG_ON(addr & 0x3); 3412 amdgpu_ring_write(ring, lower_32_bits(addr)); 3413 amdgpu_ring_write(ring, upper_32_bits(addr)); 3414 amdgpu_ring_write(ring, lower_32_bits(seq)); 3415 amdgpu_ring_write(ring, upper_32_bits(seq)); 3416 amdgpu_ring_write(ring, 0); 3417 } 3418 3419 static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 3420 { 3421 uint32_t seq = ring->fence_drv.sync_seq; 3422 uint64_t addr = ring->fence_drv.gpu_addr; 3423 3424 gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr), 3425 upper_32_bits(addr), seq, 0xffffffff, 4); 3426 } 3427 3428 static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring, 3429 uint16_t pasid, uint32_t flush_type, 3430 bool all_hub, uint8_t dst_sel) 3431 { 3432 amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); 3433 amdgpu_ring_write(ring, 3434 PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) | 3435 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | 3436 PACKET3_INVALIDATE_TLBS_PASID(pasid) | 3437 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); 3438 } 3439 3440 static void gfx_v12_1_ring_emit_vm_flush(struct amdgpu_ring *ring, 3441 unsigned vmid, uint64_t pd_addr) 3442 { 3443 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 3444 3445 /* compute doesn't have PFP */ 3446 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 3447 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 3448 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 3449 amdgpu_ring_write(ring, 0x0); 3450 } 3451 } 3452 3453 static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 3454 u64 seq, unsigned int flags) 3455 { 3456 struct amdgpu_device *adev = ring->adev; 3457 3458 /* we only allocate 32bit for each seq wb address */ 3459 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 3460 3461 /* write fence seq to the "addr" */ 3462 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3463 amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 3464 amdgpu_ring_write(ring, lower_32_bits(addr)); 3465 amdgpu_ring_write(ring, upper_32_bits(addr)); 3466 amdgpu_ring_write(ring, lower_32_bits(seq)); 3467 3468 if (flags & AMDGPU_FENCE_FLAG_INT) { 3469 /* set register to trigger INT */ 3470 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3471 amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 3472 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS)); 3473 amdgpu_ring_write(ring, 0); 3474 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 3475 } 3476 } 3477 3478 static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, 3479 uint32_t reg_val_offs) 3480 { 3481 struct amdgpu_device *adev = ring->adev; 3482 3483 reg = soc_v1_0_normalize_xcc_reg_offset(reg); 3484 3485 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 3486 amdgpu_ring_write(ring, 0 | /* src: register*/ 3487 (5 << 8) | /* dst: memory */ 3488 (1 << 20)); /* write confirm */ 3489 amdgpu_ring_write(ring, reg); 3490 amdgpu_ring_write(ring, 0); 3491 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 3492 reg_val_offs * 4)); 3493 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 3494 reg_val_offs * 4)); 3495 } 3496 3497 static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, 3498 uint32_t reg, 3499 uint32_t val) 3500 { 3501 uint32_t cmd = 0; 3502 3503 reg = soc_v1_0_normalize_xcc_reg_offset(reg); 3504 3505 switch (ring->funcs->type) { 3506 case AMDGPU_RING_TYPE_KIQ: 3507 cmd = (1 << 16); /* no inc addr */ 3508 break; 3509 default: 3510 cmd = WR_CONFIRM; 3511 break; 3512 } 3513 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 3514 amdgpu_ring_write(ring, cmd); 3515 amdgpu_ring_write(ring, reg); 3516 amdgpu_ring_write(ring, 0); 3517 amdgpu_ring_write(ring, val); 3518 } 3519 3520 static void gfx_v12_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 3521 uint32_t val, uint32_t mask) 3522 { 3523 gfx_v12_1_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 3524 } 3525 3526 static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 3527 uint32_t reg0, uint32_t reg1, 3528 uint32_t ref, uint32_t mask) 3529 { 3530 gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, 3531 ref, mask, 0x20); 3532 } 3533 3534 static void gfx_v12_1_xcc_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 3535 int me, int pipe, 3536 enum amdgpu_interrupt_state state, 3537 int xcc_id) 3538 { 3539 u32 mec_int_cntl, mec_int_cntl_reg; 3540 3541 /* 3542 * amdgpu controls only the first MEC. That's why this function only 3543 * handles the setting of interrupts for this specific MEC. All other 3544 * pipes' interrupts are set by amdkfd. 3545 */ 3546 3547 if (me == 1) { 3548 switch (pipe) { 3549 case 0: 3550 mec_int_cntl_reg = SOC15_REG_OFFSET( 3551 GC, GET_INST(GC, xcc_id), 3552 regCP_ME1_PIPE0_INT_CNTL); 3553 break; 3554 case 1: 3555 mec_int_cntl_reg = SOC15_REG_OFFSET( 3556 GC, GET_INST(GC, xcc_id), 3557 regCP_ME1_PIPE1_INT_CNTL); 3558 break; 3559 case 2: 3560 mec_int_cntl_reg = SOC15_REG_OFFSET( 3561 GC, GET_INST(GC, xcc_id), 3562 regCP_ME1_PIPE2_INT_CNTL); 3563 break; 3564 case 3: 3565 mec_int_cntl_reg = SOC15_REG_OFFSET( 3566 GC, GET_INST(GC, xcc_id), 3567 regCP_ME1_PIPE3_INT_CNTL); 3568 break; 3569 default: 3570 DRM_DEBUG("invalid pipe %d\n", pipe); 3571 return; 3572 } 3573 } else { 3574 DRM_DEBUG("invalid me %d\n", me); 3575 return; 3576 } 3577 3578 switch (state) { 3579 case AMDGPU_IRQ_STATE_DISABLE: 3580 mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); 3581 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3582 TIME_STAMP_INT_ENABLE, 0); 3583 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3584 GENERIC0_INT_ENABLE, 0); 3585 WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); 3586 break; 3587 case AMDGPU_IRQ_STATE_ENABLE: 3588 mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); 3589 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3590 TIME_STAMP_INT_ENABLE, 1); 3591 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 3592 GENERIC0_INT_ENABLE, 1); 3593 WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); 3594 break; 3595 default: 3596 break; 3597 } 3598 } 3599 3600 static int gfx_v12_1_set_eop_interrupt_state(struct amdgpu_device *adev, 3601 struct amdgpu_irq_src *src, 3602 unsigned type, 3603 enum amdgpu_interrupt_state state) 3604 { 3605 int i, num_xcc; 3606 3607 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3608 for (i = 0; i < num_xcc; i++) { 3609 switch (type) { 3610 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 3611 gfx_v12_1_xcc_set_compute_eop_interrupt_state( 3612 adev, 1, 0, state, i); 3613 break; 3614 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 3615 gfx_v12_1_xcc_set_compute_eop_interrupt_state( 3616 adev, 1, 1, state, i); 3617 break; 3618 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 3619 gfx_v12_1_xcc_set_compute_eop_interrupt_state( 3620 adev, 1, 2, state, i); 3621 break; 3622 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 3623 gfx_v12_1_xcc_set_compute_eop_interrupt_state( 3624 adev, 1, 3, state, i); 3625 break; 3626 default: 3627 break; 3628 } 3629 } 3630 3631 return 0; 3632 } 3633 3634 static int gfx_v12_1_eop_irq(struct amdgpu_device *adev, 3635 struct amdgpu_irq_src *source, 3636 struct amdgpu_iv_entry *entry) 3637 { 3638 u32 doorbell_offset = entry->src_data[0]; 3639 u8 me_id, pipe_id, queue_id; 3640 struct amdgpu_ring *ring; 3641 int i, xcc_id; 3642 3643 DRM_DEBUG("IH: CP EOP\n"); 3644 3645 if (adev->enable_mes && doorbell_offset) { 3646 struct amdgpu_userq_fence_driver *fence_drv = NULL; 3647 struct xarray *xa = &adev->userq_xa; 3648 unsigned long flags; 3649 3650 xa_lock_irqsave(xa, flags); 3651 fence_drv = xa_load(xa, doorbell_offset); 3652 if (fence_drv) 3653 amdgpu_userq_fence_driver_process(fence_drv); 3654 xa_unlock_irqrestore(xa, flags); 3655 } else { 3656 me_id = (entry->ring_id & 0x0c) >> 2; 3657 pipe_id = (entry->ring_id & 0x03) >> 0; 3658 queue_id = (entry->ring_id & 0x70) >> 4; 3659 xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id); 3660 3661 if (xcc_id == -EINVAL) 3662 return -EINVAL; 3663 3664 switch (me_id) { 3665 case 1: 3666 case 2: 3667 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3668 ring = &adev->gfx.compute_ring 3669 [i + 3670 xcc_id * adev->gfx.num_compute_rings]; 3671 /* Per-queue interrupt is supported for MEC starting from VI. 3672 * The interrupt can only be enabled/disabled per pipe instead 3673 * of per queue. 3674 */ 3675 if ((ring->me == me_id) && 3676 (ring->pipe == pipe_id) && 3677 (ring->queue == queue_id)) 3678 amdgpu_fence_process(ring); 3679 } 3680 break; 3681 default: 3682 dev_dbg(adev->dev, "Unexpected me %d in eop_irq\n", me_id); 3683 break; 3684 } 3685 } 3686 3687 return 0; 3688 } 3689 3690 static int gfx_v12_1_set_priv_reg_fault_state(struct amdgpu_device *adev, 3691 struct amdgpu_irq_src *source, 3692 unsigned type, 3693 enum amdgpu_interrupt_state state) 3694 { 3695 int i, num_xcc; 3696 3697 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3698 switch (state) { 3699 case AMDGPU_IRQ_STATE_DISABLE: 3700 case AMDGPU_IRQ_STATE_ENABLE: 3701 for (i = 0; i < num_xcc; i++) 3702 WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, 3703 PRIV_REG_INT_ENABLE, 3704 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 3705 break; 3706 default: 3707 break; 3708 } 3709 3710 return 0; 3711 } 3712 3713 static int gfx_v12_1_set_priv_inst_fault_state(struct amdgpu_device *adev, 3714 struct amdgpu_irq_src *source, 3715 unsigned type, 3716 enum amdgpu_interrupt_state state) 3717 { 3718 int i, num_xcc; 3719 3720 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3721 switch (state) { 3722 case AMDGPU_IRQ_STATE_DISABLE: 3723 case AMDGPU_IRQ_STATE_ENABLE: 3724 for (i = 0; i < num_xcc; i++) 3725 WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0, 3726 PRIV_INSTR_INT_ENABLE, 3727 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 3728 break; 3729 default: 3730 break; 3731 } 3732 3733 return 0; 3734 } 3735 3736 static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev, 3737 struct amdgpu_iv_entry *entry) 3738 { 3739 u8 me_id, pipe_id, queue_id; 3740 struct amdgpu_ring *ring; 3741 int i, xcc_id; 3742 3743 me_id = (entry->ring_id & 0x0c) >> 2; 3744 pipe_id = (entry->ring_id & 0x03) >> 0; 3745 queue_id = (entry->ring_id & 0x70) >> 4; 3746 xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id); 3747 3748 if (xcc_id == -EINVAL) 3749 return; 3750 3751 if (!adev->gfx.disable_kq) { 3752 switch (me_id) { 3753 case 1: 3754 case 2: 3755 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3756 ring = &adev->gfx.compute_ring 3757 [i + 3758 xcc_id * adev->gfx.num_compute_rings]; 3759 if (ring->me == me_id && ring->pipe == pipe_id && 3760 ring->queue == queue_id) 3761 drm_sched_fault(&ring->sched); 3762 } 3763 break; 3764 default: 3765 dev_dbg(adev->dev, "Unexpected me %d in priv_fault\n", me_id); 3766 break; 3767 } 3768 } 3769 } 3770 3771 static int gfx_v12_1_priv_reg_irq(struct amdgpu_device *adev, 3772 struct amdgpu_irq_src *source, 3773 struct amdgpu_iv_entry *entry) 3774 { 3775 DRM_ERROR("Illegal register access in command stream\n"); 3776 gfx_v12_1_handle_priv_fault(adev, entry); 3777 return 0; 3778 } 3779 3780 static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev, 3781 struct amdgpu_irq_src *source, 3782 struct amdgpu_iv_entry *entry) 3783 { 3784 DRM_ERROR("Illegal instruction in command stream\n"); 3785 gfx_v12_1_handle_priv_fault(adev, entry); 3786 return 0; 3787 } 3788 3789 static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev, 3790 struct amdgpu_irq_src *source, 3791 struct amdgpu_iv_entry *entry) 3792 { 3793 uint32_t rlc_fed_status = 0; 3794 uint32_t ras_blk = RAS_BLOCK_ID__GFX; 3795 struct ras_ih_info ih_info = {0}; 3796 int i, num_xcc; 3797 3798 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3799 for (i = 0; i < num_xcc; i++) 3800 rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC, 3801 GET_INST(GC, i), regRLC_RLCS_FED_STATUS)); 3802 3803 if (!rlc_fed_status) 3804 return 0; 3805 3806 if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) || 3807 REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR)) 3808 ras_blk = RAS_BLOCK_ID__SDMA; 3809 3810 dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk); 3811 3812 ih_info.block = ras_blk; 3813 ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; 3814 amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info); 3815 return 0; 3816 } 3817 3818 static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring) 3819 { 3820 const unsigned int gcr_cntl = 3821 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) | 3822 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) | 3823 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) | 3824 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) | 3825 PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) | 3826 PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(2); 3827 3828 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */ 3829 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6)); 3830 amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */ 3831 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ 3832 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */ 3833 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ 3834 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ 3835 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ 3836 amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ 3837 } 3838 3839 static const struct amd_ip_funcs gfx_v12_1_ip_funcs = { 3840 .name = "gfx_v12_1", 3841 .early_init = gfx_v12_1_early_init, 3842 .late_init = gfx_v12_1_late_init, 3843 .sw_init = gfx_v12_1_sw_init, 3844 .sw_fini = gfx_v12_1_sw_fini, 3845 .hw_init = gfx_v12_1_hw_init, 3846 .hw_fini = gfx_v12_1_hw_fini, 3847 .suspend = gfx_v12_1_suspend, 3848 .resume = gfx_v12_1_resume, 3849 .is_idle = gfx_v12_1_is_idle, 3850 .wait_for_idle = gfx_v12_1_wait_for_idle, 3851 .set_clockgating_state = gfx_v12_1_set_clockgating_state, 3852 .set_powergating_state = gfx_v12_1_set_powergating_state, 3853 .get_clockgating_state = gfx_v12_1_get_clockgating_state, 3854 }; 3855 3856 static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_compute = { 3857 .type = AMDGPU_RING_TYPE_COMPUTE, 3858 .align_mask = 0xff, 3859 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 3860 .support_64bit_ptrs = true, 3861 .get_rptr = gfx_v12_1_ring_get_rptr_compute, 3862 .get_wptr = gfx_v12_1_ring_get_wptr_compute, 3863 .set_wptr = gfx_v12_1_ring_set_wptr_compute, 3864 .emit_frame_size = 3865 7 + /* gfx_v12_1_ring_emit_pipeline_sync */ 3866 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 3867 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 3868 2 + /* gfx_v12_1_ring_emit_vm_flush */ 3869 8 + 8 + 8 + /* gfx_v12_1_ring_emit_fence x3 for user fence, vm fence */ 3870 8, /* gfx_v12_1_emit_mem_sync */ 3871 .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */ 3872 .emit_ib = gfx_v12_1_ring_emit_ib_compute, 3873 .emit_fence = gfx_v12_1_ring_emit_fence, 3874 .emit_pipeline_sync = gfx_v12_1_ring_emit_pipeline_sync, 3875 .emit_vm_flush = gfx_v12_1_ring_emit_vm_flush, 3876 .test_ring = gfx_v12_1_ring_test_ring, 3877 .test_ib = gfx_v12_1_ring_test_ib, 3878 .insert_nop = amdgpu_ring_insert_nop, 3879 .pad_ib = amdgpu_ring_generic_pad_ib, 3880 .emit_wreg = gfx_v12_1_ring_emit_wreg, 3881 .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait, 3882 .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait, 3883 .emit_mem_sync = gfx_v12_1_emit_mem_sync, 3884 }; 3885 3886 static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_kiq = { 3887 .type = AMDGPU_RING_TYPE_KIQ, 3888 .align_mask = 0xff, 3889 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 3890 .support_64bit_ptrs = true, 3891 .get_rptr = gfx_v12_1_ring_get_rptr_compute, 3892 .get_wptr = gfx_v12_1_ring_get_wptr_compute, 3893 .set_wptr = gfx_v12_1_ring_set_wptr_compute, 3894 .emit_frame_size = 3895 7 + /* gfx_v12_1_ring_emit_pipeline_sync */ 3896 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 3897 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 3898 2 + /* gfx_v12_1_ring_emit_vm_flush */ 3899 8 + 8 + 8, /* gfx_v12_1_ring_emit_fence_kiq x3 for user fence, vm fence */ 3900 .emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */ 3901 .emit_ib = gfx_v12_1_ring_emit_ib_compute, 3902 .emit_fence = gfx_v12_1_ring_emit_fence_kiq, 3903 .test_ring = gfx_v12_1_ring_test_ring, 3904 .test_ib = gfx_v12_1_ring_test_ib, 3905 .insert_nop = amdgpu_ring_insert_nop, 3906 .pad_ib = amdgpu_ring_generic_pad_ib, 3907 .emit_rreg = gfx_v12_1_ring_emit_rreg, 3908 .emit_wreg = gfx_v12_1_ring_emit_wreg, 3909 .emit_reg_wait = gfx_v12_1_ring_emit_reg_wait, 3910 .emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait, 3911 }; 3912 3913 static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev) 3914 { 3915 int i, j, num_xcc; 3916 3917 num_xcc = NUM_XCC(adev->gfx.xcc_mask); 3918 for (i = 0; i < num_xcc; i++) { 3919 adev->gfx.kiq[i].ring.funcs = &gfx_v12_1_ring_funcs_kiq; 3920 3921 for (j = 0; j < adev->gfx.num_compute_rings; j++) 3922 adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs = 3923 &gfx_v12_1_ring_funcs_compute; 3924 } 3925 } 3926 3927 static const struct amdgpu_irq_src_funcs gfx_v12_1_eop_irq_funcs = { 3928 .set = gfx_v12_1_set_eop_interrupt_state, 3929 .process = gfx_v12_1_eop_irq, 3930 }; 3931 3932 static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_reg_irq_funcs = { 3933 .set = gfx_v12_1_set_priv_reg_fault_state, 3934 .process = gfx_v12_1_priv_reg_irq, 3935 }; 3936 3937 static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = { 3938 .set = gfx_v12_1_set_priv_inst_fault_state, 3939 .process = gfx_v12_1_priv_inst_irq, 3940 }; 3941 3942 static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = { 3943 .process = gfx_v12_1_rlc_poison_irq, 3944 }; 3945 3946 static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev) 3947 { 3948 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 3949 adev->gfx.eop_irq.funcs = &gfx_v12_1_eop_irq_funcs; 3950 3951 adev->gfx.priv_reg_irq.num_types = 1; 3952 adev->gfx.priv_reg_irq.funcs = &gfx_v12_1_priv_reg_irq_funcs; 3953 3954 adev->gfx.priv_inst_irq.num_types = 1; 3955 adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs; 3956 3957 adev->gfx.rlc_poison_irq.num_types = 1; 3958 adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs; 3959 } 3960 3961 static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev) 3962 { 3963 if (adev->flags & AMD_IS_APU) 3964 adev->gfx.imu.mode = MISSION_MODE; 3965 else 3966 adev->gfx.imu.mode = DEBUG_MODE; 3967 if (!amdgpu_sriov_vf(adev)) 3968 adev->gfx.imu.funcs = &gfx_v12_1_imu_funcs; 3969 } 3970 3971 static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev) 3972 { 3973 adev->gfx.rlc.funcs = &gfx_v12_1_rlc_funcs; 3974 } 3975 3976 static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev) 3977 { 3978 /* set compute eng mqd */ 3979 adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size = 3980 sizeof(struct v12_1_compute_mqd); 3981 adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd = 3982 gfx_v12_1_compute_mqd_init; 3983 } 3984 3985 static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev, 3986 u32 bitmap, int xcc_id) 3987 { 3988 u32 data; 3989 3990 if (!bitmap) 3991 return; 3992 3993 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 3994 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 3995 3996 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); 3997 } 3998 3999 static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev, 4000 int xcc_id) 4001 { 4002 u32 data, mask; 4003 4004 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); 4005 data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG); 4006 4007 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; 4008 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; 4009 4010 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 4011 4012 return (~data) & mask; 4013 } 4014 4015 static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev, 4016 struct amdgpu_cu_info *cu_info) 4017 { 4018 int i, j, k, counter, xcc_id, active_cu_number = 0; 4019 u32 mask, bitmap; 4020 unsigned int disable_masks[2 * 2]; 4021 4022 if (!adev || !cu_info) 4023 return -EINVAL; 4024 4025 if (adev->gfx.config.max_shader_engines > 2 || 4026 adev->gfx.config.max_sh_per_se > 2) { 4027 dev_err(adev->dev, 4028 "Max SE (%d) and Max SA per SE (%d) is greater than expected\n", 4029 adev->gfx.config.max_shader_engines, 4030 adev->gfx.config.max_sh_per_se); 4031 return -EINVAL; 4032 } 4033 4034 amdgpu_gfx_parse_disable_cu(adev, disable_masks, 4035 adev->gfx.config.max_shader_engines, 4036 adev->gfx.config.max_sh_per_se); 4037 4038 mutex_lock(&adev->grbm_idx_mutex); 4039 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { 4040 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 4041 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 4042 bitmap = i * adev->gfx.config.max_sh_per_se + j; 4043 if (!((gfx_v12_1_get_sa_active_bitmap(adev, xcc_id) >> bitmap) & 1)) 4044 continue; 4045 mask = 1; 4046 counter = 0; 4047 gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); 4048 gfx_v12_1_set_user_cu_inactive_bitmap_per_sh( 4049 adev, 4050 disable_masks[i * adev->gfx.config.max_sh_per_se + j], 4051 xcc_id); 4052 bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id); 4053 4054 cu_info->bitmap[xcc_id][i][j] = bitmap; 4055 4056 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { 4057 if (bitmap & mask) 4058 counter++; 4059 4060 mask <<= 1; 4061 } 4062 active_cu_number += counter; 4063 } 4064 } 4065 gfx_v12_1_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); 4066 } 4067 mutex_unlock(&adev->grbm_idx_mutex); 4068 4069 cu_info->number = active_cu_number; 4070 cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1; 4071 cu_info->lds_size = 320; 4072 4073 return 0; 4074 } 4075 4076 const struct amdgpu_ip_block_version gfx_v12_1_ip_block = { 4077 .type = AMD_IP_BLOCK_TYPE_GFX, 4078 .major = 12, 4079 .minor = 1, 4080 .rev = 0, 4081 .funcs = &gfx_v12_1_ip_funcs, 4082 }; 4083 4084 static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask) 4085 { 4086 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4087 uint32_t tmp_mask; 4088 int i, r; 4089 4090 /* TODO : Initialize golden regs */ 4091 /* gfx_v12_1_init_golden_registers(adev); */ 4092 4093 tmp_mask = inst_mask; 4094 for_each_inst(i, tmp_mask) 4095 gfx_v12_1_xcc_constants_init(adev, i); 4096 4097 if (!amdgpu_sriov_vf(adev)) { 4098 tmp_mask = inst_mask; 4099 for_each_inst(i, tmp_mask) { 4100 r = gfx_v12_1_xcc_rlc_resume(adev, i); 4101 if (r) 4102 return r; 4103 } 4104 } 4105 4106 r = gfx_v12_1_xcc_cp_resume(adev, inst_mask); 4107 4108 return r; 4109 } 4110 4111 static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask) 4112 { 4113 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4114 int i; 4115 4116 for_each_inst(i, inst_mask) 4117 gfx_v12_1_xcc_fini(adev, i); 4118 4119 return 0; 4120 } 4121 4122 struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs = { 4123 .suspend = &gfx_v12_1_xcp_suspend, 4124 .resume = &gfx_v12_1_xcp_resume 4125 }; 4126