/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_vpe.h"
#include "soc15_common.h"
#include "vpe_v6_1.h"

#define VPE_FW_NAME_LEN		64

#define AMDGPU_CSA_VPE_SIZE	64
/* VPE CSA resides in the 4th page of CSA */
#define AMDGPU_CSA_VPE_OFFSET	(4096 * 3)

static void vpe_set_ring_funcs(struct amdgpu_device *adev);

int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
{
	struct amdgpu_firmware_info ucode = {
		.ucode_id = AMDGPU_UCODE_ID_VPE,
		.mc_addr = adev->vpe.cmdbuf_gpu_addr,
		.ucode_size = 8,
	};

	return psp_execute_ip_fw_load(&adev->psp, &ucode);
}

int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = vpe->ring.adev;
	const struct vpe_firmware_header_v1_0 *vpe_hdr;
	char fw_name[VPE_FW_NAME_LEN];
	char fw_prefix[VPE_FW_NAME_LEN];
	int ret;

	amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fw_prefix);

	ret = amdgpu_ucode_request(adev, &adev->vpe.fw, fw_name);
	if (ret)
		goto out;

	vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
	adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
	adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		struct amdgpu_firmware_info *info;

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
		info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
		info->fw = adev->vpe.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
		info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
		info->fw = adev->vpe.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
	}

	return 0;
out:
	dev_err(adev->dev, "fail to initialize vpe microcode\n");
	release_firmware(adev->vpe.fw);
	adev->vpe.fw = NULL;
	return ret;
}
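/*
 * Note on the ring setup below: the VPE ring uses a 64-bit doorbell (see the
 * WDOORBELL64() write in vpe_ring_set_wptr()), so each doorbell slot spans
 * two dwords; adev->doorbell_index.vpe_ring is therefore shifted left by one
 * to turn the allocated slot number into a dword-based doorbell index.
 */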
int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
	struct amdgpu_ring *ring = &vpe->ring;
	int ret;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->vm_hub = AMDGPU_MMHUB0(0);
	ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
	snprintf(ring->name, 4, "vpe");

	ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0,
			       AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (ret)
		return ret;

	return 0;
}

int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
{
	amdgpu_ring_fini(&vpe->ring);

	return 0;
}

static int vpe_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
	case IP_VERSION(6, 1, 0):
		vpe_v6_1_set_funcs(vpe);
		break;
	default:
		return -EINVAL;
	}

	vpe_set_ring_funcs(adev);
	vpe_set_regs(vpe);

	return 0;
}

static int vpe_common_init(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
	int r;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->vpe.cmdbuf_obj,
				    &adev->vpe.cmdbuf_gpu_addr,
				    (void **)&adev->vpe.cmdbuf_cpu_addr);
	if (r) {
		dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r);
		return r;
	}

	return 0;
}

static int vpe_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;
	int ret;

	ret = vpe_common_init(vpe);
	if (ret)
		goto out;

	ret = vpe_irq_init(vpe);
	if (ret)
		goto out;

	ret = vpe_ring_init(vpe);
	if (ret)
		goto out;

	ret = vpe_init_microcode(vpe);
	if (ret)
		goto out;
out:
	return ret;
}

static int vpe_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	release_firmware(vpe->fw);
	vpe->fw = NULL;

	vpe_ring_fini(vpe);

	amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
			      &adev->vpe.cmdbuf_gpu_addr,
			      (void **)&adev->vpe.cmdbuf_cpu_addr);

	return 0;
}

static int vpe_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;
	int ret;

	ret = vpe_load_microcode(vpe);
	if (ret)
		return ret;

	ret = vpe_ring_start(vpe);
	if (ret)
		return ret;

	return 0;
}

static int vpe_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	vpe_ring_stop(vpe);

	return 0;
}

static int vpe_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return vpe_hw_fini(adev);
}

static int vpe_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return vpe_hw_init(adev);
}
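/*
 * Padding helpers: VPE indirect buffers are padded out to a multiple of
 * 8 dwords, and the INDIRECT packet itself is positioned so that it ends
 * on an 8-dword boundary (see vpe_ring_emit_ib()).  Padding is a single
 * NOP packet whose COUNT field gives the number of filler dwords that
 * follow the header.
 *
 * For example, an IB with length_dw == 13 needs
 *	pad_count = (-13) & 0x7 = 3
 * padding dwords: one NOP header with COUNT(2) plus two filler dwords,
 * which brings length_dw to 16.
 */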
static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	/* nothing to emit when no padding is requested */
	if (!count)
		return;

	amdgpu_ring_write(ring, ring->funcs->nop |
				VPE_CMD_NOP_HEADER_COUNT(count - 1));

	for (i = 0; i < count - 1; i++)
		amdgpu_ring_write(ring, 0);
}

static void vpe_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	uint32_t pad_count;
	int i;

	/* pad to a multiple of 8 dwords; nothing to do if already aligned */
	pad_count = (-ib->length_dw) & 0x7;
	if (!pad_count)
		return;

	ib->ptr[ib->length_dw++] = ring->funcs->nop |
				   VPE_CMD_NOP_HEADER_COUNT(pad_count - 1);

	for (i = 0; i < pad_count - 1; i++)
		ib->ptr[ib->length_dw++] = 0;
}

static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t index = 0;
	uint64_t csa_mc_addr;

	if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
		return 0;

	csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
		      index * AMDGPU_CSA_VPE_SIZE;

	return csa_mc_addr;
}

static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
			     struct amdgpu_job *job,
			     struct amdgpu_ib *ib,
			     uint32_t flags)
{
	uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);

	/* IB packet must end on a 8 DW boundary */
	vpe_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
				VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));

	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
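/*
 * A FENCE packet writes a single 32-bit value to memory.  For fences with
 * AMDGPU_FENCE_FLAG_64BIT set, two packets are emitted back to back to
 * store the low and high halves of the sequence number at addr and
 * addr + 4; with AMDGPU_FENCE_FLAG_INT a TRAP packet follows so that the
 * engine raises an interrupt once the fence value has landed.
 */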
static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
				uint64_t seq, unsigned int flags)
{
	int i = 0;

	do {
		/* write the fence */
		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
		/* zero in first two bits */
		WARN_ON_ONCE(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
		addr += 4;
	} while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
		amdgpu_ring_write(ring, 0);
	}
}

static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
					       VPE_POLL_REGMEM_SUBOP_REGMEM) |
				VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
				VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
				VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
}

static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
					       VPE_POLL_REGMEM_SUBOP_REGMEM) |
				VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
				VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
				VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
}

static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
				   uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
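/*
 * Conditional execution and preemption: vpe_ring_init_cond_exec() emits a
 * COND_EXE packet that references cond_exe_gpu_addr.  The packet's last
 * dword, the number of following dwords the condition applies to, is not
 * known yet, so a 0x55aa55aa placeholder is written and its ring offset
 * returned.  Once the frame is complete, vpe_ring_patch_cond_exec()
 * overwrites the placeholder with the distance from that slot to the
 * current write pointer, wrapping around the ring if necessary (e.g. a
 * placeholder at offset 100 with ((wptr - 1) & buf_mask) == 160 gets
 * patched to 60).  vpe_ring_preempt_ib() builds on this: it asserts the
 * preemption condition, emits a trailing fence, sets the queue0_preempt
 * register, and polls the trailing fence until the queue has actually
 * stopped.
 */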
static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned int ret;

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr & ring->buf_mask; /* this is the offset we need patch later */
	amdgpu_ring_write(ring, 0x55aa55aa); /* insert dummy here and patch it later */

	return ret;
}

static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
{
	unsigned int cur;

	WARN_ON_ONCE(offset > ring->buf_mask);
	WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (cur > offset)
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
}

static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint32_t preempt_reg = vpe->regs.queue0_preempt;
	int i, r = 0;

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10);
	vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);

	return r;
}

static int vpe_set_clockgating_state(void *handle,
				     enum amd_clockgating_state state)
{
	return 0;
}

static int vpe_set_powergating_state(void *handle,
				     enum amd_powergating_state state)
{
	return 0;
}

static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint64_t rptr;

	if (ring->use_doorbell) {
		rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
		dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr);
	} else {
		rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
		rptr = rptr << 32;
		rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
		dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr);
	}

	return (rptr >> 2);
}

static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint64_t wptr;

	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
		dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr);
	} else {
		wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
		wptr = wptr << 32;
		wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
		dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr);
	}

	return (wptr >> 2);
}

static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;

	if (ring->use_doorbell) {
		dev_dbg(adev->dev, "Using doorbell, \
			wptr_offs == 0x%08x, \
			lower_32_bits(ring->wptr) << 2 == 0x%08x, \
			upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			ring->wptr_offs,
			lower_32_bits(ring->wptr << 2),
			upper_32_bits(ring->wptr << 2));
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		dev_dbg(adev->dev, "Not using doorbell, \
			regVPEC_QUEUE0_RB_WPTR == 0x%08x, \
			regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
			lower_32_bits(ring->wptr << 2),
			upper_32_bits(ring->wptr << 2));
		WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo),
		       lower_32_bits(ring->wptr << 2));
		WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi),
		       upper_32_bits(ring->wptr << 2));
	}
}

static int vpe_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	const uint32_t test_pattern = 0xdeadbeef;
	uint32_t index, i;
	uint64_t wb_addr;
	int ret;

	ret = amdgpu_device_wb_get(adev, &index);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
		return ret;
	}

	adev->wb.wb[index] = 0;
	wb_addr = adev->wb.gpu_addr + (index * 4);

	ret = amdgpu_ring_alloc(ring, 4);
	if (ret) {
		dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret);
		goto out;
	}

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
	amdgpu_ring_write(ring, lower_32_bits(wb_addr));
	amdgpu_ring_write(ring, upper_32_bits(wb_addr));
	amdgpu_ring_write(ring, test_pattern);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (le32_to_cpu(adev->wb.wb[index]) == test_pattern)
			goto out;
		udelay(1);
	}

	ret = -ETIMEDOUT;
out:
	amdgpu_device_wb_free(adev, index);

	return ret;
}

static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	const uint32_t test_pattern = 0xdeadbeef;
	struct amdgpu_ib ib = {};
	struct dma_fence *f = NULL;
	uint32_t index;
	uint64_t wb_addr;
	int ret;

	ret = amdgpu_device_wb_get(adev, &index);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
		return ret;
	}

	adev->wb.wb[index] = 0;
	wb_addr = adev->wb.gpu_addr + (index * 4);

	ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (ret)
		goto err0;

	ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
	ib.ptr[1] = lower_32_bits(wb_addr);
	ib.ptr[2] = upper_32_bits(wb_addr);
	ib.ptr[3] = test_pattern;
	ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.length_dw = 8;

	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (ret)
		goto err1;

	ret = dma_fence_wait_timeout(f, false, timeout);
	if (ret <= 0) {
		ret = ret ? : -ETIMEDOUT;
		goto err1;
	}

	ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);

	return ret;
}
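/*
 * Worst-case dword budgets for vpe_ring_funcs below: a COND_EXE preamble is
 * 5 dwords, a pipeline sync (POLL_REGMEM) is 6, and a fence with the 64-bit
 * and interrupt flags set is 10 (two FENCE packets plus a TRAP).  The VM
 * flush cost is counted as 3 dwords per register write (vpe_ring_emit_wreg())
 * and 6 per register wait (vpe_ring_emit_reg_wait()).  emit_ib_size covers
 * the 6-dword INDIRECT packet plus up to 7 dwords of NOP alignment padding.
 */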
static const struct amdgpu_ring_funcs vpe_ring_funcs = {
	.type = AMDGPU_RING_TYPE_VPE,
	.align_mask = 0xf,
	.nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
	.support_64bit_ptrs = true,
	.get_rptr = vpe_ring_get_rptr,
	.get_wptr = vpe_ring_get_wptr,
	.set_wptr = vpe_ring_set_wptr,
	.emit_frame_size =
		5 + /* vpe_ring_init_cond_exec */
		6 + /* vpe_ring_emit_pipeline_sync */
		10 + 10 + 10 + /* vpe_ring_emit_fence */
		/* vpe_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
	.emit_ib_size = 7 + 6,
	.emit_ib = vpe_ring_emit_ib,
	.emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
	.emit_fence = vpe_ring_emit_fence,
	.emit_vm_flush = vpe_ring_emit_vm_flush,
	.emit_wreg = vpe_ring_emit_wreg,
	.emit_reg_wait = vpe_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
	.insert_nop = vpe_ring_insert_nop,
	.pad_ib = vpe_ring_pad_ib,
	.test_ring = vpe_ring_test_ring,
	.test_ib = vpe_ring_test_ib,
	.init_cond_exec = vpe_ring_init_cond_exec,
	.patch_cond_exec = vpe_ring_patch_cond_exec,
	.preempt_ib = vpe_ring_preempt_ib,
};

static void vpe_set_ring_funcs(struct amdgpu_device *adev)
{
	adev->vpe.ring.funcs = &vpe_ring_funcs;
}

const struct amd_ip_funcs vpe_ip_funcs = {
	.name = "vpe_v6_1",
	.early_init = vpe_early_init,
	.late_init = NULL,
	.sw_init = vpe_sw_init,
	.sw_fini = vpe_sw_fini,
	.hw_init = vpe_hw_init,
	.hw_fini = vpe_hw_fini,
	.suspend = vpe_suspend,
	.resume = vpe_resume,
	.soft_reset = NULL,
	.set_clockgating_state = vpe_set_clockgating_state,
	.set_powergating_state = vpe_set_powergating_state,
};

const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VPE,
	.major = 6,
	.minor = 1,
	.rev = 0,
	.funcs = &vpe_ip_funcs,
};