xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c (revision 25396684b57f7d16306ca149c545db60b2d08dda)
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_vpe.h"
#include "soc15_common.h"
#include "vpe_v6_1.h"

#define VPE_FW_NAME_LEN		64

#define AMDGPU_CSA_VPE_SIZE	64
/* VPE CSA resides in the 4th page of CSA */
#define AMDGPU_CSA_VPE_OFFSET	(4096 * 3)

static void vpe_set_ring_funcs(struct amdgpu_device *adev);

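/*
 * amdgpu_vpe_psp_update_sram - hand the VPE (Video Processing Engine)
 * firmware off to the PSP for loading into the engine's SRAM. The ucode
 * descriptor points at the small command buffer prepared at
 * vpe.cmdbuf_gpu_addr rather than at the raw firmware blob.
 */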
int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev)
{
	struct amdgpu_firmware_info ucode = {
		.ucode_id = AMDGPU_UCODE_ID_VPE,
		.mc_addr = adev->vpe.cmdbuf_gpu_addr,
		.ucode_size = 8,
	};

	return psp_execute_ip_fw_load(&adev->psp, &ucode);
}

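/*
 * amdgpu_vpe_init_microcode - request the amdgpu/<ip>.bin firmware for
 * the detected VPE IP version, cache the ucode and feature versions from
 * its header and, for PSP front-door loading, register the CTX and CTL
 * sections with the firmware framework.
 */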
int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = vpe->ring.adev;
	const struct vpe_firmware_header_v1_0 *vpe_hdr;
	char fw_name[VPE_FW_NAME_LEN];
	char fw_prefix[VPE_FW_NAME_LEN];
	int ret;

	amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix));
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fw_prefix);

	ret = amdgpu_ucode_request(adev, &adev->vpe.fw, fw_name);
	if (ret)
		goto out;

	vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
	adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version);
	adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		struct amdgpu_firmware_info *info;

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX];
		info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX;
		info->fw = adev->vpe.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL];
		info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL;
		info->fw = adev->vpe.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
	}

	return 0;
out:
	dev_err(adev->dev, "failed to initialize VPE microcode\n");
	amdgpu_ucode_release(&adev->vpe.fw);
	return ret;
}

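/*
 * amdgpu_vpe_ring_init - create the single VPE ring. The ring is
 * doorbell driven, uses MMHUB0 and is named "vpe".
 */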
int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
	struct amdgpu_ring *ring = &vpe->ring;
	int ret;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->vm_hub = AMDGPU_MMHUB0(0);
	ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1);
	snprintf(ring->name, 4, "vpe");

	ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0,
			       AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (ret)
		return ret;

	return 0;
}

int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe)
{
	amdgpu_ring_fini(&vpe->ring);

	return 0;
}

static int vpe_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) {
	case IP_VERSION(6, 1, 0):
		vpe_v6_1_set_funcs(vpe);
		break;
	default:
		return -EINVAL;
	}

	vpe_set_ring_funcs(adev);
	vpe_set_regs(vpe);

	return 0;
}

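/*
 * vpe_common_init - allocate the one-page GTT buffer that backs the
 * firmware-load command buffer handed to the PSP (see
 * amdgpu_vpe_psp_update_sram() above).
 */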
static int vpe_common_init(struct amdgpu_vpe *vpe)
{
	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
	int r;

	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT,
				    &adev->vpe.cmdbuf_obj,
				    &adev->vpe.cmdbuf_gpu_addr,
				    (void **)&adev->vpe.cmdbuf_cpu_addr);
	if (r) {
		dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r);
		return r;
	}

	return 0;
}

static int vpe_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;
	int ret;

	ret = vpe_common_init(vpe);
	if (ret)
		goto out;

	ret = vpe_irq_init(vpe);
	if (ret)
		goto out;

	ret = vpe_ring_init(vpe);
	if (ret)
		goto out;

	ret = vpe_init_microcode(vpe);
	if (ret)
		goto out;
out:
	return ret;
}

static int vpe_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	amdgpu_ucode_release(&vpe->fw);

	vpe_ring_fini(vpe);

	amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj,
			      &adev->vpe.cmdbuf_gpu_addr,
			      (void **)&adev->vpe.cmdbuf_cpu_addr);

	return 0;
}

static int vpe_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;
	int ret;

	ret = vpe_load_microcode(vpe);
	if (ret)
		return ret;

	ret = vpe_ring_start(vpe);
	if (ret)
		return ret;

	return 0;
}

static int vpe_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_vpe *vpe = &adev->vpe;

	vpe_ring_stop(vpe);

	return 0;
}

static int vpe_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return vpe_hw_fini(adev);
}

static int vpe_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return vpe_hw_init(adev);
}

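/*
 * vpe_ring_insert_nop - emit @count dwords of padding: one NOP header
 * carrying the remaining count, followed by count - 1 zero dwords.
 */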
static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	int i;

	amdgpu_ring_write(ring, ring->funcs->nop |
				VPE_CMD_NOP_HEADER_COUNT(count - 1));

	for (i = 0; i < count - 1; i++)
		amdgpu_ring_write(ring, 0);
}

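/*
 * vpe_ring_pad_ib - pad the IB with NOPs so that its length becomes a
 * multiple of 8 dwords, matching the alignment the engine expects.
 */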
static void vpe_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	uint32_t pad_count;
	int i;

	pad_count = (-ib->length_dw) & 0x7;

	ib->ptr[ib->length_dw++] = ring->funcs->nop |
				   VPE_CMD_NOP_HEADER_COUNT(pad_count - 1);

	for (i = 0; i < pad_count - 1; i++)
		ib->ptr[ib->length_dw++] = 0;
}

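/*
 * vpe_get_csa_mc_addr - return the GPU address of the VPE context save
 * area slot used to hold mid-command-buffer preemption state, or 0 when
 * it is not applicable (SR-IOV, kernel VMID 0, or MCBP disabled).
 */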
static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t index = 0;
	uint64_t csa_mc_addr;

	if (amdgpu_sriov_vf(adev) || vmid == 0 || !amdgpu_mcbp)
		return 0;

	csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET +
		      index * AMDGPU_CSA_VPE_SIZE;

	return csa_mc_addr;
}

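/*
 * vpe_ring_emit_ib - chain an indirect buffer into the ring: pad so the
 * INDIRECT packet ends on an 8-DW boundary, then emit the IB address,
 * size and the CSA address for the given VMID.
 */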
static void vpe_ring_emit_ib(struct amdgpu_ring *ring,
			     struct amdgpu_job *job,
			     struct amdgpu_ib *ib,
			     uint32_t flags)
{
	uint32_t vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on an 8-DW boundary. */
	vpe_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) |
				VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf));

	/* The IB base address must be 32-byte aligned. */
	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}

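/*
 * vpe_ring_emit_fence - write the fence value to @addr with a FENCE
 * packet; a 64-bit fence is split into two writes to consecutive
 * dwords. Optionally follow up with a TRAP packet to raise an
 * interrupt.
 */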
static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr,
				uint64_t seq, unsigned int flags)
{
	int i = 0;

	do {
		/* write the fence */
		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
		/* the fence address must be 4-byte aligned (low two bits zero) */
		WARN_ON_ONCE(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq));
		addr += 4;
	} while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0));
		amdgpu_ring_write(ring, 0);
	}
}

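/*
 * vpe_ring_emit_pipeline_sync - emit a POLL_REGMEM that waits on the
 * ring's fence memory until the latest emitted sequence number is
 * visible, i.e. until previously submitted work has completed.
 */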
static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
				VPE_POLL_REGMEM_SUBOP_REGMEM) |
				VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
				VPE_CMD_POLL_REGMEM_HEADER_MEM(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
				VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4));
}

static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}

static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				   uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM,
				VPE_POLL_REGMEM_SUBOP_REGMEM) |
				VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
				VPE_CMD_POLL_REGMEM_HEADER_MEM(0));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
				VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10));
}

static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
				   uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}

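/*
 * vpe_ring_init_cond_exec - emit a COND_EXE packet that predicates the
 * following commands on the value at cond_exe_gpu_addr. The dword count
 * is written as a 0x55aa55aa placeholder; the returned ring offset is
 * fixed up later by vpe_ring_patch_cond_exec().
 */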
static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned int ret;

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 1);
	ret = ring->wptr & ring->buf_mask; /* this is the offset we need to patch later */
	amdgpu_ring_write(ring, 0x55aa55aa); /* insert dummy here and patch it later */

	return ret;
}

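/*
 * vpe_ring_patch_cond_exec - replace the placeholder written by
 * vpe_ring_init_cond_exec() with the number of dwords between the
 * placeholder and the current write pointer, accounting for ring
 * wrap-around.
 */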
static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
{
	unsigned int cur;

	WARN_ON_ONCE(offset > ring->buf_mask);
	WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (cur > offset)
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
}

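/*
 * vpe_ring_preempt_ib - preempt the work currently running on the ring:
 * assert the preemption condition via cond_exec, emit and commit a
 * trailing fence, trigger preemption through the queue0_preempt
 * register, then poll the trailing fence until the engine has reached a
 * preemption point. Finally deassert preemption and the condition.
 */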
static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint32_t preempt_reg = vpe->regs.queue0_preempt;
	int i, r = 0;

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 10);
	vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);

	return r;
}

static int vpe_set_clockgating_state(void *handle,
				     enum amd_clockgating_state state)
{
	return 0;
}

static int vpe_set_powergating_state(void *handle,
				     enum amd_powergating_state state)
{
	return 0;
}

static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint64_t rptr;

	if (ring->use_doorbell) {
		rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr);
		dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr);
	} else {
		rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi));
		rptr = rptr << 32;
		rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo));
		dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr);
	}

	return (rptr >> 2);
}

static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;
	uint64_t wptr;

	if (ring->use_doorbell) {
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
		dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr);
	} else {
		wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi));
		wptr = wptr << 32;
		wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo));
		dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr);
	}

	return (wptr >> 2);
}

static void vpe_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_vpe *vpe = &adev->vpe;

	if (ring->use_doorbell) {
		dev_dbg(adev->dev,
			"Using doorbell, wptr_offs == 0x%08x, lower_32_bits(wptr << 2) == 0x%08x, upper_32_bits(wptr << 2) == 0x%08x\n",
			ring->wptr_offs,
			lower_32_bits(ring->wptr << 2),
			upper_32_bits(ring->wptr << 2));
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		dev_dbg(adev->dev,
			"Not using doorbell, regVPEC_QUEUE0_RB_WPTR == 0x%08x, regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n",
			lower_32_bits(ring->wptr << 2),
			upper_32_bits(ring->wptr << 2));
		WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo),
		       lower_32_bits(ring->wptr << 2));
		WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi),
		       upper_32_bits(ring->wptr << 2));
	}
}

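/*
 * vpe_ring_test_ring - basic ring sanity test: grab a writeback slot,
 * emit a FENCE packet that writes a known pattern to it, and poll until
 * the value shows up or the timeout expires.
 */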
static int vpe_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	const uint32_t test_pattern = 0xdeadbeef;
	uint32_t index, i;
	uint64_t wb_addr;
	int ret;

	ret = amdgpu_device_wb_get(adev, &index);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
		return ret;
	}

	adev->wb.wb[index] = 0;
	wb_addr = adev->wb.gpu_addr + (index * 4);

	ret = amdgpu_ring_alloc(ring, 4);
	if (ret) {
		dev_err(adev->dev, "VPE: failed to lock ring %d (%d)\n", ring->idx, ret);
		goto out;
	}

	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0));
	amdgpu_ring_write(ring, lower_32_bits(wb_addr));
	amdgpu_ring_write(ring, upper_32_bits(wb_addr));
	amdgpu_ring_write(ring, test_pattern);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (le32_to_cpu(adev->wb.wb[index]) == test_pattern)
			goto out;
		udelay(1);
	}

	ret = -ETIMEDOUT;
out:
	amdgpu_device_wb_free(adev, index);

	return ret;
}

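/*
 * vpe_ring_test_ib - IB sanity test: build an 8-dword indirect buffer
 * containing a FENCE write of a known pattern (padded with NOPs),
 * schedule it, wait for its fence and verify the pattern landed in the
 * writeback slot.
 */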
static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	const uint32_t test_pattern = 0xdeadbeef;
	struct amdgpu_ib ib = {};
	struct dma_fence *f = NULL;
	uint32_t index;
	uint64_t wb_addr;
	int ret;

	ret = amdgpu_device_wb_get(adev, &index);
	if (ret) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret);
		return ret;
	}

	adev->wb.wb[index] = 0;
	wb_addr = adev->wb.gpu_addr + (index * 4);

	ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (ret)
		goto err0;

	ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0);
	ib.ptr[1] = lower_32_bits(wb_addr);
	ib.ptr[2] = upper_32_bits(wb_addr);
	ib.ptr[3] = test_pattern;
	ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0);
	ib.length_dw = 8;

	ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (ret)
		goto err1;

	ret = dma_fence_wait_timeout(f, false, timeout);
	if (ret <= 0) {
		ret = ret ? : -ETIMEDOUT;
		goto err1;
	}

	ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);

	return ret;
}

static const struct amdgpu_ring_funcs vpe_ring_funcs = {
	.type = AMDGPU_RING_TYPE_VPE,
	.align_mask = 0xf,
	.nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0),
	.support_64bit_ptrs = true,
	.get_rptr = vpe_ring_get_rptr,
	.get_wptr = vpe_ring_get_wptr,
	.set_wptr = vpe_ring_set_wptr,
	.emit_frame_size =
		5 + /* vpe_ring_init_cond_exec */
		6 + /* vpe_ring_emit_pipeline_sync */
		10 + 10 + 10 + /* vpe_ring_emit_fence */
		/* vpe_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6,
	.emit_ib_size = 7 + 6,
	.emit_ib = vpe_ring_emit_ib,
	.emit_pipeline_sync = vpe_ring_emit_pipeline_sync,
	.emit_fence = vpe_ring_emit_fence,
	.emit_vm_flush = vpe_ring_emit_vm_flush,
	.emit_wreg = vpe_ring_emit_wreg,
	.emit_reg_wait = vpe_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
	.insert_nop = vpe_ring_insert_nop,
	.pad_ib = vpe_ring_pad_ib,
	.test_ring = vpe_ring_test_ring,
	.test_ib = vpe_ring_test_ib,
	.init_cond_exec = vpe_ring_init_cond_exec,
	.patch_cond_exec = vpe_ring_patch_cond_exec,
	.preempt_ib = vpe_ring_preempt_ib,
};

static void vpe_set_ring_funcs(struct amdgpu_device *adev)
{
	adev->vpe.ring.funcs = &vpe_ring_funcs;
}

const struct amd_ip_funcs vpe_ip_funcs = {
	.name = "vpe_v6_1",
	.early_init = vpe_early_init,
	.late_init = NULL,
	.sw_init = vpe_sw_init,
	.sw_fini = vpe_sw_fini,
	.hw_init = vpe_hw_init,
	.hw_fini = vpe_hw_fini,
	.suspend = vpe_suspend,
	.resume = vpe_resume,
	.soft_reset = NULL,
	.set_clockgating_state = vpe_set_clockgating_state,
	.set_powergating_state = vpe_set_powergating_state,
};

const struct amdgpu_ip_block_version vpe_v6_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VPE,
	.major = 6,
	.minor = 1,
	.rev = 0,
	.funcs = &vpe_ip_funcs,
};