// SPDX-License-Identifier: MIT
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * Copyright 2025 Valve Corporation
 * Copyright 2025 Alexandre Demers
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 *          Timur Kristóf <timur.kristof@gmail.com>
 *          Alexandre Demers <alexandre.f.demers@gmail.com>
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "amdgpu_gart.h"
#include "sid.h"
#include "vce_v1_0.h"
#include "vce/vce_1_0_d.h"
#include "vce/vce_1_0_sh_mask.h"
#include "oss/oss_1_0_d.h"
#include "oss/oss_1_0_sh_mask.h"

#define VCE_V1_0_FW_SIZE	(256 * 1024)
#define VCE_V1_0_STACK_SIZE	(64 * 1024)
#define VCE_V1_0_DATA_SIZE	(7808 * (AMDGPU_MAX_VCE_HANDLES + 1))
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);

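/*
 * Signature data appended to the VCE firmware image.
 * Each of the "number" entries in val[] describes one supported ASIC:
 * its chip ID, the key selector and the nonce/signature values that
 * vce_v1_0_load_fw_signature() copies into the VCPU BO.
 */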
struct vce_v1_0_fw_signature {
	int32_t offset;
	uint32_t length;
	int32_t number;
	struct {
		uint32_t chip_id;
		uint32_t keyselect;
		uint32_t nonce[4];
		uint32_t sigval[4];
	} val[8];
};

/**
 * vce_v1_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(mmVCE_RB_RPTR);
	else
		return RREG32(mmVCE_RB_RPTR2);
}

/**
 * vce_v1_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		return RREG32(mmVCE_RB_WPTR);
	else
		return RREG32(mmVCE_RB_WPTR2);
}

/**
 * vce_v1_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->me == 0)
		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	else
		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
}

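/**
 * vce_v1_0_lmi_clean - wait for the LMI to report a clean state
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_LMI_STATUS until one of the bits in the 0x337f mask is set,
 * for up to roughly 10 seconds. Returns 0 on success, -ETIMEDOUT
 * otherwise.
 */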
static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
				return 0;

			mdelay(10);
		}
	}

	return -ETIMEDOUT;
}

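/**
 * vce_v1_0_firmware_loaded - wait for the VCPU to report loaded firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the VCPU_REPORT_FW_LOADED bit, resetting the
 * ECPU between rounds of attempts. Returns 0 on success, -ETIMEDOUT
 * when the firmware never comes up.
 */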
static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");

		WREG32_P(mmVCE_SOFT_RESET,
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(mmVCE_SOFT_RESET, 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

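/**
 * vce_v1_0_init_cg - init the clock gating registers
 *
 * @adev: amdgpu_device pointer
 *
 * Program the VCE clock gating registers to their initial state
 * and enable the dynamic clock mode.
 */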
static void vce_v1_0_init_cg(struct amdgpu_device *adev)
{
	u32 tmp;

	tmp = RREG32(mmVCE_CLOCK_GATING_A);
	tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
	WREG32(mmVCE_CLOCK_GATING_A, tmp);

	tmp = RREG32(mmVCE_CLOCK_GATING_B);
	tmp |= 0x1e;
	tmp &= ~0xe100e1;
	WREG32(mmVCE_CLOCK_GATING_B, tmp);

	tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
	tmp &= ~0xff9ff000;
	WREG32(mmVCE_UENC_CLOCK_GATING, tmp);

	tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
	tmp &= ~0x3ff;
	WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
}

/**
 * vce_v1_0_load_fw_signature - load firmware signature into VCPU BO
 *
 * @adev: amdgpu_device pointer
 *
 * The VCE1 firmware validation mechanism needs a firmware signature.
 * This function finds the signature appropriate for the current
 * ASIC and writes that into the VCPU BO.
 */
static int vce_v1_0_load_fw_signature(struct amdgpu_device *adev)
{
	const struct common_firmware_header *hdr;
	struct vce_v1_0_fw_signature *sign;
	unsigned int ucode_offset;
	uint32_t chip_id;
	u32 *cpu_addr;
	int i;

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	cpu_addr = adev->vce.cpu_addr;

	sign = (void *)adev->vce.fw->data + ucode_offset;

	switch (adev->asic_type) {
	case CHIP_TAHITI:
		chip_id = 0x01000014;
		break;
	case CHIP_VERDE:
		chip_id = 0x01000015;
		break;
	case CHIP_PITCAIRN:
		chip_id = 0x01000016;
		break;
	default:
		dev_err(adev->dev, "asic_type %#010x was not found!\n", adev->asic_type);
		return -EINVAL;
	}

	for (i = 0; i < le32_to_cpu(sign->number); ++i) {
		if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
			break;
	}

	if (i == le32_to_cpu(sign->number)) {
		dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware\n",
			chip_id, amdgpu_asic_name[adev->asic_type]);
		return -EINVAL;
	}

	cpu_addr += (256 - 64) / 4;
	memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
	cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);

	memset_io(&cpu_addr[5], 0, 44);
	memcpy_toio(&cpu_addr[16], &sign[1],
		    le32_to_cpu(hdr->ucode_size_bytes) - sizeof(*sign));

	cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
	memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);

	adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);

	return 0;
}

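/**
 * vce_v1_0_wait_for_fw_validation - wait for firmware validation
 *
 * @adev: amdgpu_device pointer
 *
 * Write the key selector and poll VCE_FW_REG_STATUS until the DONE
 * bit is set, the PASS bit confirms the signature check and the BUSY
 * bit clears. Returns -ETIMEDOUT when validation never finishes,
 * -EINVAL when it fails, 0 on success.
 */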
static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
{
	int i;

	dev_dbg(adev->dev, "VCE keyselect: %d\n", adev->vce.keyselect);
	WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);

	for (i = 0; i < 10; ++i) {
		mdelay(10);
		if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
			break;
	}

	if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
		dev_err(adev->dev, "VCE FW validation timeout\n");
		return -ETIMEDOUT;
	}

	if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
		dev_err(adev->dev, "VCE FW validation failed\n");
		return -EINVAL;
	}

	for (i = 0; i < 10; ++i) {
		mdelay(10);
		if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
			break;
	}

	if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
		dev_err(adev->dev, "VCE FW busy timeout\n");
		return -ETIMEDOUT;
	}

	return 0;
}

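/**
 * vce_v1_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Program the LMI and let the VCE memory controller know where the
 * firmware, stack and data segments live, then wait for firmware
 * validation. When a keyselect has already been written, only the
 * register bus stall is released, because validation always fails
 * the second time it is run.
 */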
static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset;
	uint32_t size;

	/*
	 * When the keyselect is already set, don't perturb VCE FW.
	 * Validation seems to always fail the second time.
	 */
	if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
		dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
			RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);

		WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
		return 0;
	}

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0);

	WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);

	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);

	WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);

	offset = adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V1_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	offset += size;
	size = VCE_V1_0_STACK_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE1, size);

	offset += size;
	size = VCE_V1_0_DATA_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE2, size);

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);

	return vce_v1_0_wait_for_fw_validation(adev);
}

/**
 * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Check whether VCE is busy according to VCE_STATUS.
 * Also check whether the SRBM thinks VCE is busy, although
 * SRBM_STATUS2.VCE_BUSY seems to be bogus because it
 * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
 */
static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool busy =
		(RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
		(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);

	return !busy;
}

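/**
 * vce_v1_0_wait_for_idle - wait for the VCE1 block to become idle
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Poll vce_v1_0_is_idle() for up to adev->usec_timeout microseconds;
 * returns -ETIMEDOUT when the block stays busy.
 */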
static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	unsigned int i;

	for (i = 0; i < adev->usec_timeout; i++) {
		udelay(1);
		if (vce_v1_0_is_idle(ip_block))
			return 0;
	}
	return -ETIMEDOUT;
}

/**
 * vce_v1_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v1_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	WREG32_P(mmVCE_STATUS, 1, ~1);

	r = vce_v1_0_mc_resume(adev);
	if (r)
		return r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
	WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
		 ~VCE_VCPU_CNTL__CLK_EN_MASK);

	WREG32_P(mmVCE_SOFT_RESET,
		VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	mdelay(100);

	WREG32_P(mmVCE_SOFT_RESET, 0,
		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	r = vce_v1_0_firmware_loaded(adev);

	/* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
	WREG32(mmVCE_STATUS, 0);

	if (r) {
		dev_err(adev->dev, "VCE not responding, giving up\n");
		return r;
	}

	return 0;
}

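/**
 * vce_v1_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Wait for the engine to go idle, stall the UMC and register bus,
 * disable the VCPU clock and put the ECPU and FME into reset.
 */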
static int vce_v1_0_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ip_block *ip_block;
	int status;
	int i;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
	if (!ip_block)
		return -EINVAL;

	if (vce_v1_0_lmi_clean(adev))
		dev_warn(adev->dev, "VCE not idle\n");

	if (vce_v1_0_wait_for_idle(ip_block))
		dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
			RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));

	/* Stall UMC and register bus before resetting VCPU */
	WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));

	for (i = 0; i < 100; ++i) {
		status = RREG32(mmVCE_LMI_STATUS);
		if (status & 0x240)
			break;
		mdelay(1);
	}

	WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);

	WREG32_P(mmVCE_SOFT_RESET,
		VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));

	WREG32(mmVCE_STATUS, 0);

	return 0;
}

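/**
 * vce_v1_0_enable_mgcg - enable or disable medium grain clock gating
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable MGCG, honored only when the ASIC supports it
 *
 * Toggle the dynamic clock mode and the UENC clock gating overrides
 * accordingly.
 */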
static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
	u32 tmp;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
		tmp = RREG32(mmVCE_CLOCK_GATING_A);
		tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
		WREG32(mmVCE_CLOCK_GATING_A, tmp);

		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
		tmp &= ~0x1ff000;
		tmp |= 0xff800000;
		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);

		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		tmp &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
	} else {
		tmp = RREG32(mmVCE_CLOCK_GATING_A);
		tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
		WREG32(mmVCE_CLOCK_GATING_A, tmp);

		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
		tmp |= 0x1ff000;
		tmp &= ~0xff800000;
		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);

		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		tmp |= 0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
	}
}

static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_vce_early_init(adev);
	if (r)
		return r;

	adev->vce.num_rings = 2;

	vce_v1_0_set_ring_funcs(adev);
	vce_v1_0_set_irq_funcs(adev);

	return 0;
}

/**
 * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
 *
 * @adev: amdgpu_device pointer
 *
 * Due to various hardware limitations, VCE1 requires
 * the VCPU BO to be in the low 32-bit address range.
 * Ensure that the VCPU BO has a 32-bit GPU address,
 * or return an error code when that isn't possible.
 *
 * To accommodate that, we put the GART in the low address range
 * and reserve some GART pages where we map the VCPU BO,
 * so that it gets a 32-bit address.
 */
static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
{
	u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
	u64 max_vcpu_bo_addr = 0xffffffff - bo_size;
	u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
	u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
	u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
	u64 vce_gart_start_offs;
	int r;

	r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
					 &adev->vce.gart_node, num_pages,
					 DRM_MM_INSERT_LOW);
	if (r)
		return r;

	vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node);

	/* Check if we can map the VCPU BO in GART to a 32-bit address. */
	if (adev->gmc.gart_start + vce_gart_start_offs > max_vcpu_bo_addr)
		return -EINVAL;

	amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start,
				   num_pages, flags, adev->gart.ptr);
	adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs;
	if (adev->vce.gpu_addr > max_vcpu_bo_addr)
		return -EINVAL;

	return 0;
}

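/**
 * vce_v1_0_sw_init - software init
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Register the VCE interrupt source, set up the VCPU BO, load the
 * firmware signature, make sure the BO is reachable through a 32-bit
 * address and initialize both rings.
 */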
static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int r, i;

	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
		VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
	if (r)
		return r;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;
	r = vce_v1_0_load_fw_signature(adev);
	if (r)
		return r;
	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);

		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
				     hw_prio, NULL);
		if (r)
			return r;
	}

	return r;
}

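/**
 * vce_v1_0_sw_fini - software fini
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Tear down the common VCE state and release the GART pages
 * reserved for the VCPU BO.
 */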
static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);

	amdgpu_gtt_mgr_free_entries(&adev->mman.gtt_mgr, &adev->vce.gart_node);

	return r;
}

/**
 * vce_v1_0_hw_init - start and test VCE block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i, r;

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_vce(adev, true);
	else
		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
		if (r)
			return r;
	}

	dev_info(adev->dev, "VCE initialized successfully.\n");

	return 0;
}

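/**
 * vce_v1_0_hw_fini - stop the hardware block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Stop the VCE block and cancel the pending idle work.
 */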
static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	int r;

	r = vce_v1_0_stop(ip_block->adev);
	if (r)
		return r;

	cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
	return 0;
}

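/**
 * vce_v1_0_suspend - suspend VCE block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Cancel the idle work, gate VCE clocks and power, halt the
 * hardware and suspend the common VCE state.
 */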
static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	/*
	 * Proper cleanups before halting the HW engine:
	 *   - cancel the delayed idle work
	 *   - enable powergating
	 *   - enable clockgating
	 *   - disable dpm
	 *
	 * TODO: to align with the VCN implementation, move the
	 * jobs for clockgating/powergating/dpm setting to
	 * ->set_powergating_state().
	 */
	cancel_delayed_work_sync(&adev->vce.idle_work);

	if (adev->pm.dpm_enabled) {
		amdgpu_dpm_enable_vce(adev, false);
	} else {
		amdgpu_asic_set_vce_clocks(adev, 0, 0);
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_PG_STATE_GATE);
		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
						       AMD_CG_STATE_GATE);
	}

	r = vce_v1_0_hw_fini(ip_block);
	if (r) {
		dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i\n", r);
		return r;
	}

	return amdgpu_vce_suspend(adev);
}

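/**
 * vce_v1_0_resume - resume VCE block
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Restore the VCPU BO, reload the firmware signature, re-check the
 * 32-bit address constraint and restart the hardware.
 */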
static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;
	r = vce_v1_0_load_fw_signature(adev);
	if (r)
		return r;
	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
	if (r)
		return r;

	return vce_v1_0_hw_init(ip_block);
}

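/**
 * vce_v1_0_set_interrupt_state - toggle the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: whether to enable the interrupt
 *
 * Enable or disable the VCE trap interrupt in VCE_SYS_INT_EN.
 */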
static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned int type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val,
		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

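/**
 * vce_v1_0_process_interrupt - process a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Signal fence processing on the ring that raised the interrupt.
 */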
static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	dev_dbg(adev->dev, "IH: VCE\n");
	switch (entry->src_data[0]) {
	case 0:
	case 1:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
		break;
	default:
		dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

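/**
 * vce_v1_0_set_clockgating_state - set VCE clock gating state
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 * @state: clock gating state
 *
 * Re-initialize the clock gating registers and toggle MGCG
 * according to @state.
 */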
static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	vce_v1_0_init_cg(adev);
	vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);

	return 0;
}

static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	/*
	 * This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	if (state == AMD_PG_STATE_GATE)
		return vce_v1_0_stop(adev);
	else
		return vce_v1_0_start(adev);
}

static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
	.name = "vce_v1_0",
	.early_init = vce_v1_0_early_init,
	.sw_init = vce_v1_0_sw_init,
	.sw_fini = vce_v1_0_sw_fini,
	.hw_init = vce_v1_0_hw_init,
	.hw_fini = vce_v1_0_hw_fini,
	.suspend = vce_v1_0_suspend,
	.resume = vce_v1_0_resume,
	.is_idle = vce_v1_0_is_idle,
	.wait_for_idle = vce_v1_0_wait_for_idle,
	.set_clockgating_state = vce_v1_0_set_clockgating_state,
	.set_powergating_state = vce_v1_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.no_user_fence = true,
	.get_rptr = vce_v1_0_ring_get_rptr,
	.get_wptr = vce_v1_0_ring_get_wptr,
	.set_wptr = vce_v1_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence x1, no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vce.num_rings; i++) {
		adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
		adev->vce.ring[i].me = i;
	}
}

static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
	.set = vce_v1_0_set_interrupt_state,
	.process = vce_v1_0_process_interrupt,
};

static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v1_0_ip_funcs,
};