xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v1_0.c (revision 2c142b63c8ee982cdfdba49a616027c266294838)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2013 Advanced Micro Devices, Inc.
4  * Copyright 2025 Valve Corporation
5  * Copyright 2025 Alexandre Demers
6  * All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sub license, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject to
14  * the following conditions:
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * The above copyright notice and this permission notice (including the
25  * next paragraph) shall be included in all copies or substantial portions
26  * of the Software.
27  *
28  * Authors: Christian König <christian.koenig@amd.com>
29  *          Timur Kristóf <timur.kristof@gmail.com>
30  *          Alexandre Demers <alexandre.f.demers@gmail.com>
31  */
32 
33 #include <linux/firmware.h>
34 
35 #include "amdgpu.h"
36 #include "amdgpu_vce.h"
37 #include "amdgpu_gart.h"
38 #include "sid.h"
39 #include "vce_v1_0.h"
40 #include "vce/vce_1_0_d.h"
41 #include "vce/vce_1_0_sh_mask.h"
42 #include "oss/oss_1_0_d.h"
43 #include "oss/oss_1_0_sh_mask.h"
44 
/* Alignment checked for the stack and data cache windows: 32 KiB. */
#define VCE_V1_0_ALIGNMENT	(32 << 10)
/* Span of the VCPU BO set aside for the firmware image: 256 KiB. */
#define VCE_V1_0_FW_SIZE	(256 << 10)
/* Size programmed for the VCPU stack cache window: 64 KiB. */
#define VCE_V1_0_STACK_SIZE	(64 << 10)
/*
 * Size programmed for the VCPU data cache window: 7808 bytes for each of
 * AMDGPU_MAX_VCE_HANDLES + 1 slots, rounded up to VCE_V1_0_ALIGNMENT.
 * NOTE(review): 7808 is presumably the per-session data size - confirm.
 */
#define VCE_V1_0_DATA_SIZE \
	(ALIGN((AMDGPU_MAX_VCE_HANDLES + 1) * 7808, VCE_V1_0_ALIGNMENT))
/* VCE_STATUS bit polled while waiting for the firmware to report in. */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	(1 << 1)
50 
51 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev);
53 
/*
 * One per-ASIC signature record of the VCE1 firmware image.
 * All fields are read with le32_to_cpu(), i.e. stored little-endian.
 *
 * chip_id:   ASIC identifier the record applies to
 * keyselect: value later written to VCE_LMI_FW_START_KEYSEL
 * nonce:     16 bytes copied in front of the firmware in the VCPU BO
 * sigval:    16 bytes copied behind the signed firmware in the VCPU BO
 */
struct vce_v1_0_fw_signature_entry {
	uint32_t chip_id;
	uint32_t keyselect;
	uint32_t nonce[4];
	uint32_t sigval[4];
};

/*
 * Signature header located at the ucode array offset of the firmware file;
 * the firmware payload follows immediately after this structure.
 *
 * offset: not referenced by this file
 * length: length used to place sigval and stored (plus 64) in the VCPU BO
 * number: how many entries of val[] are populated
 * val:    per-ASIC signature records
 */
struct vce_v1_0_fw_signature {
	int32_t offset;
	uint32_t length;
	int32_t number;
	struct vce_v1_0_fw_signature_entry val[8];
};
65 
66 /**
67  * vce_v1_0_ring_get_rptr - get read pointer
68  *
69  * @ring: amdgpu_ring pointer
70  *
71  * Returns the current hardware read pointer
72  */
vce_v1_0_ring_get_rptr(struct amdgpu_ring * ring)73 static uint64_t vce_v1_0_ring_get_rptr(struct amdgpu_ring *ring)
74 {
75 	struct amdgpu_device *adev = ring->adev;
76 
77 	if (ring->me == 0)
78 		return RREG32(mmVCE_RB_RPTR);
79 	else
80 		return RREG32(mmVCE_RB_RPTR2);
81 }
82 
83 /**
84  * vce_v1_0_ring_get_wptr - get write pointer
85  *
86  * @ring: amdgpu_ring pointer
87  *
88  * Returns the current hardware write pointer
89  */
vce_v1_0_ring_get_wptr(struct amdgpu_ring * ring)90 static uint64_t vce_v1_0_ring_get_wptr(struct amdgpu_ring *ring)
91 {
92 	struct amdgpu_device *adev = ring->adev;
93 
94 	if (ring->me == 0)
95 		return RREG32(mmVCE_RB_WPTR);
96 	else
97 		return RREG32(mmVCE_RB_WPTR2);
98 }
99 
100 /**
101  * vce_v1_0_ring_set_wptr - set write pointer
102  *
103  * @ring: amdgpu_ring pointer
104  *
105  * Commits the write pointer to the hardware
106  */
vce_v1_0_ring_set_wptr(struct amdgpu_ring * ring)107 static void vce_v1_0_ring_set_wptr(struct amdgpu_ring *ring)
108 {
109 	struct amdgpu_device *adev = ring->adev;
110 
111 	if (ring->me == 0)
112 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
113 	else
114 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
115 }
116 
vce_v1_0_lmi_clean(struct amdgpu_device * adev)117 static int vce_v1_0_lmi_clean(struct amdgpu_device *adev)
118 {
119 	int i, j;
120 
121 	for (i = 0; i < 10; ++i) {
122 		for (j = 0; j < 100; ++j) {
123 			if (RREG32(mmVCE_LMI_STATUS) & 0x337f)
124 				return 0;
125 
126 			mdelay(10);
127 		}
128 	}
129 
130 	return -ETIMEDOUT;
131 }
132 
vce_v1_0_firmware_loaded(struct amdgpu_device * adev)133 static int vce_v1_0_firmware_loaded(struct amdgpu_device *adev)
134 {
135 	int i, j;
136 
137 	for (i = 0; i < 10; ++i) {
138 		for (j = 0; j < 100; ++j) {
139 			if (RREG32(mmVCE_STATUS) & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
140 				return 0;
141 			mdelay(10);
142 		}
143 
144 		dev_err(adev->dev, "VCE not responding, trying to reset the ECPU\n");
145 
146 		WREG32_P(mmVCE_SOFT_RESET,
147 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
148 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
149 		mdelay(10);
150 		WREG32_P(mmVCE_SOFT_RESET, 0,
151 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
152 		mdelay(10);
153 	}
154 
155 	return -ETIMEDOUT;
156 }
157 
vce_v1_0_init_cg(struct amdgpu_device * adev)158 static void vce_v1_0_init_cg(struct amdgpu_device *adev)
159 {
160 	u32 tmp;
161 
162 	tmp = RREG32(mmVCE_CLOCK_GATING_A);
163 	tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
164 	WREG32(mmVCE_CLOCK_GATING_A, tmp);
165 
166 	tmp = RREG32(mmVCE_CLOCK_GATING_B);
167 	tmp |= 0x1e;
168 	tmp &= ~0xe100e1;
169 	WREG32(mmVCE_CLOCK_GATING_B, tmp);
170 
171 	tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
172 	tmp &= ~0xff9ff000;
173 	WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
174 
175 	tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
176 	tmp &= ~0x3ff;
177 	WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
178 }
179 
180 /**
181  * vce_v1_0_load_fw() - load firmware signature into VCPU BO
182  *
183  * @adev: amdgpu_device pointer
184  *
185  * The VCE1 firmware validation mechanism needs a firmware signature.
186  * This function finds the signature appropriate for the current
187  * ASIC and writes that into the VCPU BO.
188  */
vce_v1_0_load_fw(struct amdgpu_device * adev)189 static int vce_v1_0_load_fw(struct amdgpu_device *adev)
190 {
191 	const struct common_firmware_header *hdr;
192 	struct vce_v1_0_fw_signature *sign;
193 	u32 ucode_offset;
194 	u32 ucode_size;
195 	uint32_t chip_id;
196 	u32 *cpu_addr;
197 	int i;
198 
199 	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
200 	ucode_offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
201 	ucode_size = hdr->ucode_size_bytes - sizeof(struct vce_v1_0_fw_signature *);
202 	cpu_addr = adev->vce.cpu_addr;
203 
204 	sign = (void *)adev->vce.fw->data + ucode_offset;
205 
206 	if (ucode_size > VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET)
207 		return -EINVAL;
208 
209 	switch (adev->asic_type) {
210 	case CHIP_TAHITI:
211 		chip_id = 0x01000014;
212 		break;
213 	case CHIP_VERDE:
214 		chip_id = 0x01000015;
215 		break;
216 	case CHIP_PITCAIRN:
217 		chip_id = 0x01000016;
218 		break;
219 	default:
220 		dev_err(adev->dev, "asic_type %#010x was not found!", adev->asic_type);
221 		return -EINVAL;
222 	}
223 
224 	for (i = 0; i < le32_to_cpu(sign->number); ++i) {
225 		if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
226 			break;
227 	}
228 
229 	if (i == le32_to_cpu(sign->number)) {
230 		dev_err(adev->dev, "chip_id 0x%x for %s was not found in VCE firmware",
231 			chip_id, amdgpu_asic_name[adev->asic_type]);
232 		return -EINVAL;
233 	}
234 
235 	memset_io(&cpu_addr[0], 0, amdgpu_bo_size(adev->vce.vcpu_bo));
236 
237 	cpu_addr += (256 - 64) / 4;
238 	memcpy_toio(&cpu_addr[0], &sign->val[i].nonce[0], 16);
239 	cpu_addr[4] = cpu_to_le32(le32_to_cpu(sign->length) + 64);
240 
241 	memset_io(&cpu_addr[5], 0, 44);
242 	memcpy_toio(&cpu_addr[16], &sign[1], ucode_size);
243 
244 	cpu_addr += (le32_to_cpu(sign->length) + 64) / 4;
245 	memcpy_toio(&cpu_addr[0], &sign->val[i].sigval[0], 16);
246 
247 	adev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
248 
249 	return 0;
250 }
251 
vce_v1_0_wait_for_fw_validation(struct amdgpu_device * adev)252 static int vce_v1_0_wait_for_fw_validation(struct amdgpu_device *adev)
253 {
254 	int i;
255 
256 	dev_dbg(adev->dev, "VCE keyselect: %d", adev->vce.keyselect);
257 	WREG32(mmVCE_LMI_FW_START_KEYSEL, adev->vce.keyselect);
258 
259 	for (i = 0; i < 10; ++i) {
260 		mdelay(10);
261 		if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)
262 			break;
263 	}
264 
265 	if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__DONE_MASK)) {
266 		dev_err(adev->dev, "VCE FW validation timeout\n");
267 		return -ETIMEDOUT;
268 	}
269 
270 	if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__PASS_MASK)) {
271 		dev_err(adev->dev, "VCE FW validation failed\n");
272 		return -EINVAL;
273 	}
274 
275 	for (i = 0; i < 10; ++i) {
276 		mdelay(10);
277 		if (!(RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK))
278 			break;
279 	}
280 
281 	if (RREG32(mmVCE_FW_REG_STATUS) & VCE_FW_REG_STATUS__BUSY_MASK) {
282 		dev_err(adev->dev, "VCE FW busy timeout\n");
283 		return -ETIMEDOUT;
284 	}
285 
286 	return 0;
287 }
288 
vce_v1_0_mc_resume(struct amdgpu_device * adev)289 static int vce_v1_0_mc_resume(struct amdgpu_device *adev)
290 {
291 	uint32_t offset;
292 	uint32_t size;
293 
294 	/*
295 	 * When the keyselect is already set, don't perturb VCE FW.
296 	 * Validation seems to always fail the second time.
297 	 */
298 	if (RREG32(mmVCE_LMI_FW_START_KEYSEL)) {
299 		dev_dbg(adev->dev, "keyselect already set: 0x%x (on CPU: 0x%x)\n",
300 			RREG32(mmVCE_LMI_FW_START_KEYSEL), adev->vce.keyselect);
301 
302 		WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
303 		return 0;
304 	}
305 
306 	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
307 	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
308 	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
309 	WREG32(mmVCE_CLOCK_GATING_B, 0);
310 
311 	WREG32_P(mmVCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
312 
313 	WREG32(mmVCE_LMI_CTRL, 0x00398000);
314 
315 	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
316 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
317 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
318 	WREG32(mmVCE_LMI_VM_CTRL, 0);
319 
320 	WREG32(mmVCE_VCPU_SCRATCH7, AMDGPU_MAX_VCE_HANDLES);
321 
322 	offset =  adev->vce.gpu_addr + AMDGPU_VCE_FIRMWARE_OFFSET;
323 	size = VCE_V1_0_FW_SIZE - AMDGPU_VCE_FIRMWARE_OFFSET;
324 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset);
325 	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
326 
327 	offset += size;
328 	size = VCE_V1_0_STACK_SIZE;
329 	WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
330 	WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
331 	WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset);
332 	WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
333 
334 	offset += size;
335 	size = VCE_V1_0_DATA_SIZE;
336 	WARN_ON(!IS_ALIGNED(offset, VCE_V1_0_ALIGNMENT));
337 	WARN_ON(!IS_ALIGNED(size, VCE_V1_0_ALIGNMENT));
338 	WARN_ON((offset + size - adev->vce.gpu_addr) > amdgpu_bo_size(adev->vce.vcpu_bo));
339 	WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset);
340 	WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
341 
342 	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
343 
344 	return vce_v1_0_wait_for_fw_validation(adev);
345 }
346 
347 /**
348  * vce_v1_0_is_idle() - Check idle status of VCE1 IP block
349  *
350  * @ip_block: amdgpu_ip_block pointer
351  *
352  * Check whether VCE is busy according to VCE_STATUS.
353  * Also check whether the SRBM thinks VCE is busy, although
354  * SRBM_STATUS.VCE_BUSY seems to be bogus because it
355  * appears to mirror the VCE_STATUS.VCPU_REPORT_FW_LOADED bit.
356  */
vce_v1_0_is_idle(struct amdgpu_ip_block * ip_block)357 static bool vce_v1_0_is_idle(struct amdgpu_ip_block *ip_block)
358 {
359 	struct amdgpu_device *adev = ip_block->adev;
360 	bool busy =
361 		(RREG32(mmVCE_STATUS) & (VCE_STATUS__JOB_BUSY_MASK | VCE_STATUS__UENC_BUSY_MASK)) ||
362 		(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
363 
364 	return !busy;
365 }
366 
vce_v1_0_wait_for_idle(struct amdgpu_ip_block * ip_block)367 static int vce_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
368 {
369 	struct amdgpu_device *adev = ip_block->adev;
370 	unsigned int i;
371 
372 	for (i = 0; i < adev->usec_timeout; i++) {
373 		udelay(1);
374 		if (vce_v1_0_is_idle(ip_block))
375 			return 0;
376 	}
377 	return -ETIMEDOUT;
378 }
379 
380 /**
381  * vce_v1_0_start - start VCE block
382  *
383  * @adev: amdgpu_device pointer
384  *
385  * Setup and start the VCE block
386  */
vce_v1_0_start(struct amdgpu_device * adev)387 static int vce_v1_0_start(struct amdgpu_device *adev)
388 {
389 	struct amdgpu_ring *ring;
390 	int r;
391 
392 	WREG32_P(mmVCE_STATUS, 1, ~1);
393 
394 	r = vce_v1_0_mc_resume(adev);
395 	if (r)
396 		return r;
397 
398 	ring = &adev->vce.ring[0];
399 	WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
400 	WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
401 	WREG32(mmVCE_RB_BASE_LO, lower_32_bits(ring->gpu_addr));
402 	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
403 	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
404 
405 	ring = &adev->vce.ring[1];
406 	WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
407 	WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
408 	WREG32(mmVCE_RB_BASE_LO2, lower_32_bits(ring->gpu_addr));
409 	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
410 	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
411 
412 	WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
413 		 ~VCE_VCPU_CNTL__CLK_EN_MASK);
414 
415 	WREG32_P(mmVCE_SOFT_RESET,
416 		VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
417 		VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
418 		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
419 		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
420 
421 	mdelay(100);
422 
423 	WREG32_P(mmVCE_SOFT_RESET, 0,
424 		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
425 		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
426 
427 	r = vce_v1_0_firmware_loaded(adev);
428 
429 	/* Clear VCE_STATUS, otherwise SRBM thinks VCE1 is busy. */
430 	WREG32(mmVCE_STATUS, 0);
431 
432 	if (r) {
433 		dev_err(adev->dev, "VCE not responding, giving up\n");
434 		return r;
435 	}
436 
437 	return 0;
438 }
439 
vce_v1_0_stop(struct amdgpu_device * adev)440 static int vce_v1_0_stop(struct amdgpu_device *adev)
441 {
442 	struct amdgpu_ip_block *ip_block;
443 	int status;
444 	int i;
445 
446 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE);
447 	if (!ip_block)
448 		return -EINVAL;
449 
450 	if (vce_v1_0_lmi_clean(adev))
451 		dev_warn(adev->dev, "VCE not idle\n");
452 
453 	if (vce_v1_0_wait_for_idle(ip_block))
454 		dev_warn(adev->dev, "VCE busy: VCE_STATUS=0x%x, SRBM_STATUS2=0x%x\n",
455 			RREG32(mmVCE_STATUS), RREG32(mmSRBM_STATUS2));
456 
457 	/* Stall UMC and register bus before resetting VCPU */
458 	WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
459 
460 	for (i = 0; i < 100; ++i) {
461 		status = RREG32(mmVCE_LMI_STATUS);
462 		if (status & 0x240)
463 			break;
464 		mdelay(1);
465 	}
466 
467 	WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);
468 
469 	WREG32_P(mmVCE_SOFT_RESET,
470 		VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
471 		VCE_SOFT_RESET__FME_SOFT_RESET_MASK,
472 		~(VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK |
473 		  VCE_SOFT_RESET__FME_SOFT_RESET_MASK));
474 
475 	WREG32(mmVCE_STATUS, 0);
476 
477 	return 0;
478 }
479 
vce_v1_0_enable_mgcg(struct amdgpu_device * adev,bool enable)480 static void vce_v1_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
481 {
482 	u32 tmp;
483 
484 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
485 		tmp = RREG32(mmVCE_CLOCK_GATING_A);
486 		tmp |= VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
487 		WREG32(mmVCE_CLOCK_GATING_A, tmp);
488 
489 		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
490 		tmp &= ~0x1ff000;
491 		tmp |= 0xff800000;
492 		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
493 
494 		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
495 		tmp &= ~0x3ff;
496 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
497 	} else {
498 		tmp = RREG32(mmVCE_CLOCK_GATING_A);
499 		tmp &= ~VCE_CLOCK_GATING_A__CGC_DYN_CLOCK_MODE_MASK;
500 		WREG32(mmVCE_CLOCK_GATING_A, tmp);
501 
502 		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
503 		tmp |= 0x1ff000;
504 		tmp &= ~0xff800000;
505 		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
506 
507 		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
508 		tmp |= 0x3ff;
509 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
510 	}
511 }
512 
vce_v1_0_early_init(struct amdgpu_ip_block * ip_block)513 static int vce_v1_0_early_init(struct amdgpu_ip_block *ip_block)
514 {
515 	struct amdgpu_device *adev = ip_block->adev;
516 	int r;
517 
518 	r = amdgpu_vce_early_init(adev);
519 	if (r)
520 		return r;
521 
522 	adev->vce.num_rings = 2;
523 
524 	vce_v1_0_set_ring_funcs(adev);
525 	vce_v1_0_set_irq_funcs(adev);
526 
527 	return 0;
528 }
529 
530 /**
531  * vce_v1_0_ensure_vcpu_bo_32bit_addr() - ensure the VCPU BO has a 32-bit address
532  *
533  * @adev: amdgpu_device pointer
534  *
535  * Due to various hardware limitations, the VCE1 requires
536  * the VCPU BO to be in the low 32 bit address range.
537  * Ensure that the VCPU BO has a 32-bit GPU address,
538  * or return an error code when that isn't possible.
539  *
540  * To accomodate that, we put GART to the LOW address range
541  * and reserve some GART pages where we map the VCPU BO,
542  * so that it gets a 32-bit address.
543  *
544  * The BAR address is zero and we can't change it
545  * due to the firmware validation mechanism.
546  * It seems that it fails to initialize if the address is >= 128 MiB.
547  */
vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device * adev)548 static int vce_v1_0_ensure_vcpu_bo_32bit_addr(struct amdgpu_device *adev)
549 {
550 	u64 bo_size = amdgpu_bo_size(adev->vce.vcpu_bo);
551 	u64 max_vcpu_bo_addr = 0x07ffffff - bo_size;
552 	u64 num_pages = ALIGN(bo_size, AMDGPU_GPU_PAGE_SIZE) / AMDGPU_GPU_PAGE_SIZE;
553 	u64 pa = amdgpu_gmc_vram_pa(adev, adev->vce.vcpu_bo);
554 	u64 flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_VALID;
555 	u64 vce_gart_start_offs;
556 	int r;
557 
558 	if (adev->gmc.vram_start < adev->gmc.gart_start)
559 		return amdgpu_bo_gpu_offset(adev->vce.vcpu_bo) <= max_vcpu_bo_addr ? 0 : -EINVAL;
560 
561 	if (!drm_mm_node_allocated(&adev->vce.gart_node)) {
562 		r = amdgpu_gtt_mgr_alloc_entries(&adev->mman.gtt_mgr,
563 						 &adev->vce.gart_node, num_pages,
564 						 DRM_MM_INSERT_LOW);
565 		if (r)
566 			return r;
567 	}
568 
569 	vce_gart_start_offs = amdgpu_gtt_node_to_byte_offset(&adev->vce.gart_node);
570 
571 	/* Check if we can map the VCPU BO in GART to a 32-bit address. */
572 	if (adev->gmc.gart_start + vce_gart_start_offs > max_vcpu_bo_addr)
573 		return -EINVAL;
574 
575 	amdgpu_gart_map_vram_range(adev, pa, adev->vce.gart_node.start,
576 				   num_pages, flags, adev->gart.ptr);
577 	adev->vce.gpu_addr = adev->gmc.gart_start + vce_gart_start_offs;
578 
579 	return 0;
580 }
581 
vce_v1_0_sw_init(struct amdgpu_ip_block * ip_block)582 static int vce_v1_0_sw_init(struct amdgpu_ip_block *ip_block)
583 {
584 	struct amdgpu_device *adev = ip_block->adev;
585 	struct amdgpu_ring *ring;
586 	int r, i;
587 
588 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
589 	if (r)
590 		return r;
591 
592 	r = amdgpu_vce_sw_init(adev, VCE_V1_0_FW_SIZE +
593 		VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE);
594 	if (r)
595 		return r;
596 
597 	r = vce_v1_0_load_fw(adev);
598 	if (r)
599 		return r;
600 	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
601 	if (r)
602 		return r;
603 
604 	for (i = 0; i < adev->vce.num_rings; i++) {
605 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
606 
607 		ring = &adev->vce.ring[i];
608 		sprintf(ring->name, "vce%d", i);
609 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
610 				     hw_prio, NULL);
611 		if (r)
612 			return r;
613 	}
614 
615 	return r;
616 }
617 
vce_v1_0_sw_fini(struct amdgpu_ip_block * ip_block)618 static int vce_v1_0_sw_fini(struct amdgpu_ip_block *ip_block)
619 {
620 	struct amdgpu_device *adev = ip_block->adev;
621 	int r;
622 
623 	r = amdgpu_vce_suspend(adev);
624 	if (r)
625 		return r;
626 
627 	r = amdgpu_vce_sw_fini(adev);
628 
629 	amdgpu_gtt_mgr_free_entries(&adev->mman.gtt_mgr, &adev->vce.gart_node);
630 
631 	return r;
632 }
633 
634 /**
635  * vce_v1_0_hw_init - start and test VCE block
636  *
637  * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
638  *
639  * Initialize the hardware, boot up the VCPU and do some testing
640  */
vce_v1_0_hw_init(struct amdgpu_ip_block * ip_block)641 static int vce_v1_0_hw_init(struct amdgpu_ip_block *ip_block)
642 {
643 	struct amdgpu_device *adev = ip_block->adev;
644 	int i, r;
645 
646 	if (adev->pm.dpm_enabled)
647 		amdgpu_dpm_enable_vce(adev, true);
648 	else
649 		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
650 
651 	for (i = 0; i < adev->vce.num_rings; i++) {
652 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
653 		if (r)
654 			return r;
655 	}
656 
657 	dev_info(adev->dev, "VCE initialized successfully.\n");
658 
659 	return 0;
660 }
661 
vce_v1_0_hw_fini(struct amdgpu_ip_block * ip_block)662 static int vce_v1_0_hw_fini(struct amdgpu_ip_block *ip_block)
663 {
664 	int r;
665 
666 	r = vce_v1_0_stop(ip_block->adev);
667 	if (r)
668 		return r;
669 
670 	cancel_delayed_work_sync(&ip_block->adev->vce.idle_work);
671 	return 0;
672 }
673 
vce_v1_0_suspend(struct amdgpu_ip_block * ip_block)674 static int vce_v1_0_suspend(struct amdgpu_ip_block *ip_block)
675 {
676 	struct amdgpu_device *adev = ip_block->adev;
677 	int r;
678 
679 	/*
680 	 * Proper cleanups before halting the HW engine:
681 	 *   - cancel the delayed idle work
682 	 *   - enable powergating
683 	 *   - enable clockgating
684 	 *   - disable dpm
685 	 *
686 	 * TODO: to align with the VCN implementation, move the
687 	 * jobs for clockgating/powergating/dpm setting to
688 	 * ->set_powergating_state().
689 	 */
690 	cancel_delayed_work_sync(&adev->vce.idle_work);
691 
692 	if (adev->pm.dpm_enabled) {
693 		amdgpu_dpm_enable_vce(adev, false);
694 	} else {
695 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
696 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
697 						       AMD_PG_STATE_GATE);
698 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
699 						       AMD_CG_STATE_GATE);
700 	}
701 
702 	r = vce_v1_0_hw_fini(ip_block);
703 	if (r) {
704 		dev_err(adev->dev, "vce_v1_0_hw_fini() failed with error %i", r);
705 		return r;
706 	}
707 
708 	return amdgpu_vce_suspend(adev);
709 }
710 
vce_v1_0_resume(struct amdgpu_ip_block * ip_block)711 static int vce_v1_0_resume(struct amdgpu_ip_block *ip_block)
712 {
713 	struct amdgpu_device *adev = ip_block->adev;
714 	int r;
715 
716 	r = vce_v1_0_load_fw(adev);
717 	if (r)
718 		return r;
719 	r = vce_v1_0_ensure_vcpu_bo_32bit_addr(adev);
720 	if (r)
721 		return r;
722 
723 	return vce_v1_0_hw_init(ip_block);
724 }
725 
vce_v1_0_set_interrupt_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned int type,enum amdgpu_interrupt_state state)726 static int vce_v1_0_set_interrupt_state(struct amdgpu_device *adev,
727 					struct amdgpu_irq_src *source,
728 					unsigned int type,
729 					enum amdgpu_interrupt_state state)
730 {
731 	uint32_t val = 0;
732 
733 	if (state == AMDGPU_IRQ_STATE_ENABLE)
734 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
735 
736 	WREG32_P(mmVCE_SYS_INT_EN, val,
737 		 ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
738 	return 0;
739 }
740 
vce_v1_0_process_interrupt(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)741 static int vce_v1_0_process_interrupt(struct amdgpu_device *adev,
742 				      struct amdgpu_irq_src *source,
743 				      struct amdgpu_iv_entry *entry)
744 {
745 	dev_dbg(adev->dev, "IH: VCE\n");
746 	switch (entry->src_data[0]) {
747 	case 0:
748 	case 1:
749 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
750 		break;
751 	default:
752 		dev_err(adev->dev, "Unhandled interrupt: %d %d\n",
753 			  entry->src_id, entry->src_data[0]);
754 		break;
755 	}
756 
757 	return 0;
758 }
759 
vce_v1_0_set_clockgating_state(struct amdgpu_ip_block * ip_block,enum amd_clockgating_state state)760 static int vce_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
761 					  enum amd_clockgating_state state)
762 {
763 	struct amdgpu_device *adev = ip_block->adev;
764 
765 	vce_v1_0_init_cg(adev);
766 	vce_v1_0_enable_mgcg(adev, state == AMD_CG_STATE_GATE);
767 
768 	return 0;
769 }
770 
vce_v1_0_set_powergating_state(struct amdgpu_ip_block * ip_block,enum amd_powergating_state state)771 static int vce_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
772 					  enum amd_powergating_state state)
773 {
774 	struct amdgpu_device *adev = ip_block->adev;
775 
776 	/*
777 	 * This doesn't actually powergate the VCE block.
778 	 * That's done in the dpm code via the SMC.  This
779 	 * just re-inits the block as necessary.  The actual
780 	 * gating still happens in the dpm code.  We should
781 	 * revisit this when there is a cleaner line between
782 	 * the smc and the hw blocks
783 	 */
784 	if (state == AMD_PG_STATE_GATE)
785 		return vce_v1_0_stop(adev);
786 	else
787 		return vce_v1_0_start(adev);
788 }
789 
790 static const struct amd_ip_funcs vce_v1_0_ip_funcs = {
791 	.name = "vce_v1_0",
792 	.early_init = vce_v1_0_early_init,
793 	.sw_init = vce_v1_0_sw_init,
794 	.sw_fini = vce_v1_0_sw_fini,
795 	.hw_init = vce_v1_0_hw_init,
796 	.hw_fini = vce_v1_0_hw_fini,
797 	.suspend = vce_v1_0_suspend,
798 	.resume = vce_v1_0_resume,
799 	.is_idle = vce_v1_0_is_idle,
800 	.wait_for_idle = vce_v1_0_wait_for_idle,
801 	.set_clockgating_state = vce_v1_0_set_clockgating_state,
802 	.set_powergating_state = vce_v1_0_set_powergating_state,
803 };
804 
805 static const struct amdgpu_ring_funcs vce_v1_0_ring_funcs = {
806 	.type = AMDGPU_RING_TYPE_VCE,
807 	.align_mask = 0xf,
808 	.nop = VCE_CMD_NO_OP,
809 	.support_64bit_ptrs = false,
810 	.no_user_fence = true,
811 	.get_rptr = vce_v1_0_ring_get_rptr,
812 	.get_wptr = vce_v1_0_ring_get_wptr,
813 	.set_wptr = vce_v1_0_ring_set_wptr,
814 	.parse_cs = amdgpu_vce_ring_parse_cs,
815 	.emit_frame_size = 6, /* amdgpu_vce_ring_emit_fence  x1 no user fence */
816 	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
817 	.emit_ib = amdgpu_vce_ring_emit_ib,
818 	.emit_fence = amdgpu_vce_ring_emit_fence,
819 	.test_ring = amdgpu_vce_ring_test_ring,
820 	.test_ib = amdgpu_vce_ring_test_ib,
821 	.insert_nop = amdgpu_ring_insert_nop,
822 	.pad_ib = amdgpu_ring_generic_pad_ib,
823 	.begin_use = amdgpu_vce_ring_begin_use,
824 	.end_use = amdgpu_vce_ring_end_use,
825 };
826 
vce_v1_0_set_ring_funcs(struct amdgpu_device * adev)827 static void vce_v1_0_set_ring_funcs(struct amdgpu_device *adev)
828 {
829 	int i;
830 
831 	for (i = 0; i < adev->vce.num_rings; i++) {
832 		adev->vce.ring[i].funcs = &vce_v1_0_ring_funcs;
833 		adev->vce.ring[i].me = i;
834 	}
835 };
836 
837 static const struct amdgpu_irq_src_funcs vce_v1_0_irq_funcs = {
838 	.set = vce_v1_0_set_interrupt_state,
839 	.process = vce_v1_0_process_interrupt,
840 };
841 
vce_v1_0_set_irq_funcs(struct amdgpu_device * adev)842 static void vce_v1_0_set_irq_funcs(struct amdgpu_device *adev)
843 {
844 	adev->vce.irq.num_types = 1;
845 	adev->vce.irq.funcs = &vce_v1_0_irq_funcs;
846 };
847 
848 const struct amdgpu_ip_block_version vce_v1_0_ip_block = {
849 	.type = AMD_IP_BLOCK_TYPE_VCE,
850 	.major = 1,
851 	.minor = 0,
852 	.rev = 0,
853 	.funcs = &vce_v1_0_ip_funcs,
854 };
855