xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
40 #include "gca/gfx_8_0_sh_mask.h"
41 #include "ivsrcid/ivsrcid_vislands30.h"
42 
43 
/* VCE instance selection bits of GRBM_GFX_INDEX */
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

/* Value written back to GRBM_GFX_INDEX after a per-instance access */
#define mmGRBM_GFX_INDEX_DEFAULT		0xE0000000

/* VCPU cache base registers used when asic_type >= CHIP_STONEY */
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0		0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1		0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2		0x8618

/* VCE_STATUS bit polled while waiting for the firmware to report in */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/* Sizes of the firmware, stack and data regions of the VCPU cache */
#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Firmware version 52.8.3 in the layout compared against vce.fw_version */
#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

/* GRBM_GFX_INDEX value selecting VCE instance (i), with the ALL_PIPE bits set */
#define GET_VCE_INSTANCE(i) \
	(((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT) | GRBM_GFX_INDEX__VCE_ALL_PIPE)
63 
/* Forward declarations for functions referenced before their definitions */
static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state);
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
70 /**
71  * vce_v3_0_ring_get_rptr - get read pointer
72  *
73  * @ring: amdgpu_ring pointer
74  *
75  * Returns the current hardware read pointer
76  */
77 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
78 {
79 	struct amdgpu_device *adev = ring->adev;
80 	u32 v;
81 
82 	mutex_lock(&adev->grbm_idx_mutex);
83 	if (adev->vce.harvest_config == 0 ||
84 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
85 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
86 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
87 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
88 
89 	if (ring->me == 0)
90 		v = RREG32(mmVCE_RB_RPTR);
91 	else if (ring->me == 1)
92 		v = RREG32(mmVCE_RB_RPTR2);
93 	else
94 		v = RREG32(mmVCE_RB_RPTR3);
95 
96 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
97 	mutex_unlock(&adev->grbm_idx_mutex);
98 
99 	return v;
100 }
101 
102 /**
103  * vce_v3_0_ring_get_wptr - get write pointer
104  *
105  * @ring: amdgpu_ring pointer
106  *
107  * Returns the current hardware write pointer
108  */
109 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
110 {
111 	struct amdgpu_device *adev = ring->adev;
112 	u32 v;
113 
114 	mutex_lock(&adev->grbm_idx_mutex);
115 	if (adev->vce.harvest_config == 0 ||
116 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
117 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
118 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
119 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
120 
121 	if (ring->me == 0)
122 		v = RREG32(mmVCE_RB_WPTR);
123 	else if (ring->me == 1)
124 		v = RREG32(mmVCE_RB_WPTR2);
125 	else
126 		v = RREG32(mmVCE_RB_WPTR3);
127 
128 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
129 	mutex_unlock(&adev->grbm_idx_mutex);
130 
131 	return v;
132 }
133 
134 /**
135  * vce_v3_0_ring_set_wptr - set write pointer
136  *
137  * @ring: amdgpu_ring pointer
138  *
139  * Commits the write pointer to the hardware
140  */
141 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
142 {
143 	struct amdgpu_device *adev = ring->adev;
144 
145 	mutex_lock(&adev->grbm_idx_mutex);
146 	if (adev->vce.harvest_config == 0 ||
147 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
148 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
149 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
150 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
151 
152 	if (ring->me == 0)
153 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
154 	else if (ring->me == 1)
155 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
156 	else
157 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
158 
159 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
160 	mutex_unlock(&adev->grbm_idx_mutex);
161 }
162 
163 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
164 {
165 	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
166 }
167 
168 static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
169 					     bool gated)
170 {
171 	u32 data;
172 
173 	/* Set Override to disable Clock Gating */
174 	vce_v3_0_override_vce_clock_gating(adev, true);
175 
176 	/* This function enables MGCG which is controlled by firmware.
177 	   With the clocks in the gated state the core is still
178 	   accessible but the firmware will throttle the clocks on the
179 	   fly as necessary.
180 	*/
181 	if (!gated) {
182 		data = RREG32(mmVCE_CLOCK_GATING_B);
183 		data |= 0x1ff;
184 		data &= ~0xef0000;
185 		WREG32(mmVCE_CLOCK_GATING_B, data);
186 
187 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
188 		data |= 0x3ff000;
189 		data &= ~0xffc00000;
190 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
191 
192 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
193 		data |= 0x2;
194 		data &= ~0x00010000;
195 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
196 
197 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
198 		data |= 0x37f;
199 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
200 
201 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
202 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
203 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
204 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
205 			0x8;
206 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
207 	} else {
208 		data = RREG32(mmVCE_CLOCK_GATING_B);
209 		data &= ~0x80010;
210 		data |= 0xe70008;
211 		WREG32(mmVCE_CLOCK_GATING_B, data);
212 
213 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
214 		data |= 0xffc00000;
215 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
216 
217 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
218 		data |= 0x10000;
219 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
220 
221 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
222 		data &= ~0x3ff;
223 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
224 
225 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
226 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
227 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
228 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
229 			  0x8);
230 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
231 	}
232 	vce_v3_0_override_vce_clock_gating(adev, false);
233 }
234 
235 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
236 {
237 	int i, j;
238 
239 	for (i = 0; i < 10; ++i) {
240 		for (j = 0; j < 100; ++j) {
241 			uint32_t status = RREG32(mmVCE_STATUS);
242 
243 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
244 				return 0;
245 			mdelay(10);
246 		}
247 
248 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
249 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
250 		mdelay(10);
251 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
252 		mdelay(10);
253 	}
254 
255 	return -ETIMEDOUT;
256 }
257 
258 /**
259  * vce_v3_0_start - start VCE block
260  *
261  * @adev: amdgpu_device pointer
262  *
263  * Setup and start the VCE block
264  */
265 static int vce_v3_0_start(struct amdgpu_device *adev)
266 {
267 	struct amdgpu_ring *ring;
268 	int idx, r;
269 
270 	mutex_lock(&adev->grbm_idx_mutex);
271 	for (idx = 0; idx < 2; ++idx) {
272 		if (adev->vce.harvest_config & (1 << idx))
273 			continue;
274 
275 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
276 
277 		/* Program instance 0 reg space for two instances or instance 0 case
278 		program instance 1 reg space for only instance 1 available case */
279 		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
280 			ring = &adev->vce.ring[0];
281 			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
282 			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
283 			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
284 			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
285 			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
286 
287 			ring = &adev->vce.ring[1];
288 			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
289 			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
290 			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
291 			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
292 			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
293 
294 			ring = &adev->vce.ring[2];
295 			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
296 			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
297 			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
298 			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
299 			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
300 		}
301 
302 		vce_v3_0_mc_resume(adev, idx);
303 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
304 
305 		if (adev->asic_type >= CHIP_STONEY)
306 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
307 		else
308 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
309 
310 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
311 		mdelay(100);
312 
313 		r = vce_v3_0_firmware_loaded(adev);
314 
315 		/* clear BUSY flag */
316 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
317 
318 		if (r) {
319 			DRM_ERROR("VCE not responding, giving up!!!\n");
320 			mutex_unlock(&adev->grbm_idx_mutex);
321 			return r;
322 		}
323 	}
324 
325 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
326 	mutex_unlock(&adev->grbm_idx_mutex);
327 
328 	return 0;
329 }
330 
331 static int vce_v3_0_stop(struct amdgpu_device *adev)
332 {
333 	int idx;
334 
335 	mutex_lock(&adev->grbm_idx_mutex);
336 	for (idx = 0; idx < 2; ++idx) {
337 		if (adev->vce.harvest_config & (1 << idx))
338 			continue;
339 
340 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
341 
342 		if (adev->asic_type >= CHIP_STONEY)
343 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
344 		else
345 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
346 
347 		/* hold on ECPU */
348 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
349 
350 		/* clear VCE STATUS */
351 		WREG32(mmVCE_STATUS, 0);
352 	}
353 
354 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
355 	mutex_unlock(&adev->grbm_idx_mutex);
356 
357 	return 0;
358 }
359 
360 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
361 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
362 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
363 
364 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
365 {
366 	u32 tmp;
367 
368 	if ((adev->asic_type == CHIP_FIJI) ||
369 	    (adev->asic_type == CHIP_STONEY))
370 		return AMDGPU_VCE_HARVEST_VCE1;
371 
372 	if (adev->flags & AMD_IS_APU)
373 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
374 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
375 			VCE_HARVEST_FUSE_MACRO__SHIFT;
376 	else
377 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
378 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
379 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
380 
381 	switch (tmp) {
382 	case 1:
383 		return AMDGPU_VCE_HARVEST_VCE0;
384 	case 2:
385 		return AMDGPU_VCE_HARVEST_VCE1;
386 	case 3:
387 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
388 	default:
389 		if ((adev->asic_type == CHIP_POLARIS10) ||
390 		    (adev->asic_type == CHIP_POLARIS11) ||
391 		    (adev->asic_type == CHIP_POLARIS12) ||
392 		    (adev->asic_type == CHIP_VEGAM))
393 			return AMDGPU_VCE_HARVEST_VCE1;
394 
395 		return 0;
396 	}
397 }
398 
399 static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
400 {
401 	struct amdgpu_device *adev = ip_block->adev;
402 
403 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
404 
405 	if ((adev->vce.harvest_config &
406 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
407 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
408 		return -ENOENT;
409 
410 	adev->vce.num_rings = 3;
411 
412 	vce_v3_0_set_ring_funcs(adev);
413 	vce_v3_0_set_irq_funcs(adev);
414 
415 	return 0;
416 }
417 
418 static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
419 {
420 	struct amdgpu_device *adev = ip_block->adev;
421 	struct amdgpu_ring *ring;
422 	int r, i;
423 
424 	/* VCE */
425 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
426 	if (r)
427 		return r;
428 
429 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
430 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
431 	if (r)
432 		return r;
433 
434 	/* 52.8.3 required for 3 ring support */
435 	if (adev->vce.fw_version < FW_52_8_3)
436 		adev->vce.num_rings = 2;
437 
438 	r = amdgpu_vce_resume(adev);
439 	if (r)
440 		return r;
441 
442 	for (i = 0; i < adev->vce.num_rings; i++) {
443 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
444 
445 		ring = &adev->vce.ring[i];
446 		sprintf(ring->name, "vce%d", i);
447 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
448 				     hw_prio, NULL);
449 		if (r)
450 			return r;
451 	}
452 
453 	return r;
454 }
455 
456 static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
457 {
458 	int r;
459 	struct amdgpu_device *adev = ip_block->adev;
460 
461 	r = amdgpu_vce_suspend(adev);
462 	if (r)
463 		return r;
464 
465 	return amdgpu_vce_sw_fini(adev);
466 }
467 
468 static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
469 {
470 	int r, i;
471 	struct amdgpu_device *adev = ip_block->adev;
472 
473 	vce_v3_0_override_vce_clock_gating(adev, true);
474 
475 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
476 
477 	for (i = 0; i < adev->vce.num_rings; i++) {
478 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
479 		if (r)
480 			return r;
481 	}
482 
483 	DRM_INFO("VCE initialized successfully.\n");
484 
485 	return 0;
486 }
487 
488 static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
489 {
490 	int r;
491 	struct amdgpu_device *adev = ip_block->adev;
492 
493 	cancel_delayed_work_sync(&adev->vce.idle_work);
494 
495 	r = vce_v3_0_wait_for_idle(ip_block);
496 	if (r)
497 		return r;
498 
499 	vce_v3_0_stop(adev);
500 	return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
501 }
502 
503 static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
504 {
505 	int r;
506 	struct amdgpu_device *adev = ip_block->adev;
507 
508 	/*
509 	 * Proper cleanups before halting the HW engine:
510 	 *   - cancel the delayed idle work
511 	 *   - enable powergating
512 	 *   - enable clockgating
513 	 *   - disable dpm
514 	 *
515 	 * TODO: to align with the VCN implementation, move the
516 	 * jobs for clockgating/powergating/dpm setting to
517 	 * ->set_powergating_state().
518 	 */
519 	cancel_delayed_work_sync(&adev->vce.idle_work);
520 
521 	if (adev->pm.dpm_enabled) {
522 		amdgpu_dpm_enable_vce(adev, false);
523 	} else {
524 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
525 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
526 						       AMD_PG_STATE_GATE);
527 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
528 						       AMD_CG_STATE_GATE);
529 	}
530 
531 	r = vce_v3_0_hw_fini(ip_block);
532 	if (r)
533 		return r;
534 
535 	return amdgpu_vce_suspend(adev);
536 }
537 
538 static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
539 {
540 	int r;
541 
542 	r = amdgpu_vce_resume(ip_block->adev);
543 	if (r)
544 		return r;
545 
546 	return vce_v3_0_hw_init(ip_block);
547 }
548 
549 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
550 {
551 	uint32_t offset, size;
552 
553 	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
554 	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
555 	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
556 	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);
557 
558 	WREG32(mmVCE_LMI_CTRL, 0x00398000);
559 	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
560 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
561 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
562 	WREG32(mmVCE_LMI_VM_CTRL, 0);
563 	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
564 
565 	if (adev->asic_type >= CHIP_STONEY) {
566 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
567 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
568 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
569 	} else
570 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
571 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
572 	size = VCE_V3_0_FW_SIZE;
573 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
574 	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
575 
576 	if (idx == 0) {
577 		offset += size;
578 		size = VCE_V3_0_STACK_SIZE;
579 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
580 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
581 		offset += size;
582 		size = VCE_V3_0_DATA_SIZE;
583 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
584 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
585 	} else {
586 		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
587 		size = VCE_V3_0_STACK_SIZE;
588 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
589 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
590 		offset += size;
591 		size = VCE_V3_0_DATA_SIZE;
592 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
593 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
594 	}
595 
596 	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
597 	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
598 }
599 
600 static bool vce_v3_0_is_idle(void *handle)
601 {
602 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
603 	u32 mask = 0;
604 
605 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
606 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
607 
608 	return !(RREG32(mmSRBM_STATUS2) & mask);
609 }
610 
611 static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
612 {
613 	unsigned i;
614 	struct amdgpu_device *adev = ip_block->adev;
615 
616 	for (i = 0; i < adev->usec_timeout; i++)
617 		if (vce_v3_0_is_idle(adev))
618 			return 0;
619 
620 	return -ETIMEDOUT;
621 }
622 
623 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
624 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
625 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
626 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
627 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
628 
629 static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
630 {
631 	struct amdgpu_device *adev = ip_block->adev;
632 	u32 srbm_soft_reset = 0;
633 
634 	/* According to VCE team , we should use VCE_STATUS instead
635 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
636 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
637 	 * instance's registers are accessed
638 	 * (0 for 1st instance, 10 for 2nd instance).
639 	 *
640 	 *VCE_STATUS
641 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
642 	 *|----+----+-----------+----+----+----+----------+---------+----|
643 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
644 	 *
645 	 * VCE team suggest use bit 3--bit 6 for busy status check
646 	 */
647 	mutex_lock(&adev->grbm_idx_mutex);
648 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
649 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
650 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
651 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
652 	}
653 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
654 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
655 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
656 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
657 	}
658 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
659 	mutex_unlock(&adev->grbm_idx_mutex);
660 
661 	if (srbm_soft_reset) {
662 		adev->vce.srbm_soft_reset = srbm_soft_reset;
663 		return true;
664 	} else {
665 		adev->vce.srbm_soft_reset = 0;
666 		return false;
667 	}
668 }
669 
670 static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
671 {
672 	struct amdgpu_device *adev = ip_block->adev;
673 	u32 srbm_soft_reset;
674 
675 	if (!adev->vce.srbm_soft_reset)
676 		return 0;
677 	srbm_soft_reset = adev->vce.srbm_soft_reset;
678 
679 	if (srbm_soft_reset) {
680 		u32 tmp;
681 
682 		tmp = RREG32(mmSRBM_SOFT_RESET);
683 		tmp |= srbm_soft_reset;
684 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
685 		WREG32(mmSRBM_SOFT_RESET, tmp);
686 		tmp = RREG32(mmSRBM_SOFT_RESET);
687 
688 		udelay(50);
689 
690 		tmp &= ~srbm_soft_reset;
691 		WREG32(mmSRBM_SOFT_RESET, tmp);
692 		tmp = RREG32(mmSRBM_SOFT_RESET);
693 
694 		/* Wait a little for things to settle down */
695 		udelay(50);
696 	}
697 
698 	return 0;
699 }
700 
701 static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
702 {
703 	struct amdgpu_device *adev = ip_block->adev;
704 
705 	if (!adev->vce.srbm_soft_reset)
706 		return 0;
707 
708 	mdelay(5);
709 
710 	return vce_v3_0_suspend(ip_block);
711 }
712 
713 
714 static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
715 {
716 	struct amdgpu_device *adev = ip_block->adev;
717 
718 	if (!adev->vce.srbm_soft_reset)
719 		return 0;
720 
721 	mdelay(5);
722 
723 	return vce_v3_0_resume(ip_block);
724 }
725 
726 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
727 					struct amdgpu_irq_src *source,
728 					unsigned type,
729 					enum amdgpu_interrupt_state state)
730 {
731 	uint32_t val = 0;
732 
733 	if (state == AMDGPU_IRQ_STATE_ENABLE)
734 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
735 
736 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
737 	return 0;
738 }
739 
740 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
741 				      struct amdgpu_irq_src *source,
742 				      struct amdgpu_iv_entry *entry)
743 {
744 	DRM_DEBUG("IH: VCE\n");
745 
746 	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
747 
748 	switch (entry->src_data[0]) {
749 	case 0:
750 	case 1:
751 	case 2:
752 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
753 		break;
754 	default:
755 		DRM_ERROR("Unhandled interrupt: %d %d\n",
756 			  entry->src_id, entry->src_data[0]);
757 		break;
758 	}
759 
760 	return 0;
761 }
762 
763 static int vce_v3_0_set_clockgating_state(void *handle,
764 					  enum amd_clockgating_state state)
765 {
766 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
767 	bool enable = (state == AMD_CG_STATE_GATE);
768 	int i;
769 
770 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
771 		return 0;
772 
773 	mutex_lock(&adev->grbm_idx_mutex);
774 	for (i = 0; i < 2; i++) {
775 		/* Program VCE Instance 0 or 1 if not harvested */
776 		if (adev->vce.harvest_config & (1 << i))
777 			continue;
778 
779 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
780 
781 		if (!enable) {
782 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
783 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
784 			data &= ~(0xf | 0xff0);
785 			data |= ((0x0 << 0) | (0x04 << 4));
786 			WREG32(mmVCE_CLOCK_GATING_A, data);
787 
788 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
789 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
790 			data &= ~(0xf | 0xff0);
791 			data |= ((0x0 << 0) | (0x04 << 4));
792 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
793 		}
794 
795 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
796 	}
797 
798 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
799 	mutex_unlock(&adev->grbm_idx_mutex);
800 
801 	return 0;
802 }
803 
804 static int vce_v3_0_set_powergating_state(void *handle,
805 					  enum amd_powergating_state state)
806 {
807 	/* This doesn't actually powergate the VCE block.
808 	 * That's done in the dpm code via the SMC.  This
809 	 * just re-inits the block as necessary.  The actual
810 	 * gating still happens in the dpm code.  We should
811 	 * revisit this when there is a cleaner line between
812 	 * the smc and the hw blocks
813 	 */
814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
815 	int ret = 0;
816 
817 	if (state == AMD_PG_STATE_GATE) {
818 		ret = vce_v3_0_stop(adev);
819 		if (ret)
820 			goto out;
821 	} else {
822 		ret = vce_v3_0_start(adev);
823 		if (ret)
824 			goto out;
825 	}
826 
827 out:
828 	return ret;
829 }
830 
831 static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
832 {
833 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
834 	int data;
835 
836 	mutex_lock(&adev->pm.mutex);
837 
838 	if (adev->flags & AMD_IS_APU)
839 		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
840 	else
841 		data = RREG32_SMC(ixCURRENT_PG_STATUS);
842 
843 	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
844 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
845 		goto out;
846 	}
847 
848 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
849 
850 	/* AMD_CG_SUPPORT_VCE_MGCG */
851 	data = RREG32(mmVCE_CLOCK_GATING_A);
852 	if (data & (0x04 << 4))
853 		*flags |= AMD_CG_SUPPORT_VCE_MGCG;
854 
855 out:
856 	mutex_unlock(&adev->pm.mutex);
857 }
858 
859 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
860 				  struct amdgpu_job *job,
861 				  struct amdgpu_ib *ib,
862 				  uint32_t flags)
863 {
864 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
865 
866 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
867 	amdgpu_ring_write(ring, vmid);
868 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
869 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
870 	amdgpu_ring_write(ring, ib->length_dw);
871 }
872 
873 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
874 				   unsigned int vmid, uint64_t pd_addr)
875 {
876 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
877 	amdgpu_ring_write(ring, vmid);
878 	amdgpu_ring_write(ring, pd_addr >> 12);
879 
880 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
881 	amdgpu_ring_write(ring, vmid);
882 	amdgpu_ring_write(ring, VCE_CMD_END);
883 }
884 
885 static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
886 {
887 	uint32_t seq = ring->fence_drv.sync_seq;
888 	uint64_t addr = ring->fence_drv.gpu_addr;
889 
890 	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
891 	amdgpu_ring_write(ring, lower_32_bits(addr));
892 	amdgpu_ring_write(ring, upper_32_bits(addr));
893 	amdgpu_ring_write(ring, seq);
894 }
895 
896 static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
897 	.name = "vce_v3_0",
898 	.early_init = vce_v3_0_early_init,
899 	.sw_init = vce_v3_0_sw_init,
900 	.sw_fini = vce_v3_0_sw_fini,
901 	.hw_init = vce_v3_0_hw_init,
902 	.hw_fini = vce_v3_0_hw_fini,
903 	.suspend = vce_v3_0_suspend,
904 	.resume = vce_v3_0_resume,
905 	.is_idle = vce_v3_0_is_idle,
906 	.wait_for_idle = vce_v3_0_wait_for_idle,
907 	.check_soft_reset = vce_v3_0_check_soft_reset,
908 	.pre_soft_reset = vce_v3_0_pre_soft_reset,
909 	.soft_reset = vce_v3_0_soft_reset,
910 	.post_soft_reset = vce_v3_0_post_soft_reset,
911 	.set_clockgating_state = vce_v3_0_set_clockgating_state,
912 	.set_powergating_state = vce_v3_0_set_powergating_state,
913 	.get_clockgating_state = vce_v3_0_get_clockgating_state,
914 };
915 
916 static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
917 	.type = AMDGPU_RING_TYPE_VCE,
918 	.align_mask = 0xf,
919 	.nop = VCE_CMD_NO_OP,
920 	.support_64bit_ptrs = false,
921 	.no_user_fence = true,
922 	.get_rptr = vce_v3_0_ring_get_rptr,
923 	.get_wptr = vce_v3_0_ring_get_wptr,
924 	.set_wptr = vce_v3_0_ring_set_wptr,
925 	.parse_cs = amdgpu_vce_ring_parse_cs,
926 	.emit_frame_size =
927 		4 + /* vce_v3_0_emit_pipeline_sync */
928 		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
929 	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
930 	.emit_ib = amdgpu_vce_ring_emit_ib,
931 	.emit_fence = amdgpu_vce_ring_emit_fence,
932 	.test_ring = amdgpu_vce_ring_test_ring,
933 	.test_ib = amdgpu_vce_ring_test_ib,
934 	.insert_nop = amdgpu_ring_insert_nop,
935 	.pad_ib = amdgpu_ring_generic_pad_ib,
936 	.begin_use = amdgpu_vce_ring_begin_use,
937 	.end_use = amdgpu_vce_ring_end_use,
938 };
939 
940 static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
941 	.type = AMDGPU_RING_TYPE_VCE,
942 	.align_mask = 0xf,
943 	.nop = VCE_CMD_NO_OP,
944 	.support_64bit_ptrs = false,
945 	.no_user_fence = true,
946 	.get_rptr = vce_v3_0_ring_get_rptr,
947 	.get_wptr = vce_v3_0_ring_get_wptr,
948 	.set_wptr = vce_v3_0_ring_set_wptr,
949 	.patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
950 	.emit_frame_size =
951 		6 + /* vce_v3_0_emit_vm_flush */
952 		4 + /* vce_v3_0_emit_pipeline_sync */
953 		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
954 	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
955 	.emit_ib = vce_v3_0_ring_emit_ib,
956 	.emit_vm_flush = vce_v3_0_emit_vm_flush,
957 	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
958 	.emit_fence = amdgpu_vce_ring_emit_fence,
959 	.test_ring = amdgpu_vce_ring_test_ring,
960 	.test_ib = amdgpu_vce_ring_test_ib,
961 	.insert_nop = amdgpu_ring_insert_nop,
962 	.pad_ib = amdgpu_ring_generic_pad_ib,
963 	.begin_use = amdgpu_vce_ring_begin_use,
964 	.end_use = amdgpu_vce_ring_end_use,
965 };
966 
967 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
968 {
969 	int i;
970 
971 	if (adev->asic_type >= CHIP_STONEY) {
972 		for (i = 0; i < adev->vce.num_rings; i++) {
973 			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
974 			adev->vce.ring[i].me = i;
975 		}
976 		DRM_INFO("VCE enabled in VM mode\n");
977 	} else {
978 		for (i = 0; i < adev->vce.num_rings; i++) {
979 			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
980 			adev->vce.ring[i].me = i;
981 		}
982 		DRM_INFO("VCE enabled in physical mode\n");
983 	}
984 }
985 
986 static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
987 	.set = vce_v3_0_set_interrupt_state,
988 	.process = vce_v3_0_process_interrupt,
989 };
990 
991 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
992 {
993 	adev->vce.irq.num_types = 1;
994 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
995 };
996 
997 const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
998 	.type = AMD_IP_BLOCK_TYPE_VCE,
999 	.major = 3,
1000 	.minor = 0,
1001 	.rev = 0,
1002 	.funcs = &vce_v3_0_ip_funcs,
1003 };
1004 
1005 const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
1006 	.type = AMD_IP_BLOCK_TYPE_VCE,
1007 	.major = 3,
1008 	.minor = 1,
1009 	.rev = 0,
1010 	.funcs = &vce_v3_0_ip_funcs,
1011 };
1012 
1013 const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
1014 	.type = AMD_IP_BLOCK_TYPE_VCE,
1015 	.major = 3,
1016 	.minor = 4,
1017 	.rev = 0,
1018 	.funcs = &vce_v3_0_ip_funcs,
1019 };
1020