xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "vid.h"
33 #include "vce/vce_3_0_d.h"
34 #include "vce/vce_3_0_sh_mask.h"
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 #include "gca/gfx_8_0_d.h"
38 #include "smu/smu_7_1_2_d.h"
39 #include "smu/smu_7_1_2_sh_mask.h"
40 #include "gca/gfx_8_0_sh_mask.h"
41 #include "ivsrcid/ivsrcid_vislands30.h"
42 
43 
44 #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
45 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
46 #define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07
47 
48 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
49 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
50 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
51 #define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000
52 
53 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
54 
55 #define VCE_V3_0_FW_SIZE	(384 * 1024)
56 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
57 #define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
58 
59 #define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))
60 
61 #define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
62 					| GRBM_GFX_INDEX__VCE_ALL_PIPE)
63 
64 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
65 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
66 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
67 static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block);
68 static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
69 					  enum amd_clockgating_state state);
70 /**
71  * vce_v3_0_ring_get_rptr - get read pointer
72  *
73  * @ring: amdgpu_ring pointer
74  *
75  * Returns the current hardware read pointer
76  */
77 static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
78 {
79 	struct amdgpu_device *adev = ring->adev;
80 	u32 v;
81 
82 	mutex_lock(&adev->grbm_idx_mutex);
83 	if (adev->vce.harvest_config == 0 ||
84 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
85 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
86 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
87 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
88 
89 	if (ring->me == 0)
90 		v = RREG32(mmVCE_RB_RPTR);
91 	else if (ring->me == 1)
92 		v = RREG32(mmVCE_RB_RPTR2);
93 	else
94 		v = RREG32(mmVCE_RB_RPTR3);
95 
96 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
97 	mutex_unlock(&adev->grbm_idx_mutex);
98 
99 	return v;
100 }
101 
102 /**
103  * vce_v3_0_ring_get_wptr - get write pointer
104  *
105  * @ring: amdgpu_ring pointer
106  *
107  * Returns the current hardware write pointer
108  */
109 static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
110 {
111 	struct amdgpu_device *adev = ring->adev;
112 	u32 v;
113 
114 	mutex_lock(&adev->grbm_idx_mutex);
115 	if (adev->vce.harvest_config == 0 ||
116 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
117 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
118 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
119 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
120 
121 	if (ring->me == 0)
122 		v = RREG32(mmVCE_RB_WPTR);
123 	else if (ring->me == 1)
124 		v = RREG32(mmVCE_RB_WPTR2);
125 	else
126 		v = RREG32(mmVCE_RB_WPTR3);
127 
128 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
129 	mutex_unlock(&adev->grbm_idx_mutex);
130 
131 	return v;
132 }
133 
134 /**
135  * vce_v3_0_ring_set_wptr - set write pointer
136  *
137  * @ring: amdgpu_ring pointer
138  *
139  * Commits the write pointer to the hardware
140  */
141 static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
142 {
143 	struct amdgpu_device *adev = ring->adev;
144 
145 	mutex_lock(&adev->grbm_idx_mutex);
146 	if (adev->vce.harvest_config == 0 ||
147 		adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
148 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
149 	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
150 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
151 
152 	if (ring->me == 0)
153 		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
154 	else if (ring->me == 1)
155 		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
156 	else
157 		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
158 
159 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
160 	mutex_unlock(&adev->grbm_idx_mutex);
161 }
162 
163 static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
164 {
165 	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
166 }
167 
168 static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
169 					     bool gated)
170 {
171 	u32 data;
172 
173 	/* Set Override to disable Clock Gating */
174 	vce_v3_0_override_vce_clock_gating(adev, true);
175 
176 	/* This function enables MGCG which is controlled by firmware.
177 	 * With the clocks in the gated state the core is still
178 	 * accessible but the firmware will throttle the clocks on the
179 	 * fly as necessary.
180 	 */
181 	if (!gated) {
182 		data = RREG32(mmVCE_CLOCK_GATING_B);
183 		data |= 0x1ff;
184 		data &= ~0xef0000;
185 		WREG32(mmVCE_CLOCK_GATING_B, data);
186 
187 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
188 		data |= 0x3ff000;
189 		data &= ~0xffc00000;
190 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
191 
192 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
193 		data |= 0x2;
194 		data &= ~0x00010000;
195 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
196 
197 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
198 		data |= 0x37f;
199 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
200 
201 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
202 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
203 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
204 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
205 			0x8;
206 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
207 	} else {
208 		data = RREG32(mmVCE_CLOCK_GATING_B);
209 		data &= ~0x80010;
210 		data |= 0xe70008;
211 		WREG32(mmVCE_CLOCK_GATING_B, data);
212 
213 		data = RREG32(mmVCE_UENC_CLOCK_GATING);
214 		data |= 0xffc00000;
215 		WREG32(mmVCE_UENC_CLOCK_GATING, data);
216 
217 		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
218 		data |= 0x10000;
219 		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);
220 
221 		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
222 		data &= ~0x3ff;
223 		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);
224 
225 		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
226 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
227 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
228 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
229 			  0x8);
230 		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
231 	}
232 	vce_v3_0_override_vce_clock_gating(adev, false);
233 }
234 
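/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS until the VCPU reports the firmware as loaded,
 * toggling the ECPU soft reset between retries.  Returns 0 on
 * success or -ETIMEDOUT if the firmware never responds.
 */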
235 static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
236 {
237 	int i, j;
238 
239 	for (i = 0; i < 10; ++i) {
240 		for (j = 0; j < 100; ++j) {
241 			uint32_t status = RREG32(mmVCE_STATUS);
242 
243 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
244 				return 0;
245 			mdelay(10);
246 		}
247 
248 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
249 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
250 		mdelay(10);
251 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
252 		mdelay(10);
253 	}
254 
255 	return -ETIMEDOUT;
256 }
257 
258 /**
259  * vce_v3_0_start - start VCE block
260  *
261  * @adev: amdgpu_device pointer
262  *
263  * Setup and start the VCE block
264  */
265 static int vce_v3_0_start(struct amdgpu_device *adev)
266 {
267 	struct amdgpu_ring *ring;
268 	int idx, r;
269 
270 	mutex_lock(&adev->grbm_idx_mutex);
271 	for (idx = 0; idx < 2; ++idx) {
272 		if (adev->vce.harvest_config & (1 << idx))
273 			continue;
274 
275 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
276 
277 		/* Program the instance 0 reg space when instance 0 is available;
278 		 * use the instance 1 reg space only when just instance 1 is present */
279 		if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) {
280 			ring = &adev->vce.ring[0];
281 			WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr));
282 			WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
283 			WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
284 			WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
285 			WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
286 
287 			ring = &adev->vce.ring[1];
288 			WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr));
289 			WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
290 			WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
291 			WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
292 			WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
293 
294 			ring = &adev->vce.ring[2];
295 			WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr));
296 			WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
297 			WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
298 			WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
299 			WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);
300 		}
301 
302 		vce_v3_0_mc_resume(adev, idx);
303 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);
304 
305 		if (adev->asic_type >= CHIP_STONEY)
306 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
307 		else
308 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);
309 
310 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
311 		mdelay(100);
312 
313 		r = vce_v3_0_firmware_loaded(adev);
314 
315 		/* clear BUSY flag */
316 		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);
317 
318 		if (r) {
319 			DRM_ERROR("VCE not responding, giving up!!!\n");
320 			mutex_unlock(&adev->grbm_idx_mutex);
321 			return r;
322 		}
323 	}
324 
325 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
326 	mutex_unlock(&adev->grbm_idx_mutex);
327 
328 	return 0;
329 }
330 
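/**
 * vce_v3_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear
 * VCE_STATUS on every non-harvested instance.
 */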
331 static int vce_v3_0_stop(struct amdgpu_device *adev)
332 {
333 	int idx;
334 
335 	mutex_lock(&adev->grbm_idx_mutex);
336 	for (idx = 0; idx < 2; ++idx) {
337 		if (adev->vce.harvest_config & (1 << idx))
338 			continue;
339 
340 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
341 
342 		if (adev->asic_type >= CHIP_STONEY)
343 			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
344 		else
345 			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);
346 
347 		/* hold the ECPU in reset */
348 		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
349 
350 		/* clear VCE STATUS */
351 		WREG32(mmVCE_STATUS, 0);
352 	}
353 
354 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
355 	mutex_unlock(&adev->grbm_idx_mutex);
356 
357 	return 0;
358 }
359 
360 #define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
361 #define VCE_HARVEST_FUSE_MACRO__SHIFT       27
362 #define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000
363 
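/**
 * vce_v3_0_get_harvest_config - query which VCE instances are harvested
 *
 * @adev: amdgpu_device pointer
 *
 * Read the harvest fuses (or apply the per-ASIC defaults) and return
 * the AMDGPU_VCE_HARVEST_* mask of disabled instances.
 */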
364 static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
365 {
366 	u32 tmp;
367 
368 	if ((adev->asic_type == CHIP_FIJI) ||
369 	    (adev->asic_type == CHIP_STONEY))
370 		return AMDGPU_VCE_HARVEST_VCE1;
371 
372 	if (adev->flags & AMD_IS_APU)
373 		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
374 		       VCE_HARVEST_FUSE_MACRO__MASK) >>
375 			VCE_HARVEST_FUSE_MACRO__SHIFT;
376 	else
377 		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
378 		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
379 			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;
380 
381 	switch (tmp) {
382 	case 1:
383 		return AMDGPU_VCE_HARVEST_VCE0;
384 	case 2:
385 		return AMDGPU_VCE_HARVEST_VCE1;
386 	case 3:
387 		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
388 	default:
389 		if ((adev->asic_type == CHIP_POLARIS10) ||
390 		    (adev->asic_type == CHIP_POLARIS11) ||
391 		    (adev->asic_type == CHIP_POLARIS12) ||
392 		    (adev->asic_type == CHIP_VEGAM))
393 			return AMDGPU_VCE_HARVEST_VCE1;
394 
395 		return 0;
396 	}
397 }
398 
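/**
 * vce_v3_0_early_init - early IP block initialization
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Determine the harvest configuration, set the number of rings and
 * install the ring and interrupt functions.
 */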
399 static int vce_v3_0_early_init(struct amdgpu_ip_block *ip_block)
400 {
401 	struct amdgpu_device *adev = ip_block->adev;
402 	int r;
403 
404 	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);
405 
406 	if ((adev->vce.harvest_config &
407 	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
408 	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
409 		return -ENOENT;
410 
411 	r = amdgpu_vce_early_init(adev);
412 	if (r)
413 		return r;
414 
415 	adev->vce.num_rings = 3;
416 
417 	vce_v3_0_set_ring_funcs(adev);
418 	vce_v3_0_set_irq_funcs(adev);
419 
420 	return 0;
421 }
422 
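/**
 * vce_v3_0_sw_init - software initialization
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Register the VCE interrupt source, run the common VCE software
 * init and initialize the VCE rings.
 */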
423 static int vce_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
424 {
425 	struct amdgpu_device *adev = ip_block->adev;
426 	struct amdgpu_ring *ring;
427 	int r, i;
428 
429 	/* VCE */
430 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
431 	if (r)
432 		return r;
433 
434 	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
435 		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
436 	if (r)
437 		return r;
438 
439 	/* 52.8.3 required for 3 ring support */
440 	if (adev->vce.fw_version < FW_52_8_3)
441 		adev->vce.num_rings = 2;
442 
443 	r = amdgpu_vce_resume(adev);
444 	if (r)
445 		return r;
446 
447 	for (i = 0; i < adev->vce.num_rings; i++) {
448 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
449 
450 		ring = &adev->vce.ring[i];
451 		sprintf(ring->name, "vce%d", i);
452 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
453 				     hw_prio, NULL);
454 		if (r)
455 			return r;
456 	}
457 
458 	return r;
459 }
460 
461 static int vce_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
462 {
463 	int r;
464 	struct amdgpu_device *adev = ip_block->adev;
465 
466 	r = amdgpu_vce_suspend(adev);
467 	if (r)
468 		return r;
469 
470 	return amdgpu_vce_sw_fini(adev);
471 }
472 
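/**
 * vce_v3_0_hw_init - hardware initialization
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Override clock gating, request VCE clocks and run a ring test on
 * every VCE ring.
 */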
473 static int vce_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
474 {
475 	int r, i;
476 	struct amdgpu_device *adev = ip_block->adev;
477 
478 	vce_v3_0_override_vce_clock_gating(adev, true);
479 
480 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
481 
482 	for (i = 0; i < adev->vce.num_rings; i++) {
483 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
484 		if (r)
485 			return r;
486 	}
487 
488 	DRM_INFO("VCE initialized successfully.\n");
489 
490 	return 0;
491 }
492 
493 static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
494 {
495 	int r;
496 	struct amdgpu_device *adev = ip_block->adev;
497 
498 	cancel_delayed_work_sync(&adev->vce.idle_work);
499 
500 	r = vce_v3_0_wait_for_idle(ip_block);
501 	if (r)
502 		return r;
503 
504 	vce_v3_0_stop(adev);
505 	return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE);
506 }
507 
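/**
 * vce_v3_0_suspend - suspend the VCE block
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Cancel the idle work, gate VCE clocks and power, stop the hardware
 * via vce_v3_0_hw_fini() and call the common VCE suspend path.
 */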
508 static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block)
509 {
510 	int r;
511 	struct amdgpu_device *adev = ip_block->adev;
512 
513 	/*
514 	 * Proper cleanups before halting the HW engine:
515 	 *   - cancel the delayed idle work
516 	 *   - enable powergating
517 	 *   - enable clockgating
518 	 *   - disable dpm
519 	 *
520 	 * TODO: to align with the VCN implementation, move the
521 	 * jobs for clockgating/powergating/dpm setting to
522 	 * ->set_powergating_state().
523 	 */
524 	cancel_delayed_work_sync(&adev->vce.idle_work);
525 
526 	if (adev->pm.dpm_enabled) {
527 		amdgpu_dpm_enable_vce(adev, false);
528 	} else {
529 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
530 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
531 						       AMD_PG_STATE_GATE);
532 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
533 						       AMD_CG_STATE_GATE);
534 	}
535 
536 	r = vce_v3_0_hw_fini(ip_block);
537 	if (r)
538 		return r;
539 
540 	return amdgpu_vce_suspend(adev);
541 }
542 
543 static int vce_v3_0_resume(struct amdgpu_ip_block *ip_block)
544 {
545 	int r;
546 
547 	r = amdgpu_vce_resume(ip_block->adev);
548 	if (r)
549 		return r;
550 
551 	return vce_v3_0_hw_init(ip_block);
552 }
553 
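/**
 * vce_v3_0_mc_resume - program the memory controller interface
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Set up the LMI and VCPU cache registers so the given instance can
 * fetch its firmware, stack and data from the VCE BO.
 */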
554 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
555 {
556 	uint32_t offset, size;
557 
558 	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
559 	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
560 	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
561 	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);
562 
563 	WREG32(mmVCE_LMI_CTRL, 0x00398000);
564 	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
565 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
566 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
567 	WREG32(mmVCE_LMI_VM_CTRL, 0);
568 	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
569 
570 	if (adev->asic_type >= CHIP_STONEY) {
571 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
572 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
573 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
574 	} else
575 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
576 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
577 	size = VCE_V3_0_FW_SIZE;
578 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
579 	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
580 
581 	if (idx == 0) {
582 		offset += size;
583 		size = VCE_V3_0_STACK_SIZE;
584 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
585 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
586 		offset += size;
587 		size = VCE_V3_0_DATA_SIZE;
588 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
589 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
590 	} else {
591 		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
592 		size = VCE_V3_0_STACK_SIZE;
593 		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
594 		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
595 		offset += size;
596 		size = VCE_V3_0_DATA_SIZE;
597 		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
598 		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
599 	}
600 
601 	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
602 	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
603 }
604 
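/**
 * vce_v3_0_is_idle - check whether the VCE block is idle
 *
 * @ip_block: amdgpu_ip_block pointer
 *
 * Returns true when the SRBM_STATUS2 busy bits of all non-harvested
 * instances are clear.
 */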
605 static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
606 {
607 	struct amdgpu_device *adev = ip_block->adev;
608 	u32 mask = 0;
609 
610 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
611 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
612 
613 	return !(RREG32(mmSRBM_STATUS2) & mask);
614 }
615 
616 static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
617 {
618 	unsigned i;
619 	struct amdgpu_device *adev = ip_block->adev;
620 
621 	for (i = 0; i < adev->usec_timeout; i++)
622 		if (vce_v3_0_is_idle(ip_block))
623 			return 0;
624 
625 	return -ETIMEDOUT;
626 }
627 
628 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
629 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
630 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
631 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
632 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
633 
634 static bool vce_v3_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
635 {
636 	struct amdgpu_device *adev = ip_block->adev;
637 	u32 srbm_soft_reset = 0;
638 
639 	/* According to the VCE team, we should use VCE_STATUS instead of the
640 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
641 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
642 	 * instance's registers are accessed
643 	 * (0 for the 1st instance, 10 for the 2nd instance).
644 	 *
645 	 * VCE_STATUS
646 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
647 	 * |----+----+-----------+----+----+----+----------+---------+----|
648 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
649 	 *
650 	 * The VCE team suggests using bits 3-6 for the busy status check
651 	 */
652 	mutex_lock(&adev->grbm_idx_mutex);
653 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
654 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
655 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
656 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
657 	}
658 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
659 	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
660 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
661 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
662 	}
663 	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
664 	mutex_unlock(&adev->grbm_idx_mutex);
665 
666 	if (srbm_soft_reset) {
667 		adev->vce.srbm_soft_reset = srbm_soft_reset;
668 		return true;
669 	} else {
670 		adev->vce.srbm_soft_reset = 0;
671 		return false;
672 	}
673 }
674 
675 static int vce_v3_0_soft_reset(struct amdgpu_ip_block *ip_block)
676 {
677 	struct amdgpu_device *adev = ip_block->adev;
678 	u32 srbm_soft_reset;
679 
680 	if (!adev->vce.srbm_soft_reset)
681 		return 0;
682 	srbm_soft_reset = adev->vce.srbm_soft_reset;
683 
684 	if (srbm_soft_reset) {
685 		u32 tmp;
686 
687 		tmp = RREG32(mmSRBM_SOFT_RESET);
688 		tmp |= srbm_soft_reset;
689 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
690 		WREG32(mmSRBM_SOFT_RESET, tmp);
691 		tmp = RREG32(mmSRBM_SOFT_RESET);
692 
693 		udelay(50);
694 
695 		tmp &= ~srbm_soft_reset;
696 		WREG32(mmSRBM_SOFT_RESET, tmp);
697 		tmp = RREG32(mmSRBM_SOFT_RESET);
698 
699 		/* Wait a little for things to settle down */
700 		udelay(50);
701 	}
702 
703 	return 0;
704 }
705 
706 static int vce_v3_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
707 {
708 	struct amdgpu_device *adev = ip_block->adev;
709 
710 	if (!adev->vce.srbm_soft_reset)
711 		return 0;
712 
713 	mdelay(5);
714 
715 	return vce_v3_0_suspend(ip_block);
716 }
717 
718 
719 static int vce_v3_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
720 {
721 	struct amdgpu_device *adev = ip_block->adev;
722 
723 	if (!adev->vce.srbm_soft_reset)
724 		return 0;
725 
726 	mdelay(5);
727 
728 	return vce_v3_0_resume(ip_block);
729 }
730 
731 static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
732 					struct amdgpu_irq_src *source,
733 					unsigned type,
734 					enum amdgpu_interrupt_state state)
735 {
736 	uint32_t val = 0;
737 
738 	if (state == AMDGPU_IRQ_STATE_ENABLE)
739 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
740 
741 	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
742 	return 0;
743 }
744 
745 static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
746 				      struct amdgpu_irq_src *source,
747 				      struct amdgpu_iv_entry *entry)
748 {
749 	DRM_DEBUG("IH: VCE\n");
750 
751 	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);
752 
753 	switch (entry->src_data[0]) {
754 	case 0:
755 	case 1:
756 	case 2:
757 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
758 		break;
759 	default:
760 		DRM_ERROR("Unhandled interrupt: %d %d\n",
761 			  entry->src_id, entry->src_data[0]);
762 		break;
763 	}
764 
765 	return 0;
766 }
767 
768 static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
769 					  enum amd_clockgating_state state)
770 {
771 	struct amdgpu_device *adev = ip_block->adev;
772 	bool enable = (state == AMD_CG_STATE_GATE);
773 	int i;
774 
775 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
776 		return 0;
777 
778 	mutex_lock(&adev->grbm_idx_mutex);
779 	for (i = 0; i < 2; i++) {
780 		/* Program VCE Instance 0 or 1 if not harvested */
781 		if (adev->vce.harvest_config & (1 << i))
782 			continue;
783 
784 		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));
785 
786 		if (!enable) {
787 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
788 			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
789 			data &= ~(0xf | 0xff0);
790 			data |= ((0x0 << 0) | (0x04 << 4));
791 			WREG32(mmVCE_CLOCK_GATING_A, data);
792 
793 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
794 			data = RREG32(mmVCE_UENC_CLOCK_GATING);
795 			data &= ~(0xf | 0xff0);
796 			data |= ((0x0 << 0) | (0x04 << 4));
797 			WREG32(mmVCE_UENC_CLOCK_GATING, data);
798 		}
799 
800 		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
801 	}
802 
803 	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
804 	mutex_unlock(&adev->grbm_idx_mutex);
805 
806 	return 0;
807 }
808 
809 static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
810 					  enum amd_powergating_state state)
811 {
812 	/* This doesn't actually powergate the VCE block.
813 	 * That's done in the dpm code via the SMC.  This
814 	 * just re-inits the block as necessary.  The actual
815 	 * gating still happens in the dpm code.  We should
816 	 * revisit this when there is a cleaner line between
817 	 * the SMC and the HW blocks.
818 	 */
819 	struct amdgpu_device *adev = ip_block->adev;
820 	int ret = 0;
821 
822 	if (state == AMD_PG_STATE_GATE) {
823 		ret = vce_v3_0_stop(adev);
824 		if (ret)
825 			goto out;
826 	} else {
827 		ret = vce_v3_0_start(adev);
828 		if (ret)
829 			goto out;
830 	}
831 
832 out:
833 	return ret;
834 }
835 
836 static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
837 {
838 	struct amdgpu_device *adev = ip_block->adev;
839 	int data;
840 
841 	mutex_lock(&adev->pm.mutex);
842 
843 	if (adev->flags & AMD_IS_APU)
844 		data = RREG32_SMC(ixCURRENT_PG_STATUS_APU);
845 	else
846 		data = RREG32_SMC(ixCURRENT_PG_STATUS);
847 
848 	if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
849 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
850 		goto out;
851 	}
852 
853 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
854 
855 	/* AMD_CG_SUPPORT_VCE_MGCG */
856 	data = RREG32(mmVCE_CLOCK_GATING_A);
857 	if (data & (0x04 << 4))
858 		*flags |= AMD_CG_SUPPORT_VCE_MGCG;
859 
860 out:
861 	mutex_unlock(&adev->pm.mutex);
862 }
863 
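/**
 * vce_v3_0_ring_emit_ib - execute an indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @job: job the IB belongs to
 * @ib: indirect buffer to execute
 * @flags: unused
 *
 * Emit a VM-mode IB execute command to the ring.
 */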
864 static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
865 				  struct amdgpu_job *job,
866 				  struct amdgpu_ib *ib,
867 				  uint32_t flags)
868 {
869 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
870 
871 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
872 	amdgpu_ring_write(ring, vmid);
873 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
874 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
875 	amdgpu_ring_write(ring, ib->length_dw);
876 }
877 
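/**
 * vce_v3_0_emit_vm_flush - flush the VM TLB from the ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: VM ID the flush applies to
 * @pd_addr: page directory address
 *
 * Update the page table base for @vmid and emit a TLB flush.
 */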
878 static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
879 				   unsigned int vmid, uint64_t pd_addr)
880 {
881 	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
882 	amdgpu_ring_write(ring, vmid);
883 	amdgpu_ring_write(ring, pd_addr >> 12);
884 
885 	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
886 	amdgpu_ring_write(ring, vmid);
887 	amdgpu_ring_write(ring, VCE_CMD_END);
888 }
889 
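/**
 * vce_v3_0_emit_pipeline_sync - emit a pipeline synchronization
 *
 * @ring: amdgpu_ring pointer
 *
 * Make the engine wait until the last emitted fence sequence number
 * has signaled before processing further commands.
 */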
890 static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
891 {
892 	uint32_t seq = ring->fence_drv.sync_seq;
893 	uint64_t addr = ring->fence_drv.gpu_addr;
894 
895 	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
896 	amdgpu_ring_write(ring, lower_32_bits(addr));
897 	amdgpu_ring_write(ring, upper_32_bits(addr));
898 	amdgpu_ring_write(ring, seq);
899 }
900 
901 static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
902 	.name = "vce_v3_0",
903 	.early_init = vce_v3_0_early_init,
904 	.sw_init = vce_v3_0_sw_init,
905 	.sw_fini = vce_v3_0_sw_fini,
906 	.hw_init = vce_v3_0_hw_init,
907 	.hw_fini = vce_v3_0_hw_fini,
908 	.suspend = vce_v3_0_suspend,
909 	.resume = vce_v3_0_resume,
910 	.is_idle = vce_v3_0_is_idle,
911 	.wait_for_idle = vce_v3_0_wait_for_idle,
912 	.check_soft_reset = vce_v3_0_check_soft_reset,
913 	.pre_soft_reset = vce_v3_0_pre_soft_reset,
914 	.soft_reset = vce_v3_0_soft_reset,
915 	.post_soft_reset = vce_v3_0_post_soft_reset,
916 	.set_clockgating_state = vce_v3_0_set_clockgating_state,
917 	.set_powergating_state = vce_v3_0_set_powergating_state,
918 	.get_clockgating_state = vce_v3_0_get_clockgating_state,
919 };
920 
921 static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
922 	.type = AMDGPU_RING_TYPE_VCE,
923 	.align_mask = 0xf,
924 	.nop = VCE_CMD_NO_OP,
925 	.support_64bit_ptrs = false,
926 	.no_user_fence = true,
927 	.get_rptr = vce_v3_0_ring_get_rptr,
928 	.get_wptr = vce_v3_0_ring_get_wptr,
929 	.set_wptr = vce_v3_0_ring_set_wptr,
930 	.parse_cs = amdgpu_vce_ring_parse_cs,
931 	.emit_frame_size =
932 		4 + /* vce_v3_0_emit_pipeline_sync */
933 		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
934 	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
935 	.emit_ib = amdgpu_vce_ring_emit_ib,
936 	.emit_fence = amdgpu_vce_ring_emit_fence,
937 	.test_ring = amdgpu_vce_ring_test_ring,
938 	.test_ib = amdgpu_vce_ring_test_ib,
939 	.insert_nop = amdgpu_ring_insert_nop,
940 	.pad_ib = amdgpu_ring_generic_pad_ib,
941 	.begin_use = amdgpu_vce_ring_begin_use,
942 	.end_use = amdgpu_vce_ring_end_use,
943 };
944 
945 static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
946 	.type = AMDGPU_RING_TYPE_VCE,
947 	.align_mask = 0xf,
948 	.nop = VCE_CMD_NO_OP,
949 	.support_64bit_ptrs = false,
950 	.no_user_fence = true,
951 	.get_rptr = vce_v3_0_ring_get_rptr,
952 	.get_wptr = vce_v3_0_ring_get_wptr,
953 	.set_wptr = vce_v3_0_ring_set_wptr,
954 	.patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
955 	.emit_frame_size =
956 		6 + /* vce_v3_0_emit_vm_flush */
957 		4 + /* vce_v3_0_emit_pipeline_sync */
958 		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
959 	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
960 	.emit_ib = vce_v3_0_ring_emit_ib,
961 	.emit_vm_flush = vce_v3_0_emit_vm_flush,
962 	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
963 	.emit_fence = amdgpu_vce_ring_emit_fence,
964 	.test_ring = amdgpu_vce_ring_test_ring,
965 	.test_ib = amdgpu_vce_ring_test_ib,
966 	.insert_nop = amdgpu_ring_insert_nop,
967 	.pad_ib = amdgpu_ring_generic_pad_ib,
968 	.begin_use = amdgpu_vce_ring_begin_use,
969 	.end_use = amdgpu_vce_ring_end_use,
970 };
971 
972 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
973 {
974 	int i;
975 
976 	if (adev->asic_type >= CHIP_STONEY) {
977 		for (i = 0; i < adev->vce.num_rings; i++) {
978 			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
979 			adev->vce.ring[i].me = i;
980 		}
981 		DRM_INFO("VCE enabled in VM mode\n");
982 	} else {
983 		for (i = 0; i < adev->vce.num_rings; i++) {
984 			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
985 			adev->vce.ring[i].me = i;
986 		}
987 		DRM_INFO("VCE enabled in physical mode\n");
988 	}
989 }
990 
991 static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
992 	.set = vce_v3_0_set_interrupt_state,
993 	.process = vce_v3_0_process_interrupt,
994 };
995 
996 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
997 {
998 	adev->vce.irq.num_types = 1;
999 	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
1000 }
1001 
1002 const struct amdgpu_ip_block_version vce_v3_0_ip_block = {
1003 	.type = AMD_IP_BLOCK_TYPE_VCE,
1004 	.major = 3,
1005 	.minor = 0,
1006 	.rev = 0,
1007 	.funcs = &vce_v3_0_ip_funcs,
1008 };
1009 
1010 const struct amdgpu_ip_block_version vce_v3_1_ip_block = {
1011 	.type = AMD_IP_BLOCK_TYPE_VCE,
1012 	.major = 3,
1013 	.minor = 1,
1014 	.rev = 0,
1015 	.funcs = &vce_v3_0_ip_funcs,
1016 };
1017 
1018 const struct amdgpu_ip_block_version vce_v3_4_ip_block = {
1019 	.type = AMD_IP_BLOCK_TYPE_VCE,
1020 	.major = 3,
1021 	.minor = 4,
1022 	.rev = 0,
1023 	.funcs = &vce_v3_0_ip_funcs,
1024 };
1025