/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define GRBM_GFX_INDEX__VCE_ALL_PIPE		0x07

#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define mmGRBM_GFX_INDEX_DEFAULT 0xE0000000

#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

#define GET_VCE_INSTANCE(i)  ((i) << GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT \
					| GRBM_GFX_INDEX__VCE_ALL_PIPE)

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_RPTR2);
	else
		return RREG32(mmVCE_RB_RPTR3);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_WPTR2);
	else
		return RREG32(mmVCE_RB_WPTR3);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else if (ring == &adev->vce.ring[1])
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR3, ring->wptr);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

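/**
 * vce_v3_0_set_vce_sw_clock_gating - program the VCE software clock gating
 *
 * @adev: amdgpu_device pointer
 * @gated: true to apply the gated (power saving) register settings
 *
 * Programs the VCE clock gating registers of the currently selected
 * instance, with the clock gating override asserted around the update.
 */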
static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0x3ff;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

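/**
 * vce_v3_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the firmware loaded flag, resetting the ECPU a few
 * times if it does not respond.  Returns 0 on success, -ETIMEDOUT otherwise.
 */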
static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Set up and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	ring = &adev->vce.ring[2];
	WREG32(mmVCE_RB_RPTR3, ring->wptr);
	WREG32(mmVCE_RB_WPTR3, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));
		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

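/**
 * vce_v3_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Stops the VCPU clock, holds the ECPU in reset and clears the busy flag
 * for each VCE instance that is not harvested.
 */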
static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx));

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold the ECPU in reset */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

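/**
 * vce_v3_0_get_harvest_config - read the VCE harvest configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Returns a mask of AMDGPU_VCE_HARVEST_VCE0/VCE1 bits describing which
 * VCE instances are unavailable, based on the ASIC type and harvest fuses.
 */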
static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11, Polaris12 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

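/**
 * vce_v3_0_sw_init - software init
 *
 * @handle: amdgpu_device pointer
 *
 * Registers the VCE interrupt source, sets up the VCE firmware and
 * initializes the VCE rings.
 */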
static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vce_sw_fini(adev);
	if (r)
		return r;

	return r;
}

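/**
 * vce_v3_0_hw_init - hardware init
 *
 * @handle: amdgpu_device pointer
 *
 * Starts the VCE block and tests each enabled ring.
 */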
static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return r;
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	r = vce_v3_0_hw_init(adev);
	if (r)
		return r;

	return r;
}

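/**
 * vce_v3_0_mc_resume - program the memory controller related registers
 *
 * @adev: amdgpu_device pointer
 * @idx: VCE instance index
 *
 * Programs the LMI and VCPU cache registers so the selected VCE instance
 * can find its firmware, stack and data regions in memory.
 */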
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

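/**
 * vce_v3_0_is_idle - check VCE idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true when none of the enabled VCE instances report busy in
 * SRBM_STATUS2.
 */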
static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

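/**
 * vce_v3_0_check_soft_reset - check whether VCE needs a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Reads VCE_STATUS for both instances and, if either reports busy,
 * records the required SRBM soft reset bits in adev->vce.srbm_soft_reset.
 */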
static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for 1st instance, 10 for 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bits 3 to 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

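/**
 * vce_v3_0_soft_reset - soft reset VCE
 *
 * @handle: amdgpu_device pointer
 *
 * Toggles the SRBM soft reset bits recorded by vce_v3_0_check_soft_reset().
 */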
static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

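/**
 * vce_v3_0_set_bypass_mode - toggle the ECLK bypass
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable the ECLK bypass
 *
 * Sets or clears the BYPASSECLK bit in GCK_DFS_BYPASS_CNTL.
 */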
static void vce_v3_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

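/**
 * vce_v3_0_set_clockgating_state - set VCE clock gating state
 *
 * @handle: amdgpu_device pointer
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Programs the bypass mode where applicable and applies the software
 * clock gating settings to each VCE instance that is not harvested.
 */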
static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
		(adev->asic_type == CHIP_TONGA) ||
		(adev->asic_type == CHIP_FIJI))
		vce_v3_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i));

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

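/**
 * vce_v3_0_emit_pipeline_sync - emit a pipeline synchronization
 *
 * @ring: amdgpu_ring pointer
 *
 * Emits a WAIT_GE command on the last emitted fence so the ring waits
 * for previously submitted work before continuing.
 */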
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

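/**
 * vce_v3_0_set_ring_funcs - set the ring function pointers
 *
 * @adev: amdgpu_device pointer
 *
 * Selects the VM or physical mode ring functions based on the ASIC type.
 */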
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};
943