/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors: Christian König <christian.koenig@amd.com>
 */

#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
#include "vid.h"
#include "vce/vce_3_0_d.h"
#include "vce/vce_3_0_sh_mask.h"
#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_sh_mask.h"

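/*
 * VCE 3.x parts can carry two VCE instances.  GRBM_GFX_INDEX.VCE_INSTANCE
 * selects which instance's registers are decoded by the accesses below; see
 * also the INSTANCE_INDEX note in vce_v3_0_check_soft_reset().
 */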
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
#define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

#define VCE_V3_0_FW_SIZE	(384 * 1024)
#define VCE_V3_0_STACK_SIZE	(64 * 1024)
#define VCE_V3_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

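/* Firmware version packed as (major << 24) | (minor << 16) | (revision << 8);
 * firmware 52.8.3 or newer is required for three-ring support, see
 * vce_v3_0_sw_init().
 */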
#define FW_52_8_3	((52 << 24) | (8 << 16) | (3 << 8))

static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
static int vce_v3_0_wait_for_idle(void *handle);

/**
 * vce_v3_0_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_RPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_RPTR2);
	else
		return RREG32(mmVCE_RB_RPTR3);
}

/**
 * vce_v3_0_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		return RREG32(mmVCE_RB_WPTR);
	else if (ring == &adev->vce.ring[1])
		return RREG32(mmVCE_RB_WPTR2);
	else
		return RREG32(mmVCE_RB_WPTR3);
}

/**
 * vce_v3_0_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vce.ring[0])
		WREG32(mmVCE_RB_WPTR, ring->wptr);
	else if (ring == &adev->vce.ring[1])
		WREG32(mmVCE_RB_WPTR2, ring->wptr);
	else
		WREG32(mmVCE_RB_WPTR3, ring->wptr);
}

static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
{
	WREG32_FIELD(VCE_RB_ARB_CTRL, VCE_CGTT_OVERRIDE, override ? 1 : 0);
}

static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v3_0_override_vce_clock_gating(adev, true);

	/*
	 * This function enables MGCG which is controlled by firmware.
	 * With the clocks in the gated state the core is still
	 * accessible but the firmware will throttle the clocks on the
	 * fly as necessary.
	 */
	if (!gated) {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data |= 0x37f;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	} else {
		data = RREG32(mmVCE_CLOCK_GATING_B);
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(mmVCE_CLOCK_GATING_B, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING);
		data |= 0xffc00000;
		WREG32(mmVCE_UENC_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_CLOCK_GATING_2);
		data |= 0x10000;
		WREG32(mmVCE_UENC_CLOCK_GATING_2, data);

		data = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
		data &= ~0xffc00000;
		WREG32(mmVCE_UENC_REG_CLOCK_GATING, data);

		data = RREG32(mmVCE_UENC_DMA_DCLK_CTRL);
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(mmVCE_UENC_DMA_DCLK_CTRL, data);
	}
	vce_v3_0_override_vce_clock_gating(adev, false);
}

static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		for (j = 0; j < 100; ++j) {
			uint32_t status = RREG32(mmVCE_STATUS);

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);
		mdelay(10);
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(10);
	}

	return -ETIMEDOUT;
}

/**
 * vce_v3_0_start - start VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Setup and start the VCE block
 */
static int vce_v3_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int idx, r;

	ring = &adev->vce.ring[0];
	WREG32(mmVCE_RB_RPTR, ring->wptr);
	WREG32(mmVCE_RB_WPTR, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);

	ring = &adev->vce.ring[1];
	WREG32(mmVCE_RB_RPTR2, ring->wptr);
	WREG32(mmVCE_RB_WPTR2, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);

	ring = &adev->vce.ring[2];
	WREG32(mmVCE_RB_RPTR3, ring->wptr);
	WREG32(mmVCE_RB_WPTR3, ring->wptr);
	WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr);
	WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr));
	WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4);

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx);
		vce_v3_0_mc_resume(adev, idx);
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 1);

		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 0);
		mdelay(100);

		r = vce_v3_0_firmware_loaded(adev);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		if (r) {
			DRM_ERROR("VCE not responding, giving up!!!\n");
			mutex_unlock(&adev->grbm_idx_mutex);
			return r;
		}
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_stop(struct amdgpu_device *adev)
{
	int idx;

	mutex_lock(&adev->grbm_idx_mutex);
	for (idx = 0; idx < 2; ++idx) {
		if (adev->vce.harvest_config & (1 << idx))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, idx);

		if (adev->asic_type >= CHIP_STONEY)
			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
		else
			WREG32_FIELD(VCE_VCPU_CNTL, CLK_EN, 0);

		/* hold on ECPU */
		WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1);

		/* clear BUSY flag */
		WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0);

		/* Set Clock-Gating off */
		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
			vce_v3_0_set_vce_sw_clock_gating(adev, false);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

#define ixVCE_HARVEST_FUSE_MACRO__ADDRESS     0xC0014074
#define VCE_HARVEST_FUSE_MACRO__SHIFT       27
#define VCE_HARVEST_FUSE_MACRO__MASK        0x18000000

static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev)
{
	u32 tmp;

	/* Fiji, Stoney, Polaris10, Polaris11 are single pipe */
	if ((adev->asic_type == CHIP_FIJI) ||
	    (adev->asic_type == CHIP_STONEY) ||
	    (adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_POLARIS11))
		return AMDGPU_VCE_HARVEST_VCE1;

	/* Tonga and CZ are dual or single pipe */
	if (adev->flags & AMD_IS_APU)
		tmp = (RREG32_SMC(ixVCE_HARVEST_FUSE_MACRO__ADDRESS) &
		       VCE_HARVEST_FUSE_MACRO__MASK) >>
			VCE_HARVEST_FUSE_MACRO__SHIFT;
	else
		tmp = (RREG32_SMC(ixCC_HARVEST_FUSES) &
		       CC_HARVEST_FUSES__VCE_DISABLE_MASK) >>
			CC_HARVEST_FUSES__VCE_DISABLE__SHIFT;

	switch (tmp) {
	case 1:
		return AMDGPU_VCE_HARVEST_VCE0;
	case 2:
		return AMDGPU_VCE_HARVEST_VCE1;
	case 3:
		return AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1;
	default:
		return 0;
	}
}

static int vce_v3_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->vce.harvest_config = vce_v3_0_get_harvest_config(adev);

	if ((adev->vce.harvest_config &
	     (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1)) ==
	    (AMDGPU_VCE_HARVEST_VCE0 | AMDGPU_VCE_HARVEST_VCE1))
		return -ENOENT;

	adev->vce.num_rings = 3;

	vce_v3_0_set_ring_funcs(adev);
	vce_v3_0_set_irq_funcs(adev);

	return 0;
}

static int vce_v3_0_sw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int r, i;

	/* VCE */
	r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq);
	if (r)
		return r;

	r = amdgpu_vce_sw_init(adev, VCE_V3_0_FW_SIZE +
		(VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE) * 2);
	if (r)
		return r;

	/* 52.8.3 required for 3 ring support */
	if (adev->vce.fw_version < FW_52_8_3)
		adev->vce.num_rings = 2;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++) {
		ring = &adev->vce.ring[i];
		sprintf(ring->name, "vce%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
		if (r)
			return r;
	}

	return r;
}

static int vce_v3_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}

static int vce_v3_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}

static int vce_v3_0_hw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_wait_for_idle(handle);
	if (r)
		return r;

	return vce_v3_0_stop(adev);
}

static int vce_v3_0_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vce_v3_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}

static int vce_v3_0_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vce_resume(adev);
	if (r)
		return r;

	return vce_v3_0_hw_init(adev);
}

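/*
 * The VCPU cache windows are carved out of the single VCE BO laid out as:
 * the shared firmware image first, then one stack + data region per instance.
 * Instance 1 therefore skips over instance 0's stack and data when
 * programming OFFSET1/OFFSET2 below.
 */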
static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
{
	uint32_t offset, size;

	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
	WREG32(mmVCE_CLOCK_GATING_B, 0x1FF);

	WREG32(mmVCE_LMI_CTRL, 0x00398000);
	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
	WREG32(mmVCE_LMI_VM_CTRL, 0);
	if (adev->asic_type >= CHIP_STONEY) {
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
	} else
		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V3_0_FW_SIZE;
	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);

	if (idx == 0) {
		offset += size;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	} else {
		offset += size + VCE_V3_0_STACK_SIZE + VCE_V3_0_DATA_SIZE;
		size = VCE_V3_0_STACK_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
		offset += size;
		size = VCE_V3_0_DATA_SIZE;
		WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0xfffffff);
		WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
	}

	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
}

static bool vce_v3_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}

static int vce_v3_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v3_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}

#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)

static bool vce_v3_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset = 0;

	/* According to the VCE team, we should use VCE_STATUS instead of the
	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
	 * instance's registers are accessed
	 * (0 for the 1st instance, 0x10 for the 2nd instance).
	 *
	 * VCE_STATUS
	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
	 * |----+----+-----------+----+----+----+----------+---------+----|
	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
	 *
	 * The VCE team suggests using bit 3 to bit 6 for the busy status check.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
	if (RREG32(mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
	}
	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	if (srbm_soft_reset) {
		adev->vce.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->vce.srbm_soft_reset = 0;
		return false;
	}
}

static int vce_v3_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}

static int vce_v3_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_suspend(adev);
}

static int vce_v3_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v3_0_resume(adev);
}

static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	uint32_t val = 0;

	if (state == AMDGPU_IRQ_STATE_ENABLE)
		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;

	WREG32_P(mmVCE_SYS_INT_EN, val, ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
	return 0;
}

static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	DRM_DEBUG("IH: VCE\n");

	WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1);

	switch (entry->src_data) {
	case 0:
	case 1:
	case 2:
		amdgpu_fence_process(&adev->vce.ring[entry->src_data]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data);
		break;
	}

	return 0;
}

static void vce_v3_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
{
	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);

	if (enable)
		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
	else
		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;

	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
}

static int vce_v3_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);
	int i;

	if ((adev->asic_type == CHIP_POLARIS10) ||
	    (adev->asic_type == CHIP_TONGA) ||
	    (adev->asic_type == CHIP_FIJI))
		vce_v3_0_set_bypass_mode(adev, enable);

	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
		return 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < 2; i++) {
		/* Program VCE Instance 0 or 1 if not harvested */
		if (adev->vce.harvest_config & (1 << i))
			continue;

		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);

		if (enable) {
			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
			uint32_t data = RREG32(mmVCE_CLOCK_GATING_A);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_CLOCK_GATING_A, data);

			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
			data = RREG32(mmVCE_UENC_CLOCK_GATING);
			data &= ~(0xf | 0xff0);
			data |= ((0x0 << 0) | (0x04 << 4));
			WREG32(mmVCE_UENC_CLOCK_GATING, data);
		}

		vce_v3_0_set_vce_sw_clock_gating(adev, enable);
	}

	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	return 0;
}

static int vce_v3_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	/* This doesn't actually powergate the VCE block.
	 * That's done in the dpm code via the SMC.  This
	 * just re-inits the block as necessary.  The actual
	 * gating still happens in the dpm code.  We should
	 * revisit this when there is a cleaner line between
	 * the smc and the hw blocks
	 */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
		return 0;

	if (state == AMD_PG_STATE_GATE)
		/* XXX do we need a vce_v3_0_stop()? */
		return 0;
	else
		return vce_v3_0_start(adev);
}

static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
{
	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}

static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vm_id, uint64_t pd_addr)
{
	amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, pd_addr >> 12);

	amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB);
	amdgpu_ring_write(ring, vm_id);
	amdgpu_ring_write(ring, VCE_CMD_END);
}

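/*
 * Pipeline sync: make the ring wait (VCE_CMD_WAIT_GE) until its own fence
 * location reaches sync_seq, i.e. until previously emitted jobs have
 * signalled, before the following VM flush and IB are processed.
 */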
static void vce_v3_0_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, VCE_CMD_WAIT_GE);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
}

static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
	.name = "vce_v3_0",
	.early_init = vce_v3_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v3_0_sw_init,
	.sw_fini = vce_v3_0_sw_fini,
	.hw_init = vce_v3_0_hw_init,
	.hw_fini = vce_v3_0_hw_fini,
	.suspend = vce_v3_0_suspend,
	.resume = vce_v3_0_resume,
	.is_idle = vce_v3_0_is_idle,
	.wait_for_idle = vce_v3_0_wait_for_idle,
	.check_soft_reset = vce_v3_0_check_soft_reset,
	.pre_soft_reset = vce_v3_0_pre_soft_reset,
	.soft_reset = vce_v3_0_soft_reset,
	.post_soft_reset = vce_v3_0_post_soft_reset,
	.set_clockgating_state = vce_v3_0_set_clockgating_state,
	.set_powergating_state = vce_v3_0_set_powergating_state,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_frame_size =
		4 + /* vce_v3_0_emit_pipeline_sync */
		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0xf,
	.nop = VCE_CMD_NO_OP,
	.get_rptr = vce_v3_0_ring_get_rptr,
	.get_wptr = vce_v3_0_ring_get_wptr,
	.set_wptr = vce_v3_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		6 + /* vce_v3_0_emit_vm_flush */
		4 + /* vce_v3_0_emit_pipeline_sync */
		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
	.emit_ib = vce_v3_0_ring_emit_ib,
	.emit_vm_flush = vce_v3_0_emit_vm_flush,
	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};

static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	if (adev->asic_type >= CHIP_STONEY) {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
		DRM_INFO("VCE enabled in VM mode\n");
	} else {
		for (i = 0; i < adev->vce.num_rings; i++)
			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
		DRM_INFO("VCE enabled in physical mode\n");
	}
}

static const struct amdgpu_irq_src_funcs vce_v3_0_irq_funcs = {
	.set = vce_v3_0_set_interrupt_state,
	.process = vce_v3_0_process_interrupt,
};

static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->vce.irq.num_types = 1;
	adev->vce.irq.funcs = &vce_v3_0_irq_funcs;
}

const struct amdgpu_ip_block_version vce_v3_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 1,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};

const struct amdgpu_ip_block_version vce_v3_4_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 3,
	.minor = 4,
	.rev = 0,
	.funcs = &vce_v3_0_ip_funcs,
};
935