xref: /linux/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c (revision 429508c84d95811dd1300181dfe84743caff9a38)
1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <drm/drm_drv.h>
26 
27 #include "amdgpu.h"
28 #include "amdgpu_vcn.h"
29 #include "amdgpu_pm.h"
30 #include "soc15.h"
31 #include "soc15d.h"
32 #include "vcn_v2_0.h"
33 #include "mmsch_v1_0.h"
34 #include "vcn_v2_5.h"
35 
36 #include "vcn/vcn_2_5_offset.h"
37 #include "vcn/vcn_2_5_sh_mask.h"
38 #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
39 
40 #define VCN_VID_SOC_ADDRESS_2_0					0x1fa00
41 #define VCN1_VID_SOC_ADDRESS_3_0				0x48200
42 
43 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
44 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
45 #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
46 #define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
47 #define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
48 #define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
49 #define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d
50 
51 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
52 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
53 #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
54 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c
55 
56 #define VCN25_MAX_HW_INSTANCES_ARCTURUS			2
57 
58 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
59 static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
60 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
61 static int vcn_v2_5_set_powergating_state(void *handle,
62 				enum amd_powergating_state state);
63 static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
64 				int inst_idx, struct dpg_pause_state *new_state);
65 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
66 static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
67 
68 static int amdgpu_ih_clientid_vcns[] = {
69 	SOC15_IH_CLIENTID_VCN,
70 	SOC15_IH_CLIENTID_VCN1
71 };
72 
73 /**
74  * vcn_v2_5_early_init - set function pointers and load microcode
75  *
76  * @handle: amdgpu_device pointer
77  *
78  * Set ring and irq function pointers
79  * Load microcode from filesystem
80  */
81 static int vcn_v2_5_early_init(void *handle)
82 {
83 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
84 
85 	if (amdgpu_sriov_vf(adev)) {
86 		adev->vcn.num_vcn_inst = 2;
87 		adev->vcn.harvest_config = 0;
88 		adev->vcn.num_enc_rings = 1;
89 	} else {
90 		u32 harvest;
91 		int i;
92 
93 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
94 			harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
95 			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
96 				adev->vcn.harvest_config |= 1 << i;
97 		}
98 		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
99 					AMDGPU_VCN_HARVEST_VCN1))
100 			/* both instances are harvested, disable the block */
101 			return -ENOENT;
102 
103 		adev->vcn.num_enc_rings = 2;
104 	}
105 
106 	vcn_v2_5_set_dec_ring_funcs(adev);
107 	vcn_v2_5_set_enc_ring_funcs(adev);
108 	vcn_v2_5_set_irq_funcs(adev);
109 	vcn_v2_5_set_ras_funcs(adev);
110 
111 	return amdgpu_vcn_early_init(adev);
112 }
113 
114 /**
115  * vcn_v2_5_sw_init - sw init for VCN block
116  *
117  * @handle: amdgpu_device pointer
118  *
119  * Load firmware and do software initialization
120  */
121 static int vcn_v2_5_sw_init(void *handle)
122 {
123 	struct amdgpu_ring *ring;
124 	int i, j, r;
125 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
126 
127 	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
128 		if (adev->vcn.harvest_config & (1 << j))
129 			continue;
130 		/* VCN DEC TRAP */
131 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
132 				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
133 		if (r)
134 			return r;
135 
136 		/* VCN ENC TRAP */
137 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
138 			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
139 				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
140 			if (r)
141 				return r;
142 		}
143 
144 		/* VCN POISON TRAP */
145 		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
146 			VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq);
147 		if (r)
148 			return r;
149 	}
150 
151 	r = amdgpu_vcn_sw_init(adev);
152 	if (r)
153 		return r;
154 
155 	amdgpu_vcn_setup_ucode(adev);
156 
157 	r = amdgpu_vcn_resume(adev);
158 	if (r)
159 		return r;
160 
161 	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
162 		volatile struct amdgpu_fw_shared *fw_shared;
163 
164 		if (adev->vcn.harvest_config & (1 << j))
165 			continue;
166 		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
167 		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
168 		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
169 		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
170 		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
171 		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
172 
173 		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
174 		adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
175 		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
176 		adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
177 		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
178 		adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
179 		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
180 		adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
181 		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
182 		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);
183 
184 		ring = &adev->vcn.inst[j].ring_dec;
185 		ring->use_doorbell = true;
186 
187 		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
188 				(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
189 
190 		if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0))
191 			ring->vm_hub = AMDGPU_MMHUB1(0);
192 		else
193 			ring->vm_hub = AMDGPU_MMHUB0(0);
194 
195 		sprintf(ring->name, "vcn_dec_%d", j);
196 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
197 				     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
198 		if (r)
199 			return r;
200 
201 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
202 			enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);
203 
204 			ring = &adev->vcn.inst[j].ring_enc[i];
205 			ring->use_doorbell = true;
206 
207 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
208 					(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
209 
210 			if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
211 			    IP_VERSION(2, 5, 0))
212 				ring->vm_hub = AMDGPU_MMHUB1(0);
213 			else
214 				ring->vm_hub = AMDGPU_MMHUB0(0);
215 
216 			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
217 			r = amdgpu_ring_init(adev, ring, 512,
218 					     &adev->vcn.inst[j].irq, 0,
219 					     hw_prio, NULL);
220 			if (r)
221 				return r;
222 		}
223 
224 		fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr;
225 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
226 
227 		if (amdgpu_vcnfw_log)
228 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[j]);
229 	}
230 
231 	if (amdgpu_sriov_vf(adev)) {
232 		r = amdgpu_virt_alloc_mm_table(adev);
233 		if (r)
234 			return r;
235 	}
236 
237 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
238 		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
239 
240 	r = amdgpu_vcn_ras_sw_init(adev);
241 	if (r)
242 		return r;
243 
244 	return 0;
245 }
246 
247 /**
248  * vcn_v2_5_sw_fini - sw fini for VCN block
249  *
250  * @handle: amdgpu_device pointer
251  *
252  * Suspend the VCN block and free up software allocations
253  */
254 static int vcn_v2_5_sw_fini(void *handle)
255 {
256 	int i, r, idx;
257 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
258 	volatile struct amdgpu_fw_shared *fw_shared;
259 
260 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
261 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
262 			if (adev->vcn.harvest_config & (1 << i))
263 				continue;
264 			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
265 			fw_shared->present_flag_0 = 0;
266 		}
267 		drm_dev_exit(idx);
268 	}
269 
270 
271 	if (amdgpu_sriov_vf(adev))
272 		amdgpu_virt_free_mm_table(adev);
273 
274 	r = amdgpu_vcn_suspend(adev);
275 	if (r)
276 		return r;
277 
278 	r = amdgpu_vcn_sw_fini(adev);
279 
280 	return r;
281 }
282 
283 /**
284  * vcn_v2_5_hw_init - start and test VCN block
285  *
286  * @handle: amdgpu_device pointer
287  *
288  * Initialize the hardware, boot up the VCPU and do some testing
289  */
290 static int vcn_v2_5_hw_init(void *handle)
291 {
292 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
293 	struct amdgpu_ring *ring;
294 	int i, j, r = 0;
295 
296 	if (amdgpu_sriov_vf(adev))
297 		r = vcn_v2_5_sriov_start(adev);
298 
299 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
300 		if (adev->vcn.harvest_config & (1 << j))
301 			continue;
302 
303 		if (amdgpu_sriov_vf(adev)) {
304 			adev->vcn.inst[j].ring_enc[0].sched.ready = true;
305 			adev->vcn.inst[j].ring_enc[1].sched.ready = false;
306 			adev->vcn.inst[j].ring_enc[2].sched.ready = false;
307 			adev->vcn.inst[j].ring_dec.sched.ready = true;
308 		} else {
309 
310 			ring = &adev->vcn.inst[j].ring_dec;
311 
312 			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
313 						     ring->doorbell_index, j);
314 
315 			r = amdgpu_ring_test_helper(ring);
316 			if (r)
317 				return r;
318 
319 			for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
320 				ring = &adev->vcn.inst[j].ring_enc[i];
321 				r = amdgpu_ring_test_helper(ring);
322 				if (r)
323 					return r;
324 			}
325 		}
326 	}
327 
328 	return r;
329 }
330 
331 /**
332  * vcn_v2_5_hw_fini - stop the hardware block
333  *
334  * @handle: amdgpu_device pointer
335  *
336  * Stop the VCN block and mark the rings as no longer ready
337  */
338 static int vcn_v2_5_hw_fini(void *handle)
339 {
340 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
341 	int i;
342 
343 	cancel_delayed_work_sync(&adev->vcn.idle_work);
344 
345 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
346 		if (adev->vcn.harvest_config & (1 << i))
347 			continue;
348 
349 		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
350 		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
351 		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
352 			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
353 
354 		if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
355 			amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0);
356 	}
357 
358 	return 0;
359 }
360 
361 /**
362  * vcn_v2_5_suspend - suspend VCN block
363  *
364  * @handle: amdgpu_device pointer
365  *
366  * HW fini and suspend VCN block
367  */
368 static int vcn_v2_5_suspend(void *handle)
369 {
370 	int r;
371 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
372 
373 	r = vcn_v2_5_hw_fini(adev);
374 	if (r)
375 		return r;
376 
377 	r = amdgpu_vcn_suspend(adev);
378 
379 	return r;
380 }
381 
382 /**
383  * vcn_v2_5_resume - resume VCN block
384  *
385  * @handle: amdgpu_device pointer
386  *
387  * Resume firmware and do hw init of the VCN block
388  */
389 static int vcn_v2_5_resume(void *handle)
390 {
391 	int r;
392 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
393 
394 	r = amdgpu_vcn_resume(adev);
395 	if (r)
396 		return r;
397 
398 	r = vcn_v2_5_hw_init(adev);
399 
400 	return r;
401 }
402 
403 /**
404  * vcn_v2_5_mc_resume - memory controller programming
405  *
406  * @adev: amdgpu_device pointer
407  *
408  * Let the VCN memory controller know its offsets
409  */
410 static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
411 {
412 	uint32_t size;
413 	uint32_t offset;
414 	int i;
415 
416 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
417 		if (adev->vcn.harvest_config & (1 << i))
418 			continue;
419 
420 		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
421 		/* cache window 0: fw */
422 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
423 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
424 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
425 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
426 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
427 			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
428 			offset = 0;
429 		} else {
430 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
431 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
432 			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
433 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
434 			offset = size;
435 			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
436 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
437 		}
438 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);
439 
440 		/* cache window 1: stack */
441 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
442 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
443 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
444 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
445 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
446 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
447 
448 		/* cache window 2: context */
449 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
450 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
451 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
452 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
453 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
454 		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
455 
456 		/* non-cache window */
457 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
458 			lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
459 		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
460 			upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
461 		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
462 		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
463 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
464 	}
465 }
466 
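/**
 * vcn_v2_5_mc_resume_dpg_mode - memory controller programming in DPG mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: VCN hardware instance index
 * @indirect: true to program through the DPG indirect SRAM
 *
 * Program the firmware, stack, context and non-cache window addresses for
 * a single VCN instance while it runs in dynamic power gating mode
 */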
467 static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
468 {
469 	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4);
470 	uint32_t offset;
471 
472 	/* cache window 0: fw */
473 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
474 		if (!indirect) {
475 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
476 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
477 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
478 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
479 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
480 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
481 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
482 				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
483 		} else {
484 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
485 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
486 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
487 				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
488 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
489 				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
490 		}
491 		offset = 0;
492 	} else {
493 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
494 			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
495 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
496 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
497 			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
498 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
499 		offset = size;
500 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
501 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
502 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
503 	}
504 
505 	if (!indirect)
506 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
507 			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
508 	else
509 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
510 			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
511 
512 	/* cache window 1: stack */
513 	if (!indirect) {
514 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
515 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
516 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
517 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
518 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
519 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
520 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
521 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
522 	} else {
523 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
524 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
525 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
526 			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
527 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
528 			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
529 	}
530 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
531 		VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
532 
533 	/* cache window 2: context */
534 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
535 		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
536 		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
537 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
538 		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
539 		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
540 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
541 		VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
542 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
543 		VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
544 
545 	/* non-cache window */
546 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
547 		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
548 		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
549 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
550 		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
551 		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
552 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
553 		VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
554 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
555 		VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
556 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
557 
558 	/* VCN global tiling registers */
559 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
560 		VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
561 }
562 
563 /**
564  * vcn_v2_5_disable_clock_gating - disable VCN clock gating
565  *
566  * @adev: amdgpu_device pointer
567  *
568  * Disable clock gating for VCN block
569  */
570 static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
571 {
572 	uint32_t data;
573 	int i;
574 
575 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
576 		if (adev->vcn.harvest_config & (1 << i))
577 			continue;
578 		/* UVD disable CGC */
579 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
580 		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
581 			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
582 		else
583 			data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
584 		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
585 		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
586 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
587 
588 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
589 		data &= ~(UVD_CGC_GATE__SYS_MASK
590 			| UVD_CGC_GATE__UDEC_MASK
591 			| UVD_CGC_GATE__MPEG2_MASK
592 			| UVD_CGC_GATE__REGS_MASK
593 			| UVD_CGC_GATE__RBC_MASK
594 			| UVD_CGC_GATE__LMI_MC_MASK
595 			| UVD_CGC_GATE__LMI_UMC_MASK
596 			| UVD_CGC_GATE__IDCT_MASK
597 			| UVD_CGC_GATE__MPRD_MASK
598 			| UVD_CGC_GATE__MPC_MASK
599 			| UVD_CGC_GATE__LBSI_MASK
600 			| UVD_CGC_GATE__LRBBM_MASK
601 			| UVD_CGC_GATE__UDEC_RE_MASK
602 			| UVD_CGC_GATE__UDEC_CM_MASK
603 			| UVD_CGC_GATE__UDEC_IT_MASK
604 			| UVD_CGC_GATE__UDEC_DB_MASK
605 			| UVD_CGC_GATE__UDEC_MP_MASK
606 			| UVD_CGC_GATE__WCB_MASK
607 			| UVD_CGC_GATE__VCPU_MASK
608 			| UVD_CGC_GATE__MMSCH_MASK);
609 
610 		WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);
611 
612 		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0,  0xFFFFFFFF);
613 
614 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
615 		data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
616 			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
617 			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
618 			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
619 			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
620 			| UVD_CGC_CTRL__SYS_MODE_MASK
621 			| UVD_CGC_CTRL__UDEC_MODE_MASK
622 			| UVD_CGC_CTRL__MPEG2_MODE_MASK
623 			| UVD_CGC_CTRL__REGS_MODE_MASK
624 			| UVD_CGC_CTRL__RBC_MODE_MASK
625 			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
626 			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
627 			| UVD_CGC_CTRL__IDCT_MODE_MASK
628 			| UVD_CGC_CTRL__MPRD_MODE_MASK
629 			| UVD_CGC_CTRL__MPC_MODE_MASK
630 			| UVD_CGC_CTRL__LBSI_MODE_MASK
631 			| UVD_CGC_CTRL__LRBBM_MODE_MASK
632 			| UVD_CGC_CTRL__WCB_MODE_MASK
633 			| UVD_CGC_CTRL__VCPU_MODE_MASK
634 			| UVD_CGC_CTRL__MMSCH_MODE_MASK);
635 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
636 
637 		/* turn on */
638 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
639 		data |= (UVD_SUVD_CGC_GATE__SRE_MASK
640 			| UVD_SUVD_CGC_GATE__SIT_MASK
641 			| UVD_SUVD_CGC_GATE__SMP_MASK
642 			| UVD_SUVD_CGC_GATE__SCM_MASK
643 			| UVD_SUVD_CGC_GATE__SDB_MASK
644 			| UVD_SUVD_CGC_GATE__SRE_H264_MASK
645 			| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
646 			| UVD_SUVD_CGC_GATE__SIT_H264_MASK
647 			| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
648 			| UVD_SUVD_CGC_GATE__SCM_H264_MASK
649 			| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
650 			| UVD_SUVD_CGC_GATE__SDB_H264_MASK
651 			| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
652 			| UVD_SUVD_CGC_GATE__SCLR_MASK
653 			| UVD_SUVD_CGC_GATE__UVD_SC_MASK
654 			| UVD_SUVD_CGC_GATE__ENT_MASK
655 			| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
656 			| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
657 			| UVD_SUVD_CGC_GATE__SITE_MASK
658 			| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
659 			| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
660 			| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
661 			| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
662 			| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
663 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);
664 
665 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
666 		data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
667 			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
668 			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
669 			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
670 			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
671 			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
672 			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
673 			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
674 			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
675 			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
676 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
677 	}
678 }
679 
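/**
 * vcn_v2_5_clock_gating_dpg_mode - clock gating setup in DPG mode
 *
 * @adev: amdgpu_device pointer
 * @sram_sel: SRAM select value passed through to the DPG write macro
 * @inst_idx: VCN hardware instance index
 * @indirect: true to program through the DPG indirect SRAM
 *
 * Enable sw controlled clock gating in UVD_CGC_CTRL and program the
 * CGC and SUVD gating registers for dynamic power gating mode
 */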
680 static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
681 		uint8_t sram_sel, int inst_idx, uint8_t indirect)
682 {
683 	uint32_t reg_data = 0;
684 
685 	/* enable sw clock gating control */
686 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
687 		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
688 	else
689 		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
690 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
691 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
692 	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
693 		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
694 		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
695 		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
696 		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
697 		 UVD_CGC_CTRL__SYS_MODE_MASK |
698 		 UVD_CGC_CTRL__UDEC_MODE_MASK |
699 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
700 		 UVD_CGC_CTRL__REGS_MODE_MASK |
701 		 UVD_CGC_CTRL__RBC_MODE_MASK |
702 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
703 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
704 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
705 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
706 		 UVD_CGC_CTRL__MPC_MODE_MASK |
707 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
708 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
709 		 UVD_CGC_CTRL__WCB_MODE_MASK |
710 		 UVD_CGC_CTRL__VCPU_MODE_MASK |
711 		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
712 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
713 		VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
714 
715 	/* turn off clock gating */
716 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
717 		VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);
718 
719 	/* turn on SUVD clock gating */
720 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
721 		VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
722 
723 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
724 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
725 		VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
726 }
727 
728 /**
729  * vcn_v2_5_enable_clock_gating - enable VCN clock gating
730  *
731  * @adev: amdgpu_device pointer
732  *
733  * Enable clock gating for VCN block
734  */
735 static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
736 {
737 	uint32_t data = 0;
738 	int i;
739 
740 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
741 		if (adev->vcn.harvest_config & (1 << i))
742 			continue;
743 		/* enable UVD CGC */
744 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
745 		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
746 			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
747 		else
748 			data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
749 		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
750 		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
751 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
752 
753 		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
754 		data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
755 			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
756 			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
757 			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
758 			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
759 			| UVD_CGC_CTRL__SYS_MODE_MASK
760 			| UVD_CGC_CTRL__UDEC_MODE_MASK
761 			| UVD_CGC_CTRL__MPEG2_MODE_MASK
762 			| UVD_CGC_CTRL__REGS_MODE_MASK
763 			| UVD_CGC_CTRL__RBC_MODE_MASK
764 			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
765 			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
766 			| UVD_CGC_CTRL__IDCT_MODE_MASK
767 			| UVD_CGC_CTRL__MPRD_MODE_MASK
768 			| UVD_CGC_CTRL__MPC_MODE_MASK
769 			| UVD_CGC_CTRL__LBSI_MODE_MASK
770 			| UVD_CGC_CTRL__LRBBM_MODE_MASK
771 			| UVD_CGC_CTRL__WCB_MODE_MASK
772 			| UVD_CGC_CTRL__VCPU_MODE_MASK);
773 		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);
774 
775 		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
776 		data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
777 			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
778 			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
779 			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
780 			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
781 			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
782 			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
783 			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
784 			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
785 			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
786 		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
787 	}
788 }
789 
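/**
 * vcn_v2_6_enable_ras - enable VCN RAS interrupt reporting
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: VCN hardware instance index
 * @indirect: true to program through the DPG indirect SRAM
 *
 * Enable the VCPU/VCODEC RAS controls and interrupt enables; this only
 * applies to VCN 2.6.0 and is a no-op on other versions
 */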
790 static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
791 				bool indirect)
792 {
793 	uint32_t tmp;
794 
795 	if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0))
796 		return;
797 
798 	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
799 	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
800 	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
801 	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
802 	WREG32_SOC15_DPG_MODE(inst_idx,
803 			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL),
804 			      tmp, 0, indirect);
805 
806 	tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
807 	WREG32_SOC15_DPG_MODE(inst_idx,
808 			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN),
809 			      tmp, 0, indirect);
810 
811 	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
812 	WREG32_SOC15_DPG_MODE(inst_idx,
813 			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN),
814 			      tmp, 0, indirect);
815 }
816 
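/**
 * vcn_v2_5_start_dpg_mode - start a VCN instance in DPG mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: VCN hardware instance index
 * @indirect: true to program the registers through the DPG indirect SRAM
 *
 * Enable dynamic power gating, program the clock gating, LMI, MPC and
 * memory controller registers and bring up the decode ring buffer for a
 * single instance
 */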
817 static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
818 {
819 	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
820 	struct amdgpu_ring *ring;
821 	uint32_t rb_bufsz, tmp;
822 
823 	/* disable register anti-hang mechanism */
824 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
825 		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
826 	/* enable dynamic power gating mode */
827 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
828 	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
829 	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
830 	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
831 
832 	if (indirect)
833 		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
834 
835 	/* enable clock gating */
836 	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
837 
838 	/* enable VCPU clock */
839 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
840 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
841 	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
842 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
843 		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
844 
845 	/* disable master interrupt */
846 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
847 		VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);
848 
849 	/* setup mmUVD_LMI_CTRL */
850 	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
851 		UVD_LMI_CTRL__REQ_MODE_MASK |
852 		UVD_LMI_CTRL__CRC_RESET_MASK |
853 		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
854 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
855 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
856 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
857 		0x00100000L);
858 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
859 		VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);
860 
861 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
862 		VCN, 0, mmUVD_MPC_CNTL),
863 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
864 
865 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
866 		VCN, 0, mmUVD_MPC_SET_MUXA0),
867 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
868 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
869 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
870 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
871 
872 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
873 		VCN, 0, mmUVD_MPC_SET_MUXB0),
874 		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
875 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
876 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
877 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
878 
879 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
880 		VCN, 0, mmUVD_MPC_SET_MUX),
881 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
882 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
883 		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
884 
885 	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
886 
887 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
888 		VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
889 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
890 		VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
891 
892 	/* enable LMI MC and UMC channels */
893 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
894 		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
895 
896 	vcn_v2_6_enable_ras(adev, inst_idx, indirect);
897 
898 	/* unblock VCPU register access */
899 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
900 		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
901 
902 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
903 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
904 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
905 		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);
906 
907 	/* enable master interrupt */
908 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
909 		VCN, 0, mmUVD_MASTINT_EN),
910 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
911 
912 	if (indirect)
913 		amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
914 
915 	ring = &adev->vcn.inst[inst_idx].ring_dec;
916 	/* force RBC into idle state */
917 	rb_bufsz = order_base_2(ring->ring_size);
918 	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
919 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
920 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
921 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
922 	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
923 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
924 
925 	/* Stall DPG before WPTR/RPTR reset */
926 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
927 		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
928 		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
929 	fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
930 
931 	/* set the write pointer delay */
932 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
933 
934 	/* set the wb address */
935 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
936 		(upper_32_bits(ring->gpu_addr) >> 2));
937 
938 	/* program the RB_BASE for ring buffer */
939 	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
940 		lower_32_bits(ring->gpu_addr));
941 	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
942 		upper_32_bits(ring->gpu_addr));
943 
944 	/* Initialize the ring buffer's read and write pointers */
945 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
946 
947 	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
948 
949 	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
950 	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
951 		lower_32_bits(ring->wptr));
952 
953 	fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
954 	/* Unstall DPG */
955 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
956 		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
957 
958 	return 0;
959 }
960 
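/**
 * vcn_v2_5_start - start VCN block
 *
 * @adev: amdgpu_device pointer
 *
 * Bring up all non-harvested VCN instances: disable clock gating, program
 * the memory controller, boot the VCPU and initialize the decode and
 * encode ring buffers (or hand off to DPG mode when it is supported)
 */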
961 static int vcn_v2_5_start(struct amdgpu_device *adev)
962 {
963 	struct amdgpu_ring *ring;
964 	uint32_t rb_bufsz, tmp;
965 	int i, j, k, r;
966 
967 	if (adev->pm.dpm_enabled)
968 		amdgpu_dpm_enable_uvd(adev, true);
969 
970 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
971 		if (adev->vcn.harvest_config & (1 << i))
972 			continue;
973 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
974 			r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
975 			continue;
976 		}
977 
978 		/* disable register anti-hang mechanism */
979 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
980 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
981 
982 		/* set uvd status busy */
983 		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
984 		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
985 	}
986 
987 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
988 		return 0;
989 
990 	/* SW clock gating */
991 	vcn_v2_5_disable_clock_gating(adev);
992 
993 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
994 		if (adev->vcn.harvest_config & (1 << i))
995 			continue;
996 		/* enable VCPU clock */
997 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
998 			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
999 
1000 		/* disable master interrupt */
1001 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
1002 			~UVD_MASTINT_EN__VCPU_EN_MASK);
1003 
1004 		/* setup mmUVD_LMI_CTRL */
1005 		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
1006 		tmp &= ~0xff;
1007 		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
1008 			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	|
1009 			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1010 			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1011 			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1012 
1013 		/* setup mmUVD_MPC_CNTL */
1014 		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
1015 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1016 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1017 		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
1018 
1019 		/* setup UVD_MPC_SET_MUXA0 */
1020 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
1021 			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1022 			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1023 			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1024 			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1025 
1026 		/* setup UVD_MPC_SET_MUXB0 */
1027 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
1028 			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1029 			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1030 			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1031 			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1032 
1033 		/* setup mmUVD_MPC_SET_MUX */
1034 		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
1035 			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1036 			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1037 			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1038 	}
1039 
1040 	vcn_v2_5_mc_resume(adev);
1041 
1042 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1043 		volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1044 		if (adev->vcn.harvest_config & (1 << i))
1045 			continue;
1046 		/* VCN global tiling registers */
1047 		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
1048 			adev->gfx.config.gb_addr_config);
1049 		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
1050 			adev->gfx.config.gb_addr_config);
1051 
1052 		/* enable LMI MC and UMC channels */
1053 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
1054 			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1055 
1056 		/* unblock VCPU register access */
1057 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
1058 			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1059 
1060 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1061 			~UVD_VCPU_CNTL__BLK_RST_MASK);
1062 
1063 		for (k = 0; k < 10; ++k) {
1064 			uint32_t status;
1065 
1066 			for (j = 0; j < 100; ++j) {
1067 				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
1068 				if (status & 2)
1069 					break;
1070 				if (amdgpu_emu_mode == 1)
1071 					msleep(500);
1072 				else
1073 					mdelay(10);
1074 			}
1075 			r = 0;
1076 			if (status & 2)
1077 				break;
1078 
1079 			DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
1080 			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1081 				UVD_VCPU_CNTL__BLK_RST_MASK,
1082 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1083 			mdelay(10);
1084 			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1085 				~UVD_VCPU_CNTL__BLK_RST_MASK);
1086 
1087 			mdelay(10);
1088 			r = -1;
1089 		}
1090 
1091 		if (r) {
1092 			DRM_ERROR("VCN decode not responding, giving up!!!\n");
1093 			return r;
1094 		}
1095 
1096 		/* enable master interrupt */
1097 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
1098 			UVD_MASTINT_EN__VCPU_EN_MASK,
1099 			~UVD_MASTINT_EN__VCPU_EN_MASK);
1100 
1101 		/* clear the busy bit of VCN_STATUS */
1102 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
1103 			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1104 
1105 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
1106 
1107 		ring = &adev->vcn.inst[i].ring_dec;
1108 		/* force RBC into idle state */
1109 		rb_bufsz = order_base_2(ring->ring_size);
1110 		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1111 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1112 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1113 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1114 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1115 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
1116 
1117 		fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
1118 		/* program the RB_BASE for ring buffer */
1119 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1120 			lower_32_bits(ring->gpu_addr));
1121 		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1122 			upper_32_bits(ring->gpu_addr));
1123 
1124 		/* Initialize the ring buffer's read and write pointers */
1125 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
1126 
1127 		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
1128 		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
1129 				lower_32_bits(ring->wptr));
1130 		fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
1131 
1132 		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
1133 		ring = &adev->vcn.inst[i].ring_enc[0];
1134 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1135 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1136 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
1137 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1138 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
1139 		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
1140 
1141 		fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
1142 		ring = &adev->vcn.inst[i].ring_enc[1];
1143 		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1144 		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1145 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1146 		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1147 		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
1148 		fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
1149 	}
1150 
1151 	return 0;
1152 }
1153 
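/**
 * vcn_v2_5_mmsch_start - kick off the MMSCH with an init table
 *
 * @adev: amdgpu_device pointer
 * @table: mm table holding the init descriptor used under SR-IOV
 *
 * Pass the descriptor address, VMID and size to the MM scheduler and wait
 * for it to acknowledge the initialization request
 */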
1154 static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
1155 				struct amdgpu_mm_table *table)
1156 {
1157 	uint32_t data = 0, loop = 0, size = 0;
1158 	uint64_t addr = table->gpu_addr;
1159 	struct mmsch_v1_1_init_header *header = NULL;
1160 
1161 	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
1162 	size = header->total_size;
1163 
1164 	/*
1165 	 * 1, write to mmMMSCH_VF_CTX_ADDR_LO/HI registers with the GPU MC address
1166 	 * of the memory descriptor location
1167 	 */
1168 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
1169 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
1170 
1171 	/* 2, update vmid of descriptor */
1172 	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
1173 	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1174 	/* use domain0 for MM scheduler */
1175 	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1176 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);
1177 
1178 	/* 3, notify mmsch about the size of this descriptor */
1179 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
1180 
1181 	/* 4, set resp to zero */
1182 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
1183 
1184 	/*
1185 	 * 5, kick off the initialization and wait until
1186 	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
1187 	 */
1188 	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
1189 
1190 	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1191 	loop = 10;
1192 	while ((data & 0x10000002) != 0x10000002) {
1193 		udelay(100);
1194 		data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1195 		loop--;
1196 		if (!loop)
1197 			break;
1198 	}
1199 
1200 	if (!loop) {
1201 		dev_err(adev->dev,
1202 			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
1203 			data);
1204 		return -EBUSY;
1205 	}
1206 
1207 	return 0;
1208 }
1209 
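/**
 * vcn_v2_5_sriov_start - start VCN block under SR-IOV
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (memory controller and ring programming for
 * every VCN instance) and hand it to the MM scheduler
 */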
1210 static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
1211 {
1212 	struct amdgpu_ring *ring;
1213 	uint32_t offset, size, tmp, i, rb_bufsz;
1214 	uint32_t table_size = 0;
1215 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
1216 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
1217 	struct mmsch_v1_0_cmd_end end = { { 0 } };
1218 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
1219 	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
1220 
1221 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
1222 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1223 	end.cmd_header.command_type = MMSCH_COMMAND__END;
1224 
1225 	header->version = MMSCH_VERSION;
1226 	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
1227 	init_table += header->total_size;
1228 
1229 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1230 		header->eng[i].table_offset = header->total_size;
1231 		header->eng[i].init_status = 0;
1232 		header->eng[i].table_size = 0;
1233 
1234 		table_size = 0;
1235 
1236 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
1237 			SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
1238 			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1239 
1240 		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
1241 		/* mc resume*/
1242 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1243 			MMSCH_V1_0_INSERT_DIRECT_WT(
1244 				SOC15_REG_OFFSET(VCN, i,
1245 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1246 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1247 			MMSCH_V1_0_INSERT_DIRECT_WT(
1248 				SOC15_REG_OFFSET(VCN, i,
1249 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1250 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1251 			offset = 0;
1252 			MMSCH_V1_0_INSERT_DIRECT_WT(
1253 				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
1254 		} else {
1255 			MMSCH_V1_0_INSERT_DIRECT_WT(
1256 				SOC15_REG_OFFSET(VCN, i,
1257 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1258 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
1259 			MMSCH_V1_0_INSERT_DIRECT_WT(
1260 				SOC15_REG_OFFSET(VCN, i,
1261 					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1262 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
1263 			offset = size;
1264 			MMSCH_V1_0_INSERT_DIRECT_WT(
1265 				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
1266 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1267 		}
1268 
1269 		MMSCH_V1_0_INSERT_DIRECT_WT(
1270 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
1271 			size);
1272 		MMSCH_V1_0_INSERT_DIRECT_WT(
1273 			SOC15_REG_OFFSET(VCN, i,
1274 				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1275 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
1276 		MMSCH_V1_0_INSERT_DIRECT_WT(
1277 			SOC15_REG_OFFSET(VCN, i,
1278 				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1279 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
1280 		MMSCH_V1_0_INSERT_DIRECT_WT(
1281 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
1282 			0);
1283 		MMSCH_V1_0_INSERT_DIRECT_WT(
1284 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
1285 			AMDGPU_VCN_STACK_SIZE);
1286 		MMSCH_V1_0_INSERT_DIRECT_WT(
1287 			SOC15_REG_OFFSET(VCN, i,
1288 				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1289 			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
1290 				AMDGPU_VCN_STACK_SIZE));
1291 		MMSCH_V1_0_INSERT_DIRECT_WT(
1292 			SOC15_REG_OFFSET(VCN, i,
1293 				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1294 			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
1295 				AMDGPU_VCN_STACK_SIZE));
1296 		MMSCH_V1_0_INSERT_DIRECT_WT(
1297 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
1298 			0);
1299 		MMSCH_V1_0_INSERT_DIRECT_WT(
1300 			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
1301 			AMDGPU_VCN_CONTEXT_SIZE);
1302 
1303 		ring = &adev->vcn.inst[i].ring_enc[0];
1304 		ring->wptr = 0;
1305 
1306 		MMSCH_V1_0_INSERT_DIRECT_WT(
1307 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
1308 			lower_32_bits(ring->gpu_addr));
1309 		MMSCH_V1_0_INSERT_DIRECT_WT(
1310 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
1311 			upper_32_bits(ring->gpu_addr));
1312 		MMSCH_V1_0_INSERT_DIRECT_WT(
1313 			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
1314 			ring->ring_size / 4);
1315 
1316 		ring = &adev->vcn.inst[i].ring_dec;
1317 		ring->wptr = 0;
1318 		MMSCH_V1_0_INSERT_DIRECT_WT(
1319 			SOC15_REG_OFFSET(VCN, i,
1320 				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
1321 			lower_32_bits(ring->gpu_addr));
1322 		MMSCH_V1_0_INSERT_DIRECT_WT(
1323 			SOC15_REG_OFFSET(VCN, i,
1324 				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
1325 			upper_32_bits(ring->gpu_addr));
1326 
1327 		/* force RBC into idle state */
1328 		rb_bufsz = order_base_2(ring->ring_size);
1329 		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1330 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1331 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1332 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1333 		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1334 		MMSCH_V1_0_INSERT_DIRECT_WT(
1335 			SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);
1336 
1337 		/* add end packet */
1338 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
1339 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
1340 		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
1341 
1342 		/* refine header */
1343 		header->eng[i].table_size = table_size;
1344 		header->total_size += table_size;
1345 	}
1346 
1347 	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
1348 }
1349 
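/**
 * vcn_v2_5_stop_dpg_mode - stop a VCN instance running in DPG mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: VCN hardware instance index
 *
 * Wait for the ring read pointers to catch up with the write pointers and
 * for the power status to settle, then disable dynamic power gating
 */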
1350 static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1351 {
1352 	uint32_t tmp;
1353 
1354 	/* Wait for power status to be 1 */
1355 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1356 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1357 
1358 	/* wait for read ptr to be equal to write ptr */
1359 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
1360 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1361 
1362 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
1363 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
1364 
1365 	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
1366 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
1367 
1368 	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1369 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1370 
1371 	/* disable dynamic power gating mode */
1372 	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
1373 			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1374 
1375 	return 0;
1376 }
1377 
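/**
 * vcn_v2_5_stop - stop VCN block
 *
 * @adev: amdgpu_device pointer
 *
 * Wait for every non-harvested instance to go idle, block the LMI UMC
 * channel and VCPU register access, reset the VCPU and re-enable clock
 * gating and the register anti-hang mechanism
 */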
1378 static int vcn_v2_5_stop(struct amdgpu_device *adev)
1379 {
1380 	uint32_t tmp;
1381 	int i, r = 0;
1382 
1383 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1384 		if (adev->vcn.harvest_config & (1 << i))
1385 			continue;
1386 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1387 			r = vcn_v2_5_stop_dpg_mode(adev, i);
1388 			continue;
1389 		}
1390 
1391 		/* wait for vcn idle */
1392 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1393 		if (r)
1394 			return r;
1395 
1396 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1397 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1398 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1399 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1400 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1401 		if (r)
1402 			return r;
1403 
1404 		/* block LMI UMC channel */
1405 		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
1406 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1407 		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
1408 
1409 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
1410 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1411 		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1412 		if (r)
1413 			return r;
1414 
1415 		/* block VCPU register access */
1416 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
1417 			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1418 			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1419 
1420 		/* reset VCPU */
1421 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1422 			UVD_VCPU_CNTL__BLK_RST_MASK,
1423 			~UVD_VCPU_CNTL__BLK_RST_MASK);
1424 
1425 		/* disable VCPU clock */
1426 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1427 			~(UVD_VCPU_CNTL__CLK_EN_MASK));
1428 
1429 		/* clear status */
1430 		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
1431 
1432 		vcn_v2_5_enable_clock_gating(adev);
1433 
1434 		/* enable register anti-hang mechanism */
1435 		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
1436 			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
1437 			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1438 	}
1439 
1440 	if (adev->pm.dpm_enabled)
1441 		amdgpu_dpm_enable_uvd(adev, false);
1442 
1443 	return 0;
1444 }
1445 
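/**
 * vcn_v2_5_pause_dpg_mode - pause or unpause DPG mode
 *
 * @adev: amdgpu_device pointer
 * @inst_idx: VCN hardware instance index
 * @new_state: requested pause state
 *
 * Request the firmware-based DPG pause state from the hardware and, when
 * pausing, restore the encode ring registers before unstalling DPG
 */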
1446 static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
1447 				int inst_idx, struct dpg_pause_state *new_state)
1448 {
1449 	struct amdgpu_ring *ring;
1450 	uint32_t reg_data = 0;
1451 	int ret_code = 0;
1452 
1453 	/* pause/unpause if state is changed */
1454 	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1455 		DRM_DEBUG("dpg pause state changed %d -> %d",
1456 			adev->vcn.inst[inst_idx].pause_state.fw_based,	new_state->fw_based);
1457 		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
1458 			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1459 
1460 		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1461 			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1462 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1463 
1464 			if (!ret_code) {
1465 				volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
1466 
1467 				/* pause DPG */
1468 				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1469 				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1470 
1471 				/* wait for ACK */
1472 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
1473 					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1474 					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1475 
1476 				/* Stall DPG before WPTR/RPTR reset */
1477 				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1478 					   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1479 					   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1480 
1481 				/* Restore */
1482 				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
1483 				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
1484 				ring->wptr = 0;
1485 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
1486 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1487 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
1488 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1489 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1490 				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;
1491 
1492 				fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
1493 				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
1494 				ring->wptr = 0;
1495 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1496 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1497 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
1498 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1499 				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1500 				fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
1501 
1502 				/* Unstall DPG */
1503 				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1504 					   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1505 
1506 				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
1507 					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1508 			}
1509 		} else {
1510 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1511 			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1512 			SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1513 				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1514 		}
1515 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1516 	}
1517 
1518 	return 0;
1519 }
1520 
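/*
 * Illustrative sketch, not from the upstream driver: the common VCN code
 * drives the pause hook above through the adev->vcn.pause_dpg_mode pointer,
 * which this file assigns to vcn_v2_5_pause_dpg_mode() during setup when
 * DPG power gating is supported.  A caller that wants the firmware-based
 * path paused while encode work is outstanding would do roughly:
 *
 *	struct dpg_pause_state new_state;
 *
 *	new_state.fw_based = enc_work_pending ? VCN_DPG_STATE__PAUSE :
 *						VCN_DPG_STATE__UNPAUSE;
 *	adev->vcn.pause_dpg_mode(adev, inst_idx, &new_state);
 *
 * The hook is a no-op when the requested fw_based state already matches the
 * cached pause_state, so it is safe to call unconditionally.
 * (enc_work_pending and inst_idx stand in for the caller's own bookkeeping.)
 */
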
1521 /**
1522  * vcn_v2_5_dec_ring_get_rptr - get read pointer
1523  *
1524  * @ring: amdgpu_ring pointer
1525  *
1526  * Returns the current hardware read pointer
1527  */
1528 static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
1529 {
1530 	struct amdgpu_device *adev = ring->adev;
1531 
1532 	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
1533 }
1534 
1535 /**
1536  * vcn_v2_5_dec_ring_get_wptr - get write pointer
1537  *
1538  * @ring: amdgpu_ring pointer
1539  *
1540  * Returns the current hardware write pointer
1541  */
1542 static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
1543 {
1544 	struct amdgpu_device *adev = ring->adev;
1545 
1546 	if (ring->use_doorbell)
1547 		return *ring->wptr_cpu_addr;
1548 	else
1549 		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
1550 }
1551 
1552 /**
1553  * vcn_v2_5_dec_ring_set_wptr - set write pointer
1554  *
1555  * @ring: amdgpu_ring pointer
1556  *
1557  * Commits the write pointer to the hardware
1558  */
1559 static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
1560 {
1561 	struct amdgpu_device *adev = ring->adev;
1562 
1563 	if (ring->use_doorbell) {
1564 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1565 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1566 	} else {
1567 		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
1568 	}
1569 }
1570 
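/*
 * Illustrative sketch, not from the upstream driver: the rptr/wptr accessors
 * above are only reached through the common ring helpers, e.g.
 *
 *	amdgpu_ring_alloc(ring, ndw);
 *	amdgpu_ring_write(ring, ...);	/- advances ring->wptr in dwords
 *	amdgpu_ring_commit(ring);	/- pads to align_mask, then calls
 *					/- ring->funcs->set_wptr()
 *
 * With use_doorbell set, vcn_v2_5_dec_ring_set_wptr() publishes the new
 * write pointer through the shared wptr_cpu_addr location plus a doorbell
 * write instead of an MMIO write to mmUVD_RBC_RB_WPTR.
 */
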
1571 static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
1572 	.type = AMDGPU_RING_TYPE_VCN_DEC,
1573 	.align_mask = 0xf,
1574 	.secure_submission_supported = true,
1575 	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
1576 	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
1577 	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
1578 	.emit_frame_size =
1579 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
1580 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
1581 		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
1582 		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
1583 		6,
1584 	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
1585 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
1586 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
1587 	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
1588 	.test_ring = vcn_v2_0_dec_ring_test_ring,
1589 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
1590 	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
1591 	.insert_start = vcn_v2_0_dec_ring_insert_start,
1592 	.insert_end = vcn_v2_0_dec_ring_insert_end,
1593 	.pad_ib = amdgpu_ring_generic_pad_ib,
1594 	.begin_use = amdgpu_vcn_ring_begin_use,
1595 	.end_use = amdgpu_vcn_ring_end_use,
1596 	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
1597 	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
1598 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1599 };
1600 
1601 /**
1602  * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
1603  *
1604  * @ring: amdgpu_ring pointer
1605  *
1606  * Returns the current hardware enc read pointer
1607  */
1608 static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
1609 {
1610 	struct amdgpu_device *adev = ring->adev;
1611 
1612 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
1613 		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
1614 	else
1615 		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
1616 }
1617 
1618 /**
1619  * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
1620  *
1621  * @ring: amdgpu_ring pointer
1622  *
1623  * Returns the current hardware enc write pointer
1624  */
1625 static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
1626 {
1627 	struct amdgpu_device *adev = ring->adev;
1628 
1629 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
1630 		if (ring->use_doorbell)
1631 			return *ring->wptr_cpu_addr;
1632 		else
1633 			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
1634 	} else {
1635 		if (ring->use_doorbell)
1636 			return *ring->wptr_cpu_addr;
1637 		else
1638 			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
1639 	}
1640 }
1641 
1642 /**
1643  * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
1644  *
1645  * @ring: amdgpu_ring pointer
1646  *
1647  * Commits the enc write pointer to the hardware
1648  */
1649 static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
1650 {
1651 	struct amdgpu_device *adev = ring->adev;
1652 
1653 	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
1654 		if (ring->use_doorbell) {
1655 			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1656 			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1657 		} else {
1658 			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1659 		}
1660 	} else {
1661 		if (ring->use_doorbell) {
1662 			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1663 			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1664 		} else {
1665 			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1666 		}
1667 	}
1668 }
1669 
1670 static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
1671 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1672 	.align_mask = 0x3f,
1673 	.nop = VCN_ENC_CMD_NO_OP,
1674 	.get_rptr = vcn_v2_5_enc_ring_get_rptr,
1675 	.get_wptr = vcn_v2_5_enc_ring_get_wptr,
1676 	.set_wptr = vcn_v2_5_enc_ring_set_wptr,
1677 	.emit_frame_size =
1678 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1679 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1680 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1681 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1682 		1, /* vcn_v2_0_enc_ring_insert_end */
1683 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1684 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1685 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1686 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1687 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1688 	.test_ib = amdgpu_vcn_enc_ring_test_ib,
1689 	.insert_nop = amdgpu_ring_insert_nop,
1690 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1691 	.pad_ib = amdgpu_ring_generic_pad_ib,
1692 	.begin_use = amdgpu_vcn_ring_begin_use,
1693 	.end_use = amdgpu_vcn_ring_end_use,
1694 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1695 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1696 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1697 };
1698 
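/*
 * Illustrative note, not from the upstream driver: emit_frame_size and
 * emit_ib_size in the ring funcs above are dword budgets, not byte counts.
 * The common IB submission path reserves roughly
 *
 *	ring->funcs->emit_frame_size + num_ibs * ring->funcs->emit_ib_size
 *
 * dwords with amdgpu_ring_alloc() before emitting anything, so these sums
 * must stay in step with the vcn_v2_0_* emit helpers they account for.
 */
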
1699 static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
1700 {
1701 	int i;
1702 
1703 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1704 		if (adev->vcn.harvest_config & (1 << i))
1705 			continue;
1706 		adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
1707 		adev->vcn.inst[i].ring_dec.me = i;
1708 	}
1709 }
1710 
1711 static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
1712 {
1713 	int i, j;
1714 
1715 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
1716 		if (adev->vcn.harvest_config & (1 << j))
1717 			continue;
1718 		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
1719 			adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
1720 			adev->vcn.inst[j].ring_enc[i].me = j;
1721 		}
1722 	}
1723 }
1724 
1725 static bool vcn_v2_5_is_idle(void *handle)
1726 {
1727 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1728 	int i, ret = 1;
1729 
1730 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1731 		if (adev->vcn.harvest_config & (1 << i))
1732 			continue;
1733 		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
1734 	}
1735 
1736 	return ret;
1737 }
1738 
1739 static int vcn_v2_5_wait_for_idle(void *handle)
1740 {
1741 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1742 	int i, ret = 0;
1743 
1744 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1745 		if (adev->vcn.harvest_config & (1 << i))
1746 			continue;
1747 		ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
1748 			UVD_STATUS__IDLE);
1749 		if (ret)
1750 			return ret;
1751 	}
1752 
1753 	return ret;
1754 }
1755 
1756 static int vcn_v2_5_set_clockgating_state(void *handle,
1757 					  enum amd_clockgating_state state)
1758 {
1759 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1760 	bool enable = (state == AMD_CG_STATE_GATE);
1761 
1762 	if (amdgpu_sriov_vf(adev))
1763 		return 0;
1764 
1765 	if (enable) {
1766 		if (!vcn_v2_5_is_idle(handle))
1767 			return -EBUSY;
1768 		vcn_v2_5_enable_clock_gating(adev);
1769 	} else {
1770 		vcn_v2_5_disable_clock_gating(adev);
1771 	}
1772 
1773 	return 0;
1774 }
1775 
1776 static int vcn_v2_5_set_powergating_state(void *handle,
1777 					  enum amd_powergating_state state)
1778 {
1779 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1780 	int ret;
1781 
1782 	if (amdgpu_sriov_vf(adev))
1783 		return 0;
1784 
1785 	if (state == adev->vcn.cur_state)
1786 		return 0;
1787 
1788 	if (state == AMD_PG_STATE_GATE)
1789 		ret = vcn_v2_5_stop(adev);
1790 	else
1791 		ret = vcn_v2_5_start(adev);
1792 
1793 	if (!ret)
1794 		adev->vcn.cur_state = state;
1795 
1796 	return ret;
1797 }
1798 
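/*
 * Illustrative sketch, not from the upstream driver: power gating is
 * normally requested through the IP-block helper rather than by calling
 * the hook directly, e.g. from the VCN idle handling:
 *
 *	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 *					       AMD_PG_STATE_GATE);
 *
 * which lands in vcn_v2_5_set_powergating_state() above: AMD_PG_STATE_GATE
 * stops the instances, any other state (re)starts them, and cur_state is
 * only updated when the transition succeeds.
 */
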
1799 static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
1800 					struct amdgpu_irq_src *source,
1801 					unsigned int type,
1802 					enum amdgpu_interrupt_state state)
1803 {
1804 	return 0;
1805 }
1806 
1807 static int vcn_v2_6_set_ras_interrupt_state(struct amdgpu_device *adev,
1808 					struct amdgpu_irq_src *source,
1809 					unsigned int type,
1810 					enum amdgpu_interrupt_state state)
1811 {
1812 	return 0;
1813 }
1814 
1815 static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
1816 				      struct amdgpu_irq_src *source,
1817 				      struct amdgpu_iv_entry *entry)
1818 {
1819 	uint32_t ip_instance;
1820 
1821 	switch (entry->client_id) {
1822 	case SOC15_IH_CLIENTID_VCN:
1823 		ip_instance = 0;
1824 		break;
1825 	case SOC15_IH_CLIENTID_VCN1:
1826 		ip_instance = 1;
1827 		break;
1828 	default:
1829 		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
1830 		return 0;
1831 	}
1832 
1833 	DRM_DEBUG("IH: VCN TRAP\n");
1834 
1835 	switch (entry->src_id) {
1836 	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
1837 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
1838 		break;
1839 	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1840 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
1841 		break;
1842 	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
1843 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
1844 		break;
1845 	default:
1846 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1847 			  entry->src_id, entry->src_data[0]);
1848 		break;
1849 	}
1850 
1851 	return 0;
1852 }
1853 
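/*
 * Illustrative sketch, not from the upstream driver: this handler is reached
 * because sw_init registers one IRQ source per VCN instance, roughly
 *
 *	amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
 *			  VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT,
 *			  &adev->vcn.inst[j].irq);
 *
 * The IH client_id then selects the instance and src_id selects which ring's
 * fences to process, so the decode ring and both encode rings share a single
 * amdgpu_irq_src per instance.
 */
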
1854 static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
1855 	.set = vcn_v2_5_set_interrupt_state,
1856 	.process = vcn_v2_5_process_interrupt,
1857 };
1858 
1859 static const struct amdgpu_irq_src_funcs vcn_v2_6_ras_irq_funcs = {
1860 	.set = vcn_v2_6_set_ras_interrupt_state,
1861 	.process = amdgpu_vcn_process_poison_irq,
1862 };
1863 
1864 static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
1865 {
1866 	int i;
1867 
1868 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1869 		if (adev->vcn.harvest_config & (1 << i))
1870 			continue;
1871 		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
1872 		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
1873 
1874 		adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1;
1875 		adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs;
1876 	}
1877 }
1878 
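/*
 * Illustrative note, not from the upstream driver: num_types above is sized
 * as one interrupt type for the decode ring plus one per encode ring, which
 * matches the SRCIDs dispatched in vcn_v2_5_process_interrupt(); the RAS
 * poison source is sized the same way but is handled by
 * amdgpu_vcn_process_poison_irq().
 */
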
1879 static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
1880 	.name = "vcn_v2_5",
1881 	.early_init = vcn_v2_5_early_init,
1882 	.late_init = NULL,
1883 	.sw_init = vcn_v2_5_sw_init,
1884 	.sw_fini = vcn_v2_5_sw_fini,
1885 	.hw_init = vcn_v2_5_hw_init,
1886 	.hw_fini = vcn_v2_5_hw_fini,
1887 	.suspend = vcn_v2_5_suspend,
1888 	.resume = vcn_v2_5_resume,
1889 	.is_idle = vcn_v2_5_is_idle,
1890 	.wait_for_idle = vcn_v2_5_wait_for_idle,
1891 	.check_soft_reset = NULL,
1892 	.pre_soft_reset = NULL,
1893 	.soft_reset = NULL,
1894 	.post_soft_reset = NULL,
1895 	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
1896 	.set_powergating_state = vcn_v2_5_set_powergating_state,
1897 	.dump_ip_state = NULL,
1898 	.print_ip_state = NULL,
1899 };
1900 
1901 static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
1902 	.name = "vcn_v2_6",
1903 	.early_init = vcn_v2_5_early_init,
1904 	.late_init = NULL,
1905 	.sw_init = vcn_v2_5_sw_init,
1906 	.sw_fini = vcn_v2_5_sw_fini,
1907 	.hw_init = vcn_v2_5_hw_init,
1908 	.hw_fini = vcn_v2_5_hw_fini,
1909 	.suspend = vcn_v2_5_suspend,
1910 	.resume = vcn_v2_5_resume,
1911 	.is_idle = vcn_v2_5_is_idle,
1912 	.wait_for_idle = vcn_v2_5_wait_for_idle,
1913 	.check_soft_reset = NULL,
1914 	.pre_soft_reset = NULL,
1915 	.soft_reset = NULL,
1916 	.post_soft_reset = NULL,
1917 	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
1918 	.set_powergating_state = vcn_v2_5_set_powergating_state,
1919 	.dump_ip_state = NULL,
1920 	.print_ip_state = NULL,
1921 };
1922 
1923 const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
1924 {
1925 		.type = AMD_IP_BLOCK_TYPE_VCN,
1926 		.major = 2,
1927 		.minor = 5,
1928 		.rev = 0,
1929 		.funcs = &vcn_v2_5_ip_funcs,
1930 };
1931 
1932 const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
1933 {
1934 		.type = AMD_IP_BLOCK_TYPE_VCN,
1935 		.major = 2,
1936 		.minor = 6,
1937 		.rev = 0,
1938 		.funcs = &vcn_v2_6_ip_funcs,
1939 };
1940 
1941 static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
1942 			uint32_t instance, uint32_t sub_block)
1943 {
1944 	uint32_t poison_stat = 0, reg_value = 0;
1945 
1946 	switch (sub_block) {
1947 	case AMDGPU_VCN_V2_6_VCPU_VCODEC:
1948 		reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS);
1949 		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
1950 		break;
1951 	default:
1952 		break;
1953 	}
1954 
1955 	if (poison_stat)
1956 		dev_info(adev->dev, "Poison detected in VCN%d, sub_block %d\n",
1957 			instance, sub_block);
1958 
1959 	return poison_stat;
1960 }
1961 
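/*
 * Illustrative note, not from the upstream driver: REG_GET_FIELD() is the
 * usual mask-and-shift helper built on the generated sh_mask defines, so the
 * POISONED_PF read above is roughly equivalent to
 *
 *	poison_stat = (reg_value &
 *		       UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF_MASK) >>
 *		      UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF__SHIFT;
 *
 * A non-zero field indicates the VCPU/VCODEC sub-block of that instance has
 * consumed poisoned data.
 */
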
1962 static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev)
1963 {
1964 	uint32_t inst, sub;
1965 	uint32_t poison_stat = 0;
1966 
1967 	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
1968 		for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++)
1969 			poison_stat +=
1970 			vcn_v2_6_query_poison_by_instance(adev, inst, sub);
1971 
1972 	return !!poison_stat;
1973 }
1974 
1975 const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
1976 	.query_poison_status = vcn_v2_6_query_poison_status,
1977 };
1978 
1979 static struct amdgpu_vcn_ras vcn_v2_6_ras = {
1980 	.ras_block = {
1981 		.hw_ops = &vcn_v2_6_ras_hw_ops,
1982 		.ras_late_init = amdgpu_vcn_ras_late_init,
1983 	},
1984 };
1985 
1986 static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
1987 {
1988 	switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) {
1989 	case IP_VERSION(2, 6, 0):
1990 		adev->vcn.ras = &vcn_v2_6_ras;
1991 		break;
1992 	default:
1993 		break;
1994 	}
1995 }
1996