xref: /linux/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c (revision 569d7db70e5dcf13fbf072f10e9096577ac1e565)
1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/firmware.h>
25 #include <drm/drm_drv.h>
26 
27 #include "amdgpu.h"
28 #include "amdgpu_vcn.h"
29 #include "amdgpu_pm.h"
30 #include "soc15.h"
31 #include "soc15d.h"
32 #include "soc15_hw_ip.h"
33 #include "vcn_v2_0.h"
34 #include "mmsch_v4_0_3.h"
35 
36 #include "vcn/vcn_4_0_3_offset.h"
37 #include "vcn/vcn_4_0_3_sh_mask.h"
38 #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h"
39 
40 #define mmUVD_DPG_LMA_CTL		regUVD_DPG_LMA_CTL
41 #define mmUVD_DPG_LMA_CTL_BASE_IDX	regUVD_DPG_LMA_CTL_BASE_IDX
42 #define mmUVD_DPG_LMA_DATA		regUVD_DPG_LMA_DATA
43 #define mmUVD_DPG_LMA_DATA_BASE_IDX	regUVD_DPG_LMA_DATA_BASE_IDX
44 
45 #define VCN_VID_SOC_ADDRESS_2_0		0x1fb00
46 #define VCN1_VID_SOC_ADDRESS_3_0	0x48300
47 
48 static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
49 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
50 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
51 static int vcn_v4_0_3_set_powergating_state(void *handle,
52 		enum amd_powergating_state state);
53 static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev,
54 		int inst_idx, struct dpg_pause_state *new_state);
55 static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring);
56 static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev);
57 static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
58 				  int inst_idx, bool indirect);
59 /**
60  * vcn_v4_0_3_early_init - set function pointers
61  *
62  * @handle: amdgpu_device pointer
63  *
64  * Set ring and irq function pointers
65  */
66 static int vcn_v4_0_3_early_init(void *handle)
67 {
68 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
69 
70 	/* re-use enc ring as unified ring */
71 	adev->vcn.num_enc_rings = 1;
72 
73 	vcn_v4_0_3_set_unified_ring_funcs(adev);
74 	vcn_v4_0_3_set_irq_funcs(adev);
75 	vcn_v4_0_3_set_ras_funcs(adev);
76 
77 	return amdgpu_vcn_early_init(adev);
78 }
79 
80 /**
81  * vcn_v4_0_3_sw_init - sw init for VCN block
82  *
83  * @handle: amdgpu_device pointer
84  *
85  * Load firmware and sw initialization
86  */
87 static int vcn_v4_0_3_sw_init(void *handle)
88 {
89 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
90 	struct amdgpu_ring *ring;
91 	int i, r, vcn_inst;
92 
93 	r = amdgpu_vcn_sw_init(adev);
94 	if (r)
95 		return r;
96 
97 	amdgpu_vcn_setup_ucode(adev);
98 
99 	r = amdgpu_vcn_resume(adev);
100 	if (r)
101 		return r;
102 
103 	/* VCN UNIFIED TRAP */
104 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
105 		VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
106 	if (r)
107 		return r;
108 
109 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
110 		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
111 
112 		vcn_inst = GET_INST(VCN, i);
113 
114 		ring = &adev->vcn.inst[i].ring_enc[0];
115 		ring->use_doorbell = true;
116 
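		/* The doorbell stride per VCN instance differs between
		 * bare-metal (9 doorbells) and SR-IOV (32 doorbells), on top
		 * of the shared vcn_ring0_1 doorbell base.
		 */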
117 		if (!amdgpu_sriov_vf(adev))
118 			ring->doorbell_index =
119 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
120 				9 * vcn_inst;
121 		else
122 			ring->doorbell_index =
123 				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
124 				32 * vcn_inst;
125 
126 		ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id);
127 		sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id);
128 		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
129 				     AMDGPU_RING_PRIO_DEFAULT,
130 				     &adev->vcn.inst[i].sched_score);
131 		if (r)
132 			return r;
133 
134 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
135 		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
136 		fw_shared->sq.is_enabled = true;
137 
138 		if (amdgpu_vcnfw_log)
139 			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
140 	}
141 
142 	if (amdgpu_sriov_vf(adev)) {
143 		r = amdgpu_virt_alloc_mm_table(adev);
144 		if (r)
145 			return r;
146 	}
147 
148 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
149 		adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
150 
151 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
152 		r = amdgpu_vcn_ras_sw_init(adev);
153 		if (r) {
154 			dev_err(adev->dev, "Failed to initialize vcn ras block!\n");
155 			return r;
156 		}
157 	}
158 
159 	return 0;
160 }
161 
162 /**
163  * vcn_v4_0_3_sw_fini - sw fini for VCN block
164  *
165  * @handle: amdgpu_device pointer
166  *
167  * Suspend VCN and free up the software allocations
168  */
169 static int vcn_v4_0_3_sw_fini(void *handle)
170 {
171 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
172 	int i, r, idx;
173 
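	/* Clear the shared firmware flags only while the device has not been
	 * unplugged.
	 */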
174 	if (drm_dev_enter(&adev->ddev, &idx)) {
175 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
176 			volatile struct amdgpu_vcn4_fw_shared *fw_shared;
177 
178 			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
179 			fw_shared->present_flag_0 = 0;
180 			fw_shared->sq.is_enabled = cpu_to_le32(false);
181 		}
182 		drm_dev_exit(idx);
183 	}
184 
185 	if (amdgpu_sriov_vf(adev))
186 		amdgpu_virt_free_mm_table(adev);
187 
188 	r = amdgpu_vcn_suspend(adev);
189 	if (r)
190 		return r;
191 
192 	r = amdgpu_vcn_sw_fini(adev);
193 
194 	return r;
195 }
196 
197 /**
198  * vcn_v4_0_3_hw_init - start and test VCN block
199  *
200  * @handle: amdgpu_device pointer
201  *
202  * Initialize the hardware, boot up the VCPU and do some testing
203  */
204 static int vcn_v4_0_3_hw_init(void *handle)
205 {
206 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
207 	struct amdgpu_ring *ring;
208 	int i, r, vcn_inst;
209 
210 	if (amdgpu_sriov_vf(adev)) {
211 		r = vcn_v4_0_3_start_sriov(adev);
212 		if (r)
213 			return r;
214 
215 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
216 			ring = &adev->vcn.inst[i].ring_enc[0];
217 			ring->wptr = 0;
218 			ring->wptr_old = 0;
219 			vcn_v4_0_3_unified_ring_set_wptr(ring);
220 			ring->sched.ready = true;
221 		}
222 	} else {
223 		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
224 			vcn_inst = GET_INST(VCN, i);
225 			ring = &adev->vcn.inst[i].ring_enc[0];
226 
227 			if (ring->use_doorbell) {
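				/* Program the NBIO doorbell aperture for this
				 * instance, then enable the ring doorbell via
				 * VCN_RB1_DB_CTRL.
				 */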
228 				adev->nbio.funcs->vcn_doorbell_range(
229 					adev, ring->use_doorbell,
230 					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
231 						9 * vcn_inst,
232 					adev->vcn.inst[i].aid_id);
233 
234 				WREG32_SOC15(
235 					VCN, GET_INST(VCN, ring->me),
236 					regVCN_RB1_DB_CTRL,
237 					ring->doorbell_index
238 							<< VCN_RB1_DB_CTRL__OFFSET__SHIFT |
239 						VCN_RB1_DB_CTRL__EN_MASK);
240 
241 				/* Read DB_CTRL to flush the write DB_CTRL command. */
242 				RREG32_SOC15(
243 					VCN, GET_INST(VCN, ring->me),
244 					regVCN_RB1_DB_CTRL);
245 			}
246 
247 			r = amdgpu_ring_test_helper(ring);
248 			if (r)
249 				return r;
250 		}
251 	}
252 
253 	return r;
254 }
255 
256 /**
257  * vcn_v4_0_3_hw_fini - stop the hardware block
258  *
259  * @handle: amdgpu_device pointer
260  *
261  * Stop the VCN block and mark the ring as no longer ready
262  */
263 static int vcn_v4_0_3_hw_fini(void *handle)
264 {
265 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
266 
267 	cancel_delayed_work_sync(&adev->vcn.idle_work);
268 
269 	if (adev->vcn.cur_state != AMD_PG_STATE_GATE)
270 		vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
271 
272 	return 0;
273 }
274 
275 /**
276  * vcn_v4_0_3_suspend - suspend VCN block
277  *
278  * @handle: amdgpu_device pointer
279  *
280  * HW fini and suspend VCN block
281  */
282 static int vcn_v4_0_3_suspend(void *handle)
283 {
284 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
285 	int r;
286 
287 	r = vcn_v4_0_3_hw_fini(adev);
288 	if (r)
289 		return r;
290 
291 	r = amdgpu_vcn_suspend(adev);
292 
293 	return r;
294 }
295 
296 /**
297  * vcn_v4_0_3_resume - resume VCN block
298  *
299  * @handle: amdgpu_device pointer
300  *
301  * Resume firmware and hw init VCN block
302  */
303 static int vcn_v4_0_3_resume(void *handle)
304 {
305 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
306 	int r;
307 
308 	r = amdgpu_vcn_resume(adev);
309 	if (r)
310 		return r;
311 
312 	r = vcn_v4_0_3_hw_init(adev);
313 
314 	return r;
315 }
316 
317 /**
318  * vcn_v4_0_3_mc_resume - memory controller programming
319  *
320  * @adev: amdgpu_device pointer
321  * @inst_idx: instance number
322  *
323  * Let the VCN memory controller know its offsets
324  */
325 static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx)
326 {
327 	uint32_t offset, size, vcn_inst;
328 	const struct common_firmware_header *hdr;
329 
330 	hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
331 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
332 
333 	vcn_inst = GET_INST(VCN, inst_idx);
334 	/* cache window 0: fw */
335 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
336 		WREG32_SOC15(
337 			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
338 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
339 				 .tmr_mc_addr_lo));
340 		WREG32_SOC15(
341 			VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
342 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx]
343 				 .tmr_mc_addr_hi));
344 		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0);
345 		offset = 0;
346 	} else {
347 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
348 			     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
349 		WREG32_SOC15(VCN, vcn_inst,
350 			     regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
351 			     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr));
352 		offset = size;
353 		WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0,
354 			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
355 	}
356 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size);
357 
358 	/* cache window 1: stack */
359 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
360 		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
361 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
362 		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset));
363 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0);
364 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1,
365 		     AMDGPU_VCN_STACK_SIZE);
366 
367 	/* cache window 2: context */
368 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
369 		     lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
370 				   AMDGPU_VCN_STACK_SIZE));
371 	WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
372 		     upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
373 				   AMDGPU_VCN_STACK_SIZE));
374 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0);
375 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2,
376 		     AMDGPU_VCN_CONTEXT_SIZE);
377 
378 	/* non-cache window */
379 	WREG32_SOC15(
380 		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
381 		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
382 	WREG32_SOC15(
383 		VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
384 		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr));
385 	WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
386 	WREG32_SOC15(
387 		VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0,
388 		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
389 }
390 
391 /**
392  * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode
393  *
394  * @adev: amdgpu_device pointer
395  * @inst_idx: instance number index
396  * @indirect: indirectly write sram
397  *
398  * Let the VCN memory controller know its offsets in dpg mode
399  */
400 static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
401 {
402 	uint32_t offset, size;
403 	const struct common_firmware_header *hdr;
404 
405 	hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data;
406 	size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
407 
408 	/* cache window 0: fw */
409 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
410 		if (!indirect) {
411 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
412 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
413 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
414 					inst_idx].tmr_mc_addr_lo), 0, indirect);
415 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
416 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
417 				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN +
418 					inst_idx].tmr_mc_addr_hi), 0, indirect);
419 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
420 				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
421 		} else {
422 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
423 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
424 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
425 				VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
426 			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
427 				VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
428 		}
429 		offset = 0;
430 	} else {
431 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
432 			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
433 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
434 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
435 			VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
436 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
437 		offset = size;
438 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
439 			VCN, 0, regUVD_VCPU_CACHE_OFFSET0),
440 			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
441 	}
442 
443 	if (!indirect)
444 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
445 			VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
446 	else
447 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
448 			VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
449 
450 	/* cache window 1: stack */
451 	if (!indirect) {
452 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
453 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
454 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
455 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
456 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
457 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
458 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
459 			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
460 	} else {
461 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
462 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
463 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
464 			VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
465 		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
466 			VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
467 	}
468 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
469 			VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
470 
471 	/* cache window 2: context */
472 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
473 			VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
474 			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
475 				AMDGPU_VCN_STACK_SIZE), 0, indirect);
476 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
477 			VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
478 			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset +
479 				AMDGPU_VCN_STACK_SIZE), 0, indirect);
480 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
481 			VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
482 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
483 			VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
484 
485 	/* non-cache window */
486 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
487 			VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
488 			lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
489 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
490 			VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
491 			upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
492 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
493 			VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
494 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
495 			VCN, 0, regUVD_VCPU_NONCACHE_SIZE0),
496 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
497 
498 	/* VCN global tiling registers */
499 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
500 		VCN, 0, regUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
501 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
502 		VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
503 }
504 
505 /**
506  * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating
507  *
508  * @adev: amdgpu_device pointer
509  * @inst_idx: instance number
510  *
511  * Disable clock gating for VCN block
512  */
513 static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx)
514 {
515 	uint32_t data;
516 	int vcn_inst;
517 
518 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
519 		return;
520 
521 	vcn_inst = GET_INST(VCN, inst_idx);
522 
523 	/* VCN disable CGC */
524 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
525 	data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
526 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
527 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
528 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
529 
530 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE);
531 	data &= ~(UVD_CGC_GATE__SYS_MASK
532 		| UVD_CGC_GATE__MPEG2_MASK
533 		| UVD_CGC_GATE__REGS_MASK
534 		| UVD_CGC_GATE__RBC_MASK
535 		| UVD_CGC_GATE__LMI_MC_MASK
536 		| UVD_CGC_GATE__LMI_UMC_MASK
537 		| UVD_CGC_GATE__MPC_MASK
538 		| UVD_CGC_GATE__LBSI_MASK
539 		| UVD_CGC_GATE__LRBBM_MASK
540 		| UVD_CGC_GATE__WCB_MASK
541 		| UVD_CGC_GATE__VCPU_MASK
542 		| UVD_CGC_GATE__MMSCH_MASK);
543 
544 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data);
545 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF);
546 
547 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
548 	data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK
549 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
550 		| UVD_CGC_CTRL__REGS_MODE_MASK
551 		| UVD_CGC_CTRL__RBC_MODE_MASK
552 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
553 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
554 		| UVD_CGC_CTRL__MPC_MODE_MASK
555 		| UVD_CGC_CTRL__LBSI_MODE_MASK
556 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
557 		| UVD_CGC_CTRL__WCB_MODE_MASK
558 		| UVD_CGC_CTRL__VCPU_MODE_MASK
559 		| UVD_CGC_CTRL__MMSCH_MODE_MASK);
560 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
561 
562 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE);
563 	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
564 		| UVD_SUVD_CGC_GATE__SIT_MASK
565 		| UVD_SUVD_CGC_GATE__SMP_MASK
566 		| UVD_SUVD_CGC_GATE__SCM_MASK
567 		| UVD_SUVD_CGC_GATE__SDB_MASK
568 		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
569 		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
570 		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
571 		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
572 		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
573 		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
574 		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
575 		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
576 		| UVD_SUVD_CGC_GATE__ENT_MASK
577 		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
578 		| UVD_SUVD_CGC_GATE__SITE_MASK
579 		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
580 		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
581 		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
582 		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
583 		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
584 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data);
585 
586 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
587 	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
588 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
589 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
590 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
591 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
592 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
593 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
594 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
595 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
596 }
597 
598 /**
599  * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode
600  *
601  * @adev: amdgpu_device pointer
602  * @sram_sel: sram select
603  * @inst_idx: instance number index
604  * @indirect: indirectly write sram
605  *
606  * Disable clock gating for VCN block with dpg mode
607  */
608 static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel,
609 				int inst_idx, uint8_t indirect)
610 {
611 	uint32_t reg_data = 0;
612 
613 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
614 		return;
615 
616 	/* enable sw clock gating control */
617 	reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
618 	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
619 	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
620 	reg_data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK |
621 		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
622 		 UVD_CGC_CTRL__REGS_MODE_MASK |
623 		 UVD_CGC_CTRL__RBC_MODE_MASK |
624 		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
625 		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
626 		 UVD_CGC_CTRL__IDCT_MODE_MASK |
627 		 UVD_CGC_CTRL__MPRD_MODE_MASK |
628 		 UVD_CGC_CTRL__MPC_MODE_MASK |
629 		 UVD_CGC_CTRL__LBSI_MODE_MASK |
630 		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
631 		 UVD_CGC_CTRL__WCB_MODE_MASK |
632 		 UVD_CGC_CTRL__VCPU_MODE_MASK);
633 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
634 		VCN, 0, regUVD_CGC_CTRL), reg_data, sram_sel, indirect);
635 
636 	/* turn off clock gating */
637 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
638 		VCN, 0, regUVD_CGC_GATE), 0, sram_sel, indirect);
639 
640 	/* turn on SUVD clock gating */
641 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
642 		VCN, 0, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
643 
644 	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
645 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
646 		VCN, 0, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
647 }
648 
649 /**
650  * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating
651  *
652  * @adev: amdgpu_device pointer
653  * @inst_idx: instance number
654  *
655  * Enable clock gating for VCN block
656  */
657 static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx)
658 {
659 	uint32_t data;
660 	int vcn_inst;
661 
662 	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
663 		return;
664 
665 	vcn_inst = GET_INST(VCN, inst_idx);
666 
667 	/* enable VCN CGC */
668 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
669 	data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
670 	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
671 	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
672 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
673 
674 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL);
675 	data |= (UVD_CGC_CTRL__SYS_MODE_MASK
676 		| UVD_CGC_CTRL__MPEG2_MODE_MASK
677 		| UVD_CGC_CTRL__REGS_MODE_MASK
678 		| UVD_CGC_CTRL__RBC_MODE_MASK
679 		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
680 		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
681 		| UVD_CGC_CTRL__MPC_MODE_MASK
682 		| UVD_CGC_CTRL__LBSI_MODE_MASK
683 		| UVD_CGC_CTRL__LRBBM_MODE_MASK
684 		| UVD_CGC_CTRL__WCB_MODE_MASK
685 		| UVD_CGC_CTRL__VCPU_MODE_MASK);
686 	WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data);
687 
688 	data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL);
689 	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
690 		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
691 		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
692 		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
693 		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
694 		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
695 		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
696 		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
697 	WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data);
698 }
699 
700 /**
701  * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode
702  *
703  * @adev: amdgpu_device pointer
704  * @inst_idx: instance number index
705  * @indirect: indirectly write sram
706  *
707  * Start VCN block with dpg mode
708  */
709 static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
710 {
711 	volatile struct amdgpu_vcn4_fw_shared *fw_shared =
712 						adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
713 	struct amdgpu_ring *ring;
714 	int vcn_inst;
715 	uint32_t tmp;
716 
717 	vcn_inst = GET_INST(VCN, inst_idx);
718 	/* disable register anti-hang mechanism */
719 	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1,
720 		 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
721 	/* enable dynamic power gating mode */
722 	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS);
723 	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
724 	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
725 	WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp);
726 
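	/* When 'indirect' is set, the register programming below is staged
	 * into the DPG SRAM image and uploaded through PSP at the end of
	 * this function; otherwise the registers are written directly.
	 */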
727 	if (indirect) {
728 		DRM_DEV_DEBUG(adev->dev, "VCN %d start: on AID %d",
729 			inst_idx, adev->vcn.inst[inst_idx].aid_id);
730 		adev->vcn.inst[inst_idx].dpg_sram_curr_addr =
731 				(uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
732 		/* Use dummy register 0xDEADBEEF to pass the AID selection to PSP FW */
733 		WREG32_SOC15_DPG_MODE(inst_idx, 0xDEADBEEF,
734 			adev->vcn.inst[inst_idx].aid_id, 0, true);
735 	}
736 
737 	/* disable clock gating */
738 	vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
739 
740 	/* enable VCPU clock */
741 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
742 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
743 	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
744 
745 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
746 		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
747 
748 	/* disable master interrupt */
749 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
750 		VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect);
751 
752 	/* setup regUVD_LMI_CTRL */
753 	tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
754 		UVD_LMI_CTRL__REQ_MODE_MASK |
755 		UVD_LMI_CTRL__CRC_RESET_MASK |
756 		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
757 		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
758 		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
759 		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
760 		0x00100000L);
761 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
762 		VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect);
763 
764 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
765 		VCN, 0, regUVD_MPC_CNTL),
766 		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
767 
768 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
769 		VCN, 0, regUVD_MPC_SET_MUXA0),
770 		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
771 		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
772 		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
773 		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
774 
775 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
776 		VCN, 0, regUVD_MPC_SET_MUXB0),
777 		 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
778 		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
779 		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
780 		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
781 
782 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
783 		VCN, 0, regUVD_MPC_SET_MUX),
784 		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
785 		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
786 		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
787 
788 	vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect);
789 
790 	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
791 	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
792 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
793 		VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect);
794 
795 	/* enable LMI MC and UMC channels */
796 	tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT;
797 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
798 		VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect);
799 
800 	vcn_v4_0_3_enable_ras(adev, inst_idx, indirect);
801 
802 	/* enable master interrupt */
803 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
804 		VCN, 0, regUVD_MASTINT_EN),
805 		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
806 
807 	if (indirect)
808 		amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM);
809 
810 	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
811 
812 	/* program the RB_BASE for ring buffer */
813 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
814 		     lower_32_bits(ring->gpu_addr));
815 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
816 		     upper_32_bits(ring->gpu_addr));
817 
818 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
819 		     ring->ring_size / sizeof(uint32_t));
820 
821 	/* resetting ring, fw should not check RB ring */
822 	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
823 	tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
824 	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
825 	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
826 
827 	/* Initialize the ring buffer's read and write pointers */
828 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
829 	WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
830 	ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
831 
832 	tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
833 	tmp |= VCN_RB_ENABLE__RB_EN_MASK;
834 	WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
835 	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
836 
837 	/*resetting done, fw can check RB ring */
838 	fw_shared->sq.queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
839 
840 	return 0;
841 }
842 
843 static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
844 {
845 	int i, vcn_inst;
846 	struct amdgpu_ring *ring_enc;
847 	uint64_t cache_addr;
848 	uint64_t rb_enc_addr;
849 	uint64_t ctx_addr;
850 	uint32_t param, resp, expected;
851 	uint32_t offset, cache_size;
852 	uint32_t tmp, timeout;
853 
854 	struct amdgpu_mm_table *table = &adev->virt.mm_table;
855 	uint32_t *table_loc;
856 	uint32_t table_size;
857 	uint32_t size, size_dw;
858 	uint32_t init_status;
859 	uint32_t enabled_vcn;
860 
861 	struct mmsch_v4_0_cmd_direct_write
862 		direct_wt = { {0} };
863 	struct mmsch_v4_0_cmd_direct_read_modify_write
864 		direct_rd_mod_wt = { {0} };
865 	struct mmsch_v4_0_cmd_end end = { {0} };
866 	struct mmsch_v4_0_3_init_header header;
867 
868 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
869 	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
870 
871 	direct_wt.cmd_header.command_type =
872 		MMSCH_COMMAND__DIRECT_REG_WRITE;
873 	direct_rd_mod_wt.cmd_header.command_type =
874 		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
875 	end.cmd_header.command_type = MMSCH_COMMAND__END;
876 
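	/* For each instance: build an MMSCH init table describing the
	 * firmware cache windows and ring buffer, hand the table to MMSCH
	 * through the VF context registers, then poll the mailbox until
	 * MMSCH reports completion.
	 */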
877 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
878 		vcn_inst = GET_INST(VCN, i);
879 
880 		memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
881 		header.version = MMSCH_VERSION;
882 		header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
883 
884 		table_loc = (uint32_t *)table->cpu_addr;
885 		table_loc += header.total_size;
886 
887 		table_size = 0;
888 
889 		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
890 			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
891 
892 		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4);
893 
894 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
895 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
896 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
897 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
898 
899 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
900 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
901 				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
902 
903 			offset = 0;
904 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
905 				regUVD_VCPU_CACHE_OFFSET0), 0);
906 		} else {
907 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
908 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
909 				lower_32_bits(adev->vcn.inst[i].gpu_addr));
910 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
911 				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
912 				upper_32_bits(adev->vcn.inst[i].gpu_addr));
913 			offset = cache_size;
914 			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
915 				regUVD_VCPU_CACHE_OFFSET0),
916 				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
917 		}
918 
919 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
920 			regUVD_VCPU_CACHE_SIZE0),
921 			cache_size);
922 
923 		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
924 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
925 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
926 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
927 			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
928 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
929 			regUVD_VCPU_CACHE_OFFSET1), 0);
930 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
931 			regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
932 
933 		cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
934 			AMDGPU_VCN_STACK_SIZE;
935 
936 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
937 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
938 
939 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
940 			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
941 
942 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
943 			regUVD_VCPU_CACHE_OFFSET2), 0);
944 
945 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
946 			regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
947 
948 		fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
949 		rb_setup = &fw_shared->rb_setup;
950 
951 		ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
952 		ring_enc->wptr = 0;
953 		rb_enc_addr = ring_enc->gpu_addr;
954 
955 		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
956 		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
957 		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
958 		rb_setup->rb_size = ring_enc->ring_size / 4;
959 		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
960 
961 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
962 			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
963 			lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
964 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
965 			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
966 			upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
967 		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
968 			regUVD_VCPU_NONCACHE_SIZE0),
969 			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
970 		MMSCH_V4_0_INSERT_END();
971 
972 		header.vcn0.init_status = 0;
973 		header.vcn0.table_offset = header.total_size;
974 		header.vcn0.table_size = table_size;
975 		header.total_size += table_size;
976 
977 		/* Send init table to mmsch */
978 		size = sizeof(struct mmsch_v4_0_3_init_header);
979 		table_loc = (uint32_t *)table->cpu_addr;
980 		memcpy((void *)table_loc, &header, size);
981 
982 		ctx_addr = table->gpu_addr;
983 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
984 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
985 
986 		tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
987 		tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
988 		tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
989 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
990 
991 		size = header.total_size;
992 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
993 
994 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
995 
996 		param = 0x00000001;
997 		WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
998 		tmp = 0;
999 		timeout = 1000;
1000 		resp = 0;
1001 		expected = MMSCH_VF_MAILBOX_RESP__OK;
1002 		while (resp != expected) {
1003 			resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
1004 			if (resp != 0)
1005 				break;
1006 
1007 			udelay(10);
1008 			tmp = tmp + 10;
1009 			if (tmp >= timeout) {
1010 				DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
1011 					" waiting for regMMSCH_VF_MAILBOX_RESP "\
1012 					"(expected=0x%08x, readback=0x%08x)\n",
1013 					tmp, expected, resp);
1014 				return -EBUSY;
1015 			}
1016 		}
1017 
1018 		enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
1019 		init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
1020 		if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
1021 					&& init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
1022 			DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
1023 				"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
1024 		}
1025 	}
1026 
1027 	return 0;
1028 }
1029 
1030 /**
1031  * vcn_v4_0_3_start - VCN start
1032  *
1033  * @adev: amdgpu_device pointer
1034  *
1035  * Start VCN block
1036  */
1037 static int vcn_v4_0_3_start(struct amdgpu_device *adev)
1038 {
1039 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1040 	struct amdgpu_ring *ring;
1041 	int i, j, k, r, vcn_inst;
1042 	uint32_t tmp;
1043 
1044 	if (adev->pm.dpm_enabled)
1045 		amdgpu_dpm_enable_uvd(adev, true);
1046 
1047 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
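		/* With DPG supported, use the dynamic power gating start path
		 * and skip the static power-up sequence below.
		 */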
1048 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1049 			r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
1050 			continue;
1051 		}
1052 
1053 		vcn_inst = GET_INST(VCN, i);
1054 		/* set VCN status busy */
1055 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) |
1056 		      UVD_STATUS__UVD_BUSY;
1057 		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp);
1058 
1059 		/* disable SW clock gating */
1060 		vcn_v4_0_3_disable_clock_gating(adev, i);
1061 
1062 		/* enable VCPU clock */
1063 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
1064 			 UVD_VCPU_CNTL__CLK_EN_MASK,
1065 			 ~UVD_VCPU_CNTL__CLK_EN_MASK);
1066 
1067 		/* disable master interrupt */
1068 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0,
1069 			 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1070 
1071 		/* enable LMI MC and UMC channels */
1072 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0,
1073 			 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1074 
1075 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1076 		tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1077 		tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1078 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1079 
1080 		/* setup regUVD_LMI_CTRL */
1081 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL);
1082 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL,
1083 			     tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1084 				     UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1085 				     UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1086 				     UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1087 
1088 		/* setup regUVD_MPC_CNTL */
1089 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL);
1090 		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1091 		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1092 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp);
1093 
1094 		/* setup UVD_MPC_SET_MUXA0 */
1095 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0,
1096 			     ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1097 			      (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1098 			      (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1099 			      (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1100 
1101 		/* setup UVD_MPC_SET_MUXB0 */
1102 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0,
1103 			     ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1104 			      (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1105 			      (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1106 			      (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1107 
1108 		/* setup UVD_MPC_SET_MUX */
1109 		WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX,
1110 			     ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1111 			      (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1112 			      (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1113 
1114 		vcn_v4_0_3_mc_resume(adev, i);
1115 
1116 		/* VCN global tiling registers */
1117 		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG,
1118 			     adev->gfx.config.gb_addr_config);
1119 		WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG,
1120 			     adev->gfx.config.gb_addr_config);
1121 
1122 		/* unblock VCPU register access */
1123 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0,
1124 			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1125 
1126 		/* release VCPU reset to boot */
1127 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
1128 			 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1129 
1130 		for (j = 0; j < 10; ++j) {
1131 			uint32_t status;
1132 
1133 			for (k = 0; k < 100; ++k) {
1134 				status = RREG32_SOC15(VCN, vcn_inst,
1135 						      regUVD_STATUS);
1136 				if (status & 2)
1137 					break;
1138 				mdelay(10);
1139 			}
1140 			r = 0;
1141 			if (status & 2)
1142 				break;
1143 
1144 			DRM_DEV_ERROR(adev->dev,
1145 				"VCN decode not responding, trying to reset the VCPU!!!\n");
1146 			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1147 						  regUVD_VCPU_CNTL),
1148 				 UVD_VCPU_CNTL__BLK_RST_MASK,
1149 				 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1150 			mdelay(10);
1151 			WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst,
1152 						  regUVD_VCPU_CNTL),
1153 				 0, ~UVD_VCPU_CNTL__BLK_RST_MASK);
1154 
1155 			mdelay(10);
1156 			r = -1;
1157 		}
1158 
1159 		if (r) {
1160 			DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n");
1161 			return r;
1162 		}
1163 
1164 		/* enable master interrupt */
1165 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN),
1166 			 UVD_MASTINT_EN__VCPU_EN_MASK,
1167 			 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1168 
1169 		/* clear the busy bit of VCN_STATUS */
1170 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0,
1171 			 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1172 
1173 		ring = &adev->vcn.inst[i].ring_enc[0];
1174 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1175 
1176 		/* program the RB_BASE for ring buffer */
1177 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO,
1178 			     lower_32_bits(ring->gpu_addr));
1179 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI,
1180 			     upper_32_bits(ring->gpu_addr));
1181 
1182 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE,
1183 			     ring->ring_size / sizeof(uint32_t));
1184 
1185 		/* resetting ring, fw should not check RB ring */
1186 		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1187 		tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK);
1188 		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1189 
1190 		/* Initialize the ring buffer's read and write pointers */
1191 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0);
1192 		WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0);
1193 
1194 		tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE);
1195 		tmp |= VCN_RB_ENABLE__RB_EN_MASK;
1196 		WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp);
1197 
1198 		ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
1199 		fw_shared->sq.queue_mode &=
1200 			cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF));
1202 	}
1203 	return 0;
1204 }
1205 
1206 /**
1207  * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode
1208  *
1209  * @adev: amdgpu_device pointer
1210  * @inst_idx: instance number index
1211  *
1212  * Stop VCN block with dpg mode
1213  */
1214 static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
1215 {
1216 	uint32_t tmp;
1217 	int vcn_inst;
1218 
1219 	vcn_inst = GET_INST(VCN, inst_idx);
1220 
1221 	/* Wait for power status to be 1 */
1222 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
1223 			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1224 
1225 	/* wait for read ptr to be equal to write ptr */
1226 	tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR);
1227 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1228 
1229 	SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1,
1230 			   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1231 
1232 	/* disable dynamic power gating mode */
1233 	WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0,
1234 		 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1235 	return 0;
1236 }
1237 
1238 /**
1239  * vcn_v4_0_3_stop - VCN stop
1240  *
1241  * @adev: amdgpu_device pointer
1242  *
1243  * Stop VCN block
1244  */
1245 static int vcn_v4_0_3_stop(struct amdgpu_device *adev)
1246 {
1247 	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
1248 	int i, r = 0, vcn_inst;
1249 	uint32_t tmp;
1250 
1251 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1252 		vcn_inst = GET_INST(VCN, i);
1253 
1254 		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1255 		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
1256 
1257 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1258 			vcn_v4_0_3_stop_dpg_mode(adev, i);
1259 			continue;
1260 		}
1261 
1262 		/* wait for vcn idle */
1263 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS,
1264 				       UVD_STATUS__IDLE, 0x7);
1265 		if (r)
1266 			goto Done;
1267 
1268 		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1269 			UVD_LMI_STATUS__READ_CLEAN_MASK |
1270 			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1271 			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1272 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
1273 				       tmp);
1274 		if (r)
1275 			goto Done;
1276 
1277 		/* stall UMC channel */
1278 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2);
1279 		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1280 		WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp);
1281 		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
1282 			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1283 		r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp,
1284 				       tmp);
1285 		if (r)
1286 			goto Done;
1287 
1288 		/* block VCPU register access */
1289 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL),
1290 			 UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1291 			 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1292 
1293 		/* put VCPU into reset */
1294 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL),
1295 			 UVD_VCPU_CNTL__BLK_RST_MASK,
1296 			 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1297 
1298 		/* disable VCPU clock */
1299 		WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0,
1300 			 ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1301 
1302 		/* reset LMI UMC/LMI/VCPU */
1303 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1304 		tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1305 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1306 
1307 		tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET);
1308 		tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1309 		WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp);
1310 
1311 		/* clear VCN status */
1312 		WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0);
1313 
1314 		/* apply HW clock gating */
1315 		vcn_v4_0_3_enable_clock_gating(adev, i);
1316 	}
1317 Done:
1318 	if (adev->pm.dpm_enabled)
1319 		amdgpu_dpm_enable_uvd(adev, false);
1320 
1321 	return 0;
1322 }
1323 
1324 /**
1325  * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode
1326  *
1327  * @adev: amdgpu_device pointer
1328  * @inst_idx: instance number index
1329  * @new_state: pause state
1330  *
1331  * Pause dpg mode for VCN block
1332  */
1333 static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
1334 				struct dpg_pause_state *new_state)
1335 {
1336 
1337 	return 0;
1338 }
1339 
1340 /**
1341  * vcn_v4_0_3_unified_ring_get_rptr - get unified read pointer
1342  *
1343  * @ring: amdgpu_ring pointer
1344  *
1345  * Returns the current hardware unified read pointer
1346  */
1347 static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring)
1348 {
1349 	struct amdgpu_device *adev = ring->adev;
1350 
1351 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1352 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1353 
1354 	return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR);
1355 }
1356 
1357 /**
1358  * vcn_v4_0_3_unified_ring_get_wptr - get unified write pointer
1359  *
1360  * @ring: amdgpu_ring pointer
1361  *
1362  * Returns the current hardware unified write pointer
1363  */
1364 static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring)
1365 {
1366 	struct amdgpu_device *adev = ring->adev;
1367 
1368 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1369 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1370 
1371 	if (ring->use_doorbell)
1372 		return *ring->wptr_cpu_addr;
1373 	else
1374 		return RREG32_SOC15(VCN, GET_INST(VCN, ring->me),
1375 				    regUVD_RB_WPTR);
1376 }
1377 
1378 /**
1379  * vcn_v4_0_3_unified_ring_set_wptr - set enc write pointer
1380  *
1381  * @ring: amdgpu_ring pointer
1382  *
1383  * Commits the enc write pointer to the hardware
1384  */
1385 static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring)
1386 {
1387 	struct amdgpu_device *adev = ring->adev;
1388 
1389 	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
1390 		DRM_ERROR("wrong ring id is identified in %s", __func__);
1391 
1392 	if (ring->use_doorbell) {
1393 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1394 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1395 	} else {
1396 		WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR,
1397 			     lower_32_bits(ring->wptr));
1398 	}
1399 }
1400 
1401 static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = {
1402 	.type = AMDGPU_RING_TYPE_VCN_ENC,
1403 	.align_mask = 0x3f,
1404 	.nop = VCN_ENC_CMD_NO_OP,
1405 	.get_rptr = vcn_v4_0_3_unified_ring_get_rptr,
1406 	.get_wptr = vcn_v4_0_3_unified_ring_get_wptr,
1407 	.set_wptr = vcn_v4_0_3_unified_ring_set_wptr,
1408 	.emit_frame_size =
1409 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1410 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1411 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
1412 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
1413 		1, /* vcn_v2_0_enc_ring_insert_end */
1414 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
1415 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
1416 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
1417 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
1418 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
1419 	.test_ib = amdgpu_vcn_unified_ring_test_ib,
1420 	.insert_nop = amdgpu_ring_insert_nop,
1421 	.insert_end = vcn_v2_0_enc_ring_insert_end,
1422 	.pad_ib = amdgpu_ring_generic_pad_ib,
1423 	.begin_use = amdgpu_vcn_ring_begin_use,
1424 	.end_use = amdgpu_vcn_ring_end_use,
1425 	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
1426 	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
1427 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1428 };
1429 
1430 /**
1431  * vcn_v4_0_3_set_unified_ring_funcs - set unified ring functions
1432  *
1433  * @adev: amdgpu_device pointer
1434  *
1435  * Set unified ring functions
1436  */
1437 static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev)
1438 {
1439 	int i, vcn_inst;
1440 
1441 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1442 		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs;
1443 		adev->vcn.inst[i].ring_enc[0].me = i;
1444 		vcn_inst = GET_INST(VCN, i);
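		/* Derive the AID this instance belongs to from its physical
		 * instance number.
		 */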
1445 		adev->vcn.inst[i].aid_id =
1446 			vcn_inst / adev->vcn.num_inst_per_aid;
1447 	}
1448 }
1449 
1450 /**
1451  * vcn_v4_0_3_is_idle - check VCN block is idle
1452  *
1453  * @handle: amdgpu_device pointer
1454  *
1455  * Check whether VCN block is idle
1456  */
1457 static bool vcn_v4_0_3_is_idle(void *handle)
1458 {
1459 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1460 	int i, ret = 1;
1461 
1462 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1463 		ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) ==
1464 			UVD_STATUS__IDLE);
1465 	}
1466 
1467 	return ret;
1468 }
1469 
1470 /**
1471  * vcn_v4_0_3_wait_for_idle - wait for VCN block idle
1472  *
1473  * @handle: amdgpu_device pointer
1474  *
1475  * Wait for VCN block idle
1476  */
1477 static int vcn_v4_0_3_wait_for_idle(void *handle)
1478 {
1479 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1480 	int i, ret = 0;
1481 
1482 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1483 		ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS,
1484 					 UVD_STATUS__IDLE, UVD_STATUS__IDLE);
1485 		if (ret)
1486 			return ret;
1487 	}
1488 
1489 	return ret;
1490 }
1491 
1492 /* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state
1493  *
1494  * @handle: amdgpu_device pointer
1495  * @state: clock gating state
1496  *
1497  * Set VCN block clockgating state
1498  */
1499 static int vcn_v4_0_3_set_clockgating_state(void *handle,
1500 					  enum amd_clockgating_state state)
1501 {
1502 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1503 	bool enable = state == AMD_CG_STATE_GATE;
1504 	int i;
1505 
1506 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
1507 		if (enable) {
1508 			if (RREG32_SOC15(VCN, GET_INST(VCN, i),
1509 					 regUVD_STATUS) != UVD_STATUS__IDLE)
1510 				return -EBUSY;
1511 			vcn_v4_0_3_enable_clock_gating(adev, i);
1512 		} else {
1513 			vcn_v4_0_3_disable_clock_gating(adev, i);
1514 		}
1515 	}
1516 	return 0;
1517 }
1518 
1519 /**
1520  * vcn_v4_0_3_set_powergating_state - set VCN block powergating state
1521  *
1522  * @handle: amdgpu_device pointer
1523  * @state: power gating state
1524  *
1525  * Set VCN block powergating state
1526  */
1527 static int vcn_v4_0_3_set_powergating_state(void *handle,
1528 					  enum amd_powergating_state state)
1529 {
1530 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1531 	int ret;
1532 
1533 	/* For SRIOV, the guest should not control VCN power-gating;
1534 	 * MMSCH FW controls both power-gating and clock-gating, so the
1535 	 * guest should avoid touching the CGC and PG registers.
1536 	 */
1537 	if (amdgpu_sriov_vf(adev)) {
1538 		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
1539 		return 0;
1540 	}
1541 
1542 	if (state == adev->vcn.cur_state)
1543 		return 0;
1544 
1545 	if (state == AMD_PG_STATE_GATE)
1546 		ret = vcn_v4_0_3_stop(adev);
1547 	else
1548 		ret = vcn_v4_0_3_start(adev);
1549 
1550 	if (!ret)
1551 		adev->vcn.cur_state = state;
1552 
1553 	return ret;
1554 }
1555 
1556 /**
1557  * vcn_v4_0_3_set_interrupt_state - set VCN block interrupt state
1558  *
1559  * @adev: amdgpu_device pointer
1560  * @source: interrupt sources
1561  * @type: interrupt types
1562  * @state: interrupt states
1563  *
1564  * Set VCN block interrupt state
1565  */
1566 static int vcn_v4_0_3_set_interrupt_state(struct amdgpu_device *adev,
1567 					struct amdgpu_irq_src *source,
1568 					unsigned int type,
1569 					enum amdgpu_interrupt_state state)
1570 {
1571 	return 0;
1572 }
1573 
1574 /**
1575  * vcn_v4_0_3_process_interrupt - process VCN block interrupt
1576  *
1577  * @adev: amdgpu_device pointer
1578  * @source: interrupt sources
1579  * @entry: interrupt entry from clients and sources
1580  *
1581  * Process VCN block interrupt
1582  */
1583 static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev,
1584 				      struct amdgpu_irq_src *source,
1585 				      struct amdgpu_iv_entry *entry)
1586 {
1587 	uint32_t i, inst;
1588 
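	/* Translate the IH node id to a physical AID, then look up the VCN
	 * instance that lives on that AID.
	 */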
1589 	i = node_id_to_phys_map[entry->node_id];
1590 
1591 	DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n");
1592 
1593 	for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst)
1594 		if (adev->vcn.inst[inst].aid_id == i)
1595 			break;
1596 
1597 	if (inst >= adev->vcn.num_vcn_inst) {
1598 		dev_WARN_ONCE(adev->dev, 1,
1599 			      "Interrupt received for unknown VCN instance %d",
1600 			      entry->node_id);
1601 		return 0;
1602 	}
1603 
1604 	switch (entry->src_id) {
1605 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
1606 		amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]);
1607 		break;
1608 	default:
1609 		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
1610 			  entry->src_id, entry->src_data[0]);
1611 		break;
1612 	}
1613 
1614 	return 0;
1615 }
1616 
1617 static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
1618 	.set = vcn_v4_0_3_set_interrupt_state,
1619 	.process = vcn_v4_0_3_process_interrupt,
1620 };
1621 
1622 /**
1623  * vcn_v4_0_3_set_irq_funcs - set VCN block interrupt irq functions
1624  *
1625  * @adev: amdgpu_device pointer
1626  *
1627  * Set VCN block interrupt irq functions
1628  */
1629 static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev)
1630 {
1631 	int i;
1632 
1633 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i)
1634 		adev->vcn.inst->irq.num_types++;
1636 	adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
1637 }
1638 
1639 static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
1640 	.name = "vcn_v4_0_3",
1641 	.early_init = vcn_v4_0_3_early_init,
1642 	.late_init = NULL,
1643 	.sw_init = vcn_v4_0_3_sw_init,
1644 	.sw_fini = vcn_v4_0_3_sw_fini,
1645 	.hw_init = vcn_v4_0_3_hw_init,
1646 	.hw_fini = vcn_v4_0_3_hw_fini,
1647 	.suspend = vcn_v4_0_3_suspend,
1648 	.resume = vcn_v4_0_3_resume,
1649 	.is_idle = vcn_v4_0_3_is_idle,
1650 	.wait_for_idle = vcn_v4_0_3_wait_for_idle,
1651 	.check_soft_reset = NULL,
1652 	.pre_soft_reset = NULL,
1653 	.soft_reset = NULL,
1654 	.post_soft_reset = NULL,
1655 	.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
1656 	.set_powergating_state = vcn_v4_0_3_set_powergating_state,
1657 	.dump_ip_state = NULL,
1658 	.print_ip_state = NULL,
1659 };
1660 
1661 const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
1662 	.type = AMD_IP_BLOCK_TYPE_VCN,
1663 	.major = 4,
1664 	.minor = 0,
1665 	.rev = 3,
1666 	.funcs = &vcn_v4_0_3_ip_funcs,
1667 };
1668 
1669 static const struct amdgpu_ras_err_status_reg_entry vcn_v4_0_3_ue_reg_list[] = {
1670 	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDD, regVCN_UE_ERR_STATUS_HI_VIDD),
1671 	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDD"},
1672 	{AMDGPU_RAS_REG_ENTRY(VCN, 0, regVCN_UE_ERR_STATUS_LO_VIDV, regVCN_UE_ERR_STATUS_HI_VIDV),
1673 	1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "VIDV"},
1674 };
1675 
1676 static void vcn_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev,
1677 						  uint32_t vcn_inst,
1678 						  void *ras_err_status)
1679 {
1680 	struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status;
1681 
1682 	/* vcn v4_0_3 only supports querying uncorrectable errors */
1683 	amdgpu_ras_inst_query_ras_error_count(adev,
1684 			vcn_v4_0_3_ue_reg_list,
1685 			ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
1686 			NULL, 0, GET_INST(VCN, vcn_inst),
1687 			AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
1688 			&err_data->ue_count);
1689 }
1690 
1691 static void vcn_v4_0_3_query_ras_error_count(struct amdgpu_device *adev,
1692 					     void *ras_err_status)
1693 {
1694 	uint32_t i;
1695 
1696 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
1697 		dev_warn(adev->dev, "VCN RAS is not supported\n");
1698 		return;
1699 	}
1700 
1701 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
1702 		vcn_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status);
1703 }
1704 
1705 static void vcn_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev,
1706 						  uint32_t vcn_inst)
1707 {
1708 	amdgpu_ras_inst_reset_ras_error_count(adev,
1709 					vcn_v4_0_3_ue_reg_list,
1710 					ARRAY_SIZE(vcn_v4_0_3_ue_reg_list),
1711 					GET_INST(VCN, vcn_inst));
1712 }
1713 
1714 static void vcn_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev)
1715 {
1716 	uint32_t i;
1717 
1718 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) {
1719 		dev_warn(adev->dev, "VCN RAS is not supported\n");
1720 		return;
1721 	}
1722 
1723 	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
1724 		vcn_v4_0_3_inst_reset_ras_error_count(adev, i);
1725 }
1726 
1727 static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = {
1728 	.query_ras_error_count = vcn_v4_0_3_query_ras_error_count,
1729 	.reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count,
1730 };
1731 
1732 static struct amdgpu_vcn_ras vcn_v4_0_3_ras = {
1733 	.ras_block = {
1734 		.hw_ops = &vcn_v4_0_3_ras_hw_ops,
1735 	},
1736 };
1737 
1738 static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
1739 {
1740 	adev->vcn.ras = &vcn_v4_0_3_ras;
1741 }
1742 
1743 static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev,
1744 				  int inst_idx, bool indirect)
1745 {
1746 	uint32_t tmp;
1747 
1748 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
1749 		return;
1750 
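	/* Enable VCPU/VCODEC RAS error reporting (IH and PMI) for this
	 * instance.
	 */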
1751 	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
1752 	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
1753 	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
1754 	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
1755 	WREG32_SOC15_DPG_MODE(inst_idx,
1756 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
1757 			      tmp, 0, indirect);
1758 
1759 	tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK;
1760 	WREG32_SOC15_DPG_MODE(inst_idx,
1761 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2),
1762 			      tmp, 0, indirect);
1763 
1764 	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
1765 	WREG32_SOC15_DPG_MODE(inst_idx,
1766 			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
1767 			      tmp, 0, indirect);
1768 }
1769