xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15.h"
32 #include "soc15d.h"
33 #include "soc15_common.h"
34 #include "mmsch_v1_0.h"
35 
36 #include "vce/vce_4_0_offset.h"
37 #include "vce/vce_4_0_default.h"
38 #include "vce/vce_4_0_sh_mask.h"
39 #include "mmhub/mmhub_1_0_offset.h"
40 #include "mmhub/mmhub_1_0_sh_mask.h"
41 
42 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
43 
44 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
45 
46 #define VCE_V4_0_FW_SIZE	(384 * 1024)
47 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
48 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
49 
50 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
51 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
52 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
53 
54 /**
55  * vce_v4_0_ring_get_rptr - get read pointer
56  *
57  * @ring: amdgpu_ring pointer
58  *
59  * Returns the current hardware read pointer
60  */
61 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
62 {
63 	struct amdgpu_device *adev = ring->adev;
64 
65 	if (ring->me == 0)
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
67 	else if (ring->me == 1)
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
69 	else
70 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
71 }
72 
73 /**
74  * vce_v4_0_ring_get_wptr - get write pointer
75  *
76  * @ring: amdgpu_ring pointer
77  *
78  * Returns the current hardware write pointer
79  */
80 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
81 {
82 	struct amdgpu_device *adev = ring->adev;
83 
84 	if (ring->use_doorbell)
85 		return adev->wb.wb[ring->wptr_offs];
86 
87 	if (ring->me == 0)
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
89 	else if (ring->me == 1)
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
91 	else
92 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
93 }
94 
95 /**
96  * vce_v4_0_ring_set_wptr - set write pointer
97  *
98  * @ring: amdgpu_ring pointer
99  *
100  * Commits the write pointer to the hardware
101  */
102 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
103 {
104 	struct amdgpu_device *adev = ring->adev;
105 
106 	if (ring->use_doorbell) {
107 		/* XXX check if swapping is necessary on BE */
108 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
109 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
110 		return;
111 	}
112 
113 	if (ring->me == 0)
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
115 			lower_32_bits(ring->wptr));
116 	else if (ring->me == 1)
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
118 			lower_32_bits(ring->wptr));
119 	else
120 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
121 			lower_32_bits(ring->wptr));
122 }
123 
124 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
125 {
126 	int i, j;
127 
128 	for (i = 0; i < 10; ++i) {
129 		for (j = 0; j < 100; ++j) {
130 			uint32_t status =
131 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
132 
133 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
134 				return 0;
135 			mdelay(10);
136 		}
137 
138 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
139 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
140 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
141 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
142 		mdelay(10);
143 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
144 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
145 		mdelay(10);
146 
147 	}
148 
149 	return -ETIMEDOUT;
150 }
151 
152 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
153 				struct amdgpu_mm_table *table)
154 {
155 	uint32_t data = 0, loop;
156 	uint64_t addr = table->gpu_addr;
157 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
158 	uint32_t size;
159 
160 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
161 
162 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
163 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
165 
166 	/* 2, update vmid of descriptor */
167 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
168 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
169 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
170 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
171 
172 	/* 3, notify mmsch about the size of this descriptor */
173 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
174 
175 	/* 4, set resp to zero */
176 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
177 
178 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
179 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
180 	adev->vce.ring[0].wptr = 0;
181 	adev->vce.ring[0].wptr_old = 0;
182 
183 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
184 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
185 
186 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
187 	loop = 1000;
188 	while ((data & 0x10000002) != 0x10000002) {
189 		udelay(10);
190 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
191 		loop--;
192 		if (!loop)
193 			break;
194 	}
195 
196 	if (!loop) {
197 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
198 		return -EBUSY;
199 	}
200 
201 	return 0;
202 }
203 
204 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
205 {
206 	struct amdgpu_ring *ring;
207 	uint32_t offset, size;
208 	uint32_t table_size = 0;
209 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
210 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
212 	struct mmsch_v1_0_cmd_end end = { { 0 } };
213 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
214 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
215 
216 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
217 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
218 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
219 	end.cmd_header.command_type = MMSCH_COMMAND__END;
220 
221 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
222 		header->version = MMSCH_VERSION;
223 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
224 
225 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
226 			header->vce_table_offset = header->header_size;
227 		else
228 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
229 
230 		init_table += header->vce_table_offset;
231 
232 		ring = &adev->vce.ring[0];
233 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
234 					    lower_32_bits(ring->gpu_addr));
235 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
236 					    upper_32_bits(ring->gpu_addr));
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
238 					    ring->ring_size / 4);
239 
240 		/* BEGING OF MC_RESUME */
241 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
242 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
243 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
246 
247 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
248 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
249 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
250 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
251 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
252 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
253 						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
254 		} else {
255 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
256 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
257 						adev->vce.gpu_addr >> 8);
258 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
259 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
260 						(adev->vce.gpu_addr >> 40) & 0xff);
261 		}
262 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
263 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
264 						adev->vce.gpu_addr >> 8);
265 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
266 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
267 						(adev->vce.gpu_addr >> 40) & 0xff);
268 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
269 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
270 						adev->vce.gpu_addr >> 8);
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
273 						(adev->vce.gpu_addr >> 40) & 0xff);
274 
275 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
276 		size = VCE_V4_0_FW_SIZE;
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
278 					offset & ~0x0f000000);
279 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
280 
281 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
282 		size = VCE_V4_0_STACK_SIZE;
283 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
284 					(offset & ~0x0f000000) | (1 << 24));
285 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
286 
287 		offset += size;
288 		size = VCE_V4_0_DATA_SIZE;
289 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
290 					(offset & ~0x0f000000) | (2 << 24));
291 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
292 
293 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
294 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
295 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
296 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
297 
298 		/* end of MC_RESUME */
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
300 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
301 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
302 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
303 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
304 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
305 
306 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
307 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
308 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
309 
310 		/* clear BUSY flag */
311 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
312 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
313 
314 		/* add end packet */
315 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
316 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
317 		header->vce_table_size = table_size;
318 	}
319 
320 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
321 }
322 
323 /**
324  * vce_v4_0_start - start VCE block
325  *
326  * @adev: amdgpu_device pointer
327  *
328  * Setup and start the VCE block
329  */
330 static int vce_v4_0_start(struct amdgpu_device *adev)
331 {
332 	struct amdgpu_ring *ring;
333 	int r;
334 
335 	ring = &adev->vce.ring[0];
336 
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
340 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
341 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
342 
343 	ring = &adev->vce.ring[1];
344 
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
348 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
349 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
350 
351 	ring = &adev->vce.ring[2];
352 
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
356 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
357 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
358 
359 	vce_v4_0_mc_resume(adev);
360 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
361 			~VCE_STATUS__JOB_BUSY_MASK);
362 
363 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
364 
365 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
366 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
367 	mdelay(100);
368 
369 	r = vce_v4_0_firmware_loaded(adev);
370 
371 	/* clear BUSY flag */
372 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
373 
374 	if (r) {
375 		DRM_ERROR("VCE not responding, giving up!!!\n");
376 		return r;
377 	}
378 
379 	return 0;
380 }
381 
382 static int vce_v4_0_stop(struct amdgpu_device *adev)
383 {
384 
385 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
386 
387 	/* hold on ECPU */
388 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
389 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
390 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
391 
392 	/* clear BUSY flag */
393 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
394 
395 	/* Set Clock-Gating off */
396 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
397 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
398 	*/
399 
400 	return 0;
401 }
402 
403 static int vce_v4_0_early_init(void *handle)
404 {
405 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
406 
407 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
408 		adev->vce.num_rings = 1;
409 	else
410 		adev->vce.num_rings = 3;
411 
412 	vce_v4_0_set_ring_funcs(adev);
413 	vce_v4_0_set_irq_funcs(adev);
414 
415 	return 0;
416 }
417 
418 static int vce_v4_0_sw_init(void *handle)
419 {
420 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
421 	struct amdgpu_ring *ring;
422 
423 	unsigned size;
424 	int r, i;
425 
426 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
427 	if (r)
428 		return r;
429 
430 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
431 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
432 		size += VCE_V4_0_FW_SIZE;
433 
434 	r = amdgpu_vce_sw_init(adev, size);
435 	if (r)
436 		return r;
437 
438 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
439 		const struct common_firmware_header *hdr;
440 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
441 
442 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
443 		if (!adev->vce.saved_bo)
444 			return -ENOMEM;
445 
446 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
447 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
448 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
449 		adev->firmware.fw_size +=
450 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
451 		DRM_INFO("PSP loading VCE firmware\n");
452 	} else {
453 		r = amdgpu_vce_resume(adev);
454 		if (r)
455 			return r;
456 	}
457 
458 	for (i = 0; i < adev->vce.num_rings; i++) {
459 		ring = &adev->vce.ring[i];
460 		sprintf(ring->name, "vce%d", i);
461 		if (amdgpu_sriov_vf(adev)) {
462 			/* DOORBELL only works under SRIOV */
463 			ring->use_doorbell = true;
464 
465 			/* currently only use the first encoding ring for sriov,
466 			 * so set unused location for other unused rings.
467 			 */
468 			if (i == 0)
469 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
470 			else
471 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
472 		}
473 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
474 		if (r)
475 			return r;
476 	}
477 
478 
479 	r = amdgpu_vce_entity_init(adev);
480 	if (r)
481 		return r;
482 
483 	r = amdgpu_virt_alloc_mm_table(adev);
484 	if (r)
485 		return r;
486 
487 	return r;
488 }
489 
490 static int vce_v4_0_sw_fini(void *handle)
491 {
492 	int r;
493 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
494 
495 	/* free MM table */
496 	amdgpu_virt_free_mm_table(adev);
497 
498 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
499 		kvfree(adev->vce.saved_bo);
500 		adev->vce.saved_bo = NULL;
501 	}
502 
503 	r = amdgpu_vce_suspend(adev);
504 	if (r)
505 		return r;
506 
507 	return amdgpu_vce_sw_fini(adev);
508 }
509 
510 static int vce_v4_0_hw_init(void *handle)
511 {
512 	int r, i;
513 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
514 
515 	if (amdgpu_sriov_vf(adev))
516 		r = vce_v4_0_sriov_start(adev);
517 	else
518 		r = vce_v4_0_start(adev);
519 	if (r)
520 		return r;
521 
522 	for (i = 0; i < adev->vce.num_rings; i++) {
523 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
524 		if (r)
525 			return r;
526 	}
527 
528 	DRM_INFO("VCE initialized successfully.\n");
529 
530 	return 0;
531 }
532 
533 static int vce_v4_0_hw_fini(void *handle)
534 {
535 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
536 	int i;
537 
538 	if (!amdgpu_sriov_vf(adev)) {
539 		/* vce_v4_0_wait_for_idle(handle); */
540 		vce_v4_0_stop(adev);
541 	} else {
542 		/* full access mode, so don't touch any VCE register */
543 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
544 	}
545 
546 	for (i = 0; i < adev->vce.num_rings; i++)
547 		adev->vce.ring[i].sched.ready = false;
548 
549 	return 0;
550 }
551 
552 static int vce_v4_0_suspend(void *handle)
553 {
554 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
555 	int r;
556 
557 	if (adev->vce.vcpu_bo == NULL)
558 		return 0;
559 
560 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
561 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
562 		void *ptr = adev->vce.cpu_addr;
563 
564 		memcpy_fromio(adev->vce.saved_bo, ptr, size);
565 	}
566 
567 	r = vce_v4_0_hw_fini(adev);
568 	if (r)
569 		return r;
570 
571 	return amdgpu_vce_suspend(adev);
572 }
573 
574 static int vce_v4_0_resume(void *handle)
575 {
576 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
577 	int r;
578 
579 	if (adev->vce.vcpu_bo == NULL)
580 		return -EINVAL;
581 
582 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
583 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
584 		void *ptr = adev->vce.cpu_addr;
585 
586 		memcpy_toio(ptr, adev->vce.saved_bo, size);
587 	} else {
588 		r = amdgpu_vce_resume(adev);
589 		if (r)
590 			return r;
591 	}
592 
593 	return vce_v4_0_hw_init(adev);
594 }
595 
596 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
597 {
598 	uint32_t offset, size;
599 	uint64_t tmr_mc_addr;
600 
601 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
602 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
603 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
604 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
605 
606 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
607 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
608 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
609 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
610 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
611 
612 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
613 
614 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
615 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
616 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
617 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
618 			(tmr_mc_addr >> 8));
619 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
620 			(tmr_mc_addr >> 40) & 0xff);
621 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
622 	} else {
623 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
624 			(adev->vce.gpu_addr >> 8));
625 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
626 			(adev->vce.gpu_addr >> 40) & 0xff);
627 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
628 	}
629 
630 	size = VCE_V4_0_FW_SIZE;
631 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
632 
633 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
634 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
635 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
636 	size = VCE_V4_0_STACK_SIZE;
637 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
638 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
639 
640 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
641 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
642 	offset += size;
643 	size = VCE_V4_0_DATA_SIZE;
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
645 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
646 
647 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
648 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
649 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
650 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
651 }
652 
653 static int vce_v4_0_set_clockgating_state(void *handle,
654 					  enum amd_clockgating_state state)
655 {
656 	/* needed for driver unload*/
657 	return 0;
658 }
659 
660 #if 0
661 static bool vce_v4_0_is_idle(void *handle)
662 {
663 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
664 	u32 mask = 0;
665 
666 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
667 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
668 
669 	return !(RREG32(mmSRBM_STATUS2) & mask);
670 }
671 
672 static int vce_v4_0_wait_for_idle(void *handle)
673 {
674 	unsigned i;
675 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
676 
677 	for (i = 0; i < adev->usec_timeout; i++)
678 		if (vce_v4_0_is_idle(handle))
679 			return 0;
680 
681 	return -ETIMEDOUT;
682 }
683 
684 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
685 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
686 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
687 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
688 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
689 
690 static bool vce_v4_0_check_soft_reset(void *handle)
691 {
692 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
693 	u32 srbm_soft_reset = 0;
694 
695 	/* According to VCE team , we should use VCE_STATUS instead
696 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
697 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
698 	 * instance's registers are accessed
699 	 * (0 for 1st instance, 10 for 2nd instance).
700 	 *
701 	 *VCE_STATUS
702 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
703 	 *|----+----+-----------+----+----+----+----------+---------+----|
704 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
705 	 *
706 	 * VCE team suggest use bit 3--bit 6 for busy status check
707 	 */
708 	mutex_lock(&adev->grbm_idx_mutex);
709 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
710 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
711 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
712 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
713 	}
714 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
715 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
716 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
717 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
718 	}
719 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
720 	mutex_unlock(&adev->grbm_idx_mutex);
721 
722 	if (srbm_soft_reset) {
723 		adev->vce.srbm_soft_reset = srbm_soft_reset;
724 		return true;
725 	} else {
726 		adev->vce.srbm_soft_reset = 0;
727 		return false;
728 	}
729 }
730 
731 static int vce_v4_0_soft_reset(void *handle)
732 {
733 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
734 	u32 srbm_soft_reset;
735 
736 	if (!adev->vce.srbm_soft_reset)
737 		return 0;
738 	srbm_soft_reset = adev->vce.srbm_soft_reset;
739 
740 	if (srbm_soft_reset) {
741 		u32 tmp;
742 
743 		tmp = RREG32(mmSRBM_SOFT_RESET);
744 		tmp |= srbm_soft_reset;
745 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
746 		WREG32(mmSRBM_SOFT_RESET, tmp);
747 		tmp = RREG32(mmSRBM_SOFT_RESET);
748 
749 		udelay(50);
750 
751 		tmp &= ~srbm_soft_reset;
752 		WREG32(mmSRBM_SOFT_RESET, tmp);
753 		tmp = RREG32(mmSRBM_SOFT_RESET);
754 
755 		/* Wait a little for things to settle down */
756 		udelay(50);
757 	}
758 
759 	return 0;
760 }
761 
762 static int vce_v4_0_pre_soft_reset(void *handle)
763 {
764 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
765 
766 	if (!adev->vce.srbm_soft_reset)
767 		return 0;
768 
769 	mdelay(5);
770 
771 	return vce_v4_0_suspend(adev);
772 }
773 
774 
775 static int vce_v4_0_post_soft_reset(void *handle)
776 {
777 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
778 
779 	if (!adev->vce.srbm_soft_reset)
780 		return 0;
781 
782 	mdelay(5);
783 
784 	return vce_v4_0_resume(adev);
785 }
786 
787 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
788 {
789 	u32 tmp, data;
790 
791 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
792 	if (override)
793 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
794 	else
795 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
796 
797 	if (tmp != data)
798 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
799 }
800 
801 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
802 					     bool gated)
803 {
804 	u32 data;
805 
806 	/* Set Override to disable Clock Gating */
807 	vce_v4_0_override_vce_clock_gating(adev, true);
808 
809 	/* This function enables MGCG which is controlled by firmware.
810 	   With the clocks in the gated state the core is still
811 	   accessible but the firmware will throttle the clocks on the
812 	   fly as necessary.
813 	*/
814 	if (gated) {
815 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
816 		data |= 0x1ff;
817 		data &= ~0xef0000;
818 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
819 
820 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
821 		data |= 0x3ff000;
822 		data &= ~0xffc00000;
823 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
824 
825 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
826 		data |= 0x2;
827 		data &= ~0x00010000;
828 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
829 
830 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
831 		data |= 0x37f;
832 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
833 
834 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
835 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
836 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
837 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
838 			0x8;
839 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
840 	} else {
841 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
842 		data &= ~0x80010;
843 		data |= 0xe70008;
844 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
845 
846 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
847 		data |= 0xffc00000;
848 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
849 
850 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
851 		data |= 0x10000;
852 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
853 
854 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
855 		data &= ~0xffc00000;
856 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
857 
858 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
859 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
860 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
861 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
862 			  0x8);
863 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
864 	}
865 	vce_v4_0_override_vce_clock_gating(adev, false);
866 }
867 
868 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
869 {
870 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
871 
872 	if (enable)
873 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
874 	else
875 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
876 
877 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
878 }
879 
880 static int vce_v4_0_set_clockgating_state(void *handle,
881 					  enum amd_clockgating_state state)
882 {
883 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
884 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
885 	int i;
886 
887 	if ((adev->asic_type == CHIP_POLARIS10) ||
888 		(adev->asic_type == CHIP_TONGA) ||
889 		(adev->asic_type == CHIP_FIJI))
890 		vce_v4_0_set_bypass_mode(adev, enable);
891 
892 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
893 		return 0;
894 
895 	mutex_lock(&adev->grbm_idx_mutex);
896 	for (i = 0; i < 2; i++) {
897 		/* Program VCE Instance 0 or 1 if not harvested */
898 		if (adev->vce.harvest_config & (1 << i))
899 			continue;
900 
901 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
902 
903 		if (enable) {
904 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
905 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
906 			data &= ~(0xf | 0xff0);
907 			data |= ((0x0 << 0) | (0x04 << 4));
908 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
909 
910 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
911 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
912 			data &= ~(0xf | 0xff0);
913 			data |= ((0x0 << 0) | (0x04 << 4));
914 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
915 		}
916 
917 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
918 	}
919 
920 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
921 	mutex_unlock(&adev->grbm_idx_mutex);
922 
923 	return 0;
924 }
925 
926 static int vce_v4_0_set_powergating_state(void *handle,
927 					  enum amd_powergating_state state)
928 {
929 	/* This doesn't actually powergate the VCE block.
930 	 * That's done in the dpm code via the SMC.  This
931 	 * just re-inits the block as necessary.  The actual
932 	 * gating still happens in the dpm code.  We should
933 	 * revisit this when there is a cleaner line between
934 	 * the smc and the hw blocks
935 	 */
936 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
937 
938 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
939 		return 0;
940 
941 	if (state == AMD_PG_STATE_GATE)
942 		/* XXX do we need a vce_v4_0_stop()? */
943 		return 0;
944 	else
945 		return vce_v4_0_start(adev);
946 }
947 #endif
948 
949 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
950 					struct amdgpu_ib *ib, uint32_t flags)
951 {
952 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
953 
954 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
955 	amdgpu_ring_write(ring, vmid);
956 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
957 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
958 	amdgpu_ring_write(ring, ib->length_dw);
959 }
960 
961 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
962 			u64 seq, unsigned flags)
963 {
964 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
965 
966 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
967 	amdgpu_ring_write(ring, addr);
968 	amdgpu_ring_write(ring, upper_32_bits(addr));
969 	amdgpu_ring_write(ring, seq);
970 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
971 }
972 
973 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
974 {
975 	amdgpu_ring_write(ring, VCE_CMD_END);
976 }
977 
978 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
979 				   uint32_t val, uint32_t mask)
980 {
981 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
982 	amdgpu_ring_write(ring,	reg << 2);
983 	amdgpu_ring_write(ring, mask);
984 	amdgpu_ring_write(ring, val);
985 }
986 
987 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
988 				   unsigned int vmid, uint64_t pd_addr)
989 {
990 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
991 
992 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
993 
994 	/* wait for reg writes */
995 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
996 			       lower_32_bits(pd_addr), 0xffffffff);
997 }
998 
999 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1000 			       uint32_t reg, uint32_t val)
1001 {
1002 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1003 	amdgpu_ring_write(ring,	reg << 2);
1004 	amdgpu_ring_write(ring, val);
1005 }
1006 
1007 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1008 					struct amdgpu_irq_src *source,
1009 					unsigned type,
1010 					enum amdgpu_interrupt_state state)
1011 {
1012 	uint32_t val = 0;
1013 
1014 	if (!amdgpu_sriov_vf(adev)) {
1015 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1016 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1017 
1018 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1019 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1020 	}
1021 	return 0;
1022 }
1023 
1024 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1025 				      struct amdgpu_irq_src *source,
1026 				      struct amdgpu_iv_entry *entry)
1027 {
1028 	DRM_DEBUG("IH: VCE\n");
1029 
1030 	switch (entry->src_data[0]) {
1031 	case 0:
1032 	case 1:
1033 	case 2:
1034 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1035 		break;
1036 	default:
1037 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1038 			  entry->src_id, entry->src_data[0]);
1039 		break;
1040 	}
1041 
1042 	return 0;
1043 }
1044 
1045 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1046 	.name = "vce_v4_0",
1047 	.early_init = vce_v4_0_early_init,
1048 	.late_init = NULL,
1049 	.sw_init = vce_v4_0_sw_init,
1050 	.sw_fini = vce_v4_0_sw_fini,
1051 	.hw_init = vce_v4_0_hw_init,
1052 	.hw_fini = vce_v4_0_hw_fini,
1053 	.suspend = vce_v4_0_suspend,
1054 	.resume = vce_v4_0_resume,
1055 	.is_idle = NULL /* vce_v4_0_is_idle */,
1056 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1057 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1058 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1059 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1060 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1061 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1062 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1063 };
1064 
1065 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1066 	.type = AMDGPU_RING_TYPE_VCE,
1067 	.align_mask = 0x3f,
1068 	.nop = VCE_CMD_NO_OP,
1069 	.support_64bit_ptrs = false,
1070 	.vmhub = AMDGPU_MMHUB,
1071 	.get_rptr = vce_v4_0_ring_get_rptr,
1072 	.get_wptr = vce_v4_0_ring_get_wptr,
1073 	.set_wptr = vce_v4_0_ring_set_wptr,
1074 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1075 	.emit_frame_size =
1076 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1077 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1078 		4 + /* vce_v4_0_emit_vm_flush */
1079 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1080 		1, /* vce_v4_0_ring_insert_end */
1081 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1082 	.emit_ib = vce_v4_0_ring_emit_ib,
1083 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1084 	.emit_fence = vce_v4_0_ring_emit_fence,
1085 	.test_ring = amdgpu_vce_ring_test_ring,
1086 	.test_ib = amdgpu_vce_ring_test_ib,
1087 	.insert_nop = amdgpu_ring_insert_nop,
1088 	.insert_end = vce_v4_0_ring_insert_end,
1089 	.pad_ib = amdgpu_ring_generic_pad_ib,
1090 	.begin_use = amdgpu_vce_ring_begin_use,
1091 	.end_use = amdgpu_vce_ring_end_use,
1092 	.emit_wreg = vce_v4_0_emit_wreg,
1093 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1094 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1095 };
1096 
1097 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1098 {
1099 	int i;
1100 
1101 	for (i = 0; i < adev->vce.num_rings; i++) {
1102 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1103 		adev->vce.ring[i].me = i;
1104 	}
1105 	DRM_INFO("VCE enabled in VM mode\n");
1106 }
1107 
1108 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1109 	.set = vce_v4_0_set_interrupt_state,
1110 	.process = vce_v4_0_process_interrupt,
1111 };
1112 
1113 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1114 {
1115 	adev->vce.irq.num_types = 1;
1116 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1117 };
1118 
1119 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1120 {
1121 	.type = AMD_IP_BLOCK_TYPE_VCE,
1122 	.major = 4,
1123 	.minor = 0,
1124 	.rev = 0,
1125 	.funcs = &vce_v4_0_ip_funcs,
1126 };
1127