xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision e58e871becec2d3b04ed91c0c16fe8deac9c9dfa)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41 
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
43 
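/* Sizes of the firmware, stack and data regions in the VCE VCPU cache;
 * the data region reserves 16KB per session (AMDGPU_MAX_VCE_HANDLES)
 * plus a fixed 52KB block.  The same values are programmed into the
 * VCPU_CACHE_OFFSET/SIZE registers by vce_v4_0_mc_resume() and
 * vce_v4_0_sriov_start().
 */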
44 #define VCE_V4_0_FW_SIZE	(384 * 1024)
45 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
46 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47 
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 /**
53  * vce_v4_0_ring_get_rptr - get read pointer
54  *
55  * @ring: amdgpu_ring pointer
56  *
57  * Returns the current hardware read pointer
58  */
59 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
60 {
61 	struct amdgpu_device *adev = ring->adev;
62 
63 	if (ring == &adev->vce.ring[0])
64 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
65 	else if (ring == &adev->vce.ring[1])
66 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
67 	else
68 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
69 }
70 
71 /**
72  * vce_v4_0_ring_get_wptr - get write pointer
73  *
74  * @ring: amdgpu_ring pointer
75  *
76  * Returns the current hardware write pointer
77  */
78 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
79 {
80 	struct amdgpu_device *adev = ring->adev;
81 
82 	if (ring->use_doorbell)
83 		return adev->wb.wb[ring->wptr_offs];
84 
85 	if (ring == &adev->vce.ring[0])
86 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
87 	else if (ring == &adev->vce.ring[1])
88 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
89 	else
90 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
91 }
92 
93 /**
94  * vce_v4_0_ring_set_wptr - set write pointer
95  *
96  * @ring: amdgpu_ring pointer
97  *
98  * Commits the write pointer to the hardware
99  */
100 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
101 {
102 	struct amdgpu_device *adev = ring->adev;
103 
104 	if (ring->use_doorbell) {
105 		/* XXX check if swapping is necessary on BE */
106 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
107 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
108 		return;
109 	}
110 
111 	if (ring == &adev->vce.ring[0])
112 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
113 			lower_32_bits(ring->wptr));
114 	else if (ring == &adev->vce.ring[1])
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
116 			lower_32_bits(ring->wptr));
117 	else
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
119 			lower_32_bits(ring->wptr));
120 }
121 
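/**
 * vce_v4_0_firmware_loaded - wait for the VCPU to report firmware loaded
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit, toggling the ECPU soft reset
 * between retry rounds.  Returns 0 on success, -ETIMEDOUT otherwise.
 */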
122 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
123 {
124 	int i, j;
125 
126 	for (i = 0; i < 10; ++i) {
127 		for (j = 0; j < 100; ++j) {
128 			uint32_t status =
129 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
130 
131 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
132 				return 0;
133 			mdelay(10);
134 		}
135 
136 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
137 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
138 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
139 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
140 		mdelay(10);
141 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 
145 	}
146 
147 	return -ETIMEDOUT;
148 }
149 
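/**
 * vce_v4_0_mmsch_start - kick off the MM scheduler (SRIOV only)
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table built by vce_v4_0_sriov_start()
 *
 * Hands the GPU address and size of the init descriptor table to the
 * MMSCH, then polls the mailbox response register until the scheduler
 * acknowledges completion or the 1000-iteration timeout expires.
 */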
150 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
151 				struct amdgpu_mm_table *table)
152 {
153 	uint32_t data = 0, loop;
154 	uint64_t addr = table->gpu_addr;
155 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
156 	uint32_t size;
157 
158 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
159 
160 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
161 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
162 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
163 
164 	/* 2, update vmid of descriptor */
165 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
166 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
167 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
168 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
169 
170 	/* 3, notify mmsch about the size of this descriptor */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
172 
173 	/* 4, set resp to zero */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
175 
176 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
178 
179 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
180 	loop = 1000;
181 	while ((data & 0x10000002) != 0x10000002) {
182 		udelay(10);
183 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
184 		loop--;
185 		if (!loop)
186 			break;
187 	}
188 
189 	if (!loop) {
190 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
191 		return -EBUSY;
192 	}
193 
194 	return 0;
195 }
196 
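/**
 * vce_v4_0_sriov_start - start VCE block under SRIOV
 *
 * @adev: amdgpu_device pointer
 *
 * Instead of programming the VCE registers directly, build an MMSCH v1.0
 * init table (ring setup plus the equivalent of vce_v4_0_mc_resume()) in
 * the shared MM table and let the MMSCH apply it via vce_v4_0_mmsch_start().
 */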
197 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
198 {
199 	struct amdgpu_ring *ring;
200 	uint32_t offset, size;
201 	uint32_t table_size = 0;
202 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
203 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
204 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
205 	struct mmsch_v1_0_cmd_end end = { { 0 } };
206 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
207 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
208 
209 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
210 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
211 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
212 	end.cmd_header.command_type = MMSCH_COMMAND__END;
213 
214 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
215 		header->version = MMSCH_VERSION;
216 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
217 
218 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
219 			header->vce_table_offset = header->header_size;
220 		else
221 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
222 
223 		init_table += header->vce_table_offset;
224 
225 		ring = &adev->vce.ring[0];
226 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
227 					    lower_32_bits(ring->gpu_addr));
228 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
229 					    upper_32_bits(ring->gpu_addr));
230 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
231 					    ring->ring_size / 4);
232 
233 		/* begin of MC_RESUME */
234 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
235 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
236 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
237 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
238 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
239 
240 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
241 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
242 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
243 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
244 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
245 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
246 						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
247 		} else {
248 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
249 						adev->vce.gpu_addr >> 8);
250 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
251 						adev->vce.gpu_addr >> 8);
252 		    MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
253 						adev->vce.gpu_addr >> 8);
254 		}
255 
256 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
257 		size = VCE_V4_0_FW_SIZE;
258 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
259 					    offset & 0x7FFFFFFF);
260 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
261 
262 		offset += size;
263 		size = VCE_V4_0_STACK_SIZE;
264 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
265 					    offset & 0x7FFFFFFF);
266 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
267 
268 		offset += size;
269 		size = VCE_V4_0_DATA_SIZE;
270 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
271 					    offset & 0x7FFFFFFF);
272 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
273 
274 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
275 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
276 						   0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
277 
278 		/* end of MC_RESUME */
279 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
280 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
281 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
282 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
283 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
284 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
285 
286 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
287 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
288 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
289 
290 		/* clear BUSY flag */
291 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
292 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
293 
294 		/* add end packet */
295 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
296 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
297 		header->vce_table_size = table_size;
298 
299 		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
300 	}
301 
302 	return -EINVAL; /* already initialized? */
303 }
304 
305 /**
306  * vce_v4_0_start - start VCE block
307  *
308  * @adev: amdgpu_device pointer
309  *
310  * Setup and start the VCE block
311  */
312 static int vce_v4_0_start(struct amdgpu_device *adev)
313 {
314 	struct amdgpu_ring *ring;
315 	int r;
316 
317 	ring = &adev->vce.ring[0];
318 
319 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
320 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
321 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
322 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
323 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
324 
325 	ring = &adev->vce.ring[1];
326 
327 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
328 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
329 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
330 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
331 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
332 
333 	ring = &adev->vce.ring[2];
334 
335 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
336 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
337 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
338 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
339 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
340 
341 	vce_v4_0_mc_resume(adev);
342 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
343 			~VCE_STATUS__JOB_BUSY_MASK);
344 
345 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
346 
347 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
348 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
349 	mdelay(100);
350 
351 	r = vce_v4_0_firmware_loaded(adev);
352 
353 	/* clear BUSY flag */
354 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
355 
356 	if (r) {
357 		DRM_ERROR("VCE not responding, giving up!!!\n");
358 		return r;
359 	}
360 
361 	return 0;
362 }
363 
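/**
 * vce_v4_0_stop - stop VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disables the VCPU clock, holds the ECPU in soft reset and clears the
 * BUSY flag in VCE_STATUS.
 */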
364 static int vce_v4_0_stop(struct amdgpu_device *adev)
365 {
366 
367 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
368 
369 	/* hold on ECPU */
370 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
371 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
372 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 
374 	/* clear BUSY flag */
375 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
376 
377 	/* Set Clock-Gating off */
378 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
379 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
380 	*/
381 
382 	return 0;
383 }
384 
385 static int vce_v4_0_early_init(void *handle)
386 {
387 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
388 
389 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SRIOV */
390 		adev->vce.num_rings = 1;
391 	else
392 		adev->vce.num_rings = 3;
393 
394 	vce_v4_0_set_ring_funcs(adev);
395 	vce_v4_0_set_irq_funcs(adev);
396 
397 	return 0;
398 }
399 
400 static int vce_v4_0_sw_init(void *handle)
401 {
402 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
403 	struct amdgpu_ring *ring;
404 	unsigned size;
405 	int r, i;
406 
407 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
408 	if (r)
409 		return r;
410 
411 	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
412 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
413 		size += VCE_V4_0_FW_SIZE;
414 
415 	r = amdgpu_vce_sw_init(adev, size);
416 	if (r)
417 		return r;
418 
419 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
420 		const struct common_firmware_header *hdr;
421 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
422 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
423 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
424 		adev->firmware.fw_size +=
425 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
426 		DRM_INFO("PSP loading VCE firmware\n");
427 	}
428 
429 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
430 		r = amdgpu_vce_resume(adev);
431 		if (r)
432 			return r;
433 	}
434 
435 	for (i = 0; i < adev->vce.num_rings; i++) {
436 		ring = &adev->vce.ring[i];
437 		sprintf(ring->name, "vce%d", i);
438 		if (amdgpu_sriov_vf(adev)) {
439 			/* DOORBELL only works under SRIOV */
440 			ring->use_doorbell = true;
441 			if (i == 0)
442 				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
443 			else if (i == 1)
444 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
445 			else
446 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
447 		}
448 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
449 		if (r)
450 			return r;
451 	}
452 
453 	r = amdgpu_virt_alloc_mm_table(adev);
454 	if (r)
455 		return r;
456 
457 	return r;
458 }
459 
460 static int vce_v4_0_sw_fini(void *handle)
461 {
462 	int r;
463 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
464 
465 	/* free MM table */
466 	amdgpu_virt_free_mm_table(adev);
467 
468 	r = amdgpu_vce_suspend(adev);
469 	if (r)
470 		return r;
471 
472 	return amdgpu_vce_sw_fini(adev);
473 }
474 
475 static int vce_v4_0_hw_init(void *handle)
476 {
477 	int r, i;
478 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
479 
480 	if (amdgpu_sriov_vf(adev))
481 		r = vce_v4_0_sriov_start(adev);
482 	else
483 		r = vce_v4_0_start(adev);
484 	if (r)
485 		return r;
486 
487 	for (i = 0; i < adev->vce.num_rings; i++)
488 		adev->vce.ring[i].ready = false;
489 
490 	for (i = 0; i < adev->vce.num_rings; i++) {
491 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
492 		if (r)
493 			return r;
494 		else
495 			adev->vce.ring[i].ready = true;
496 	}
497 
498 	DRM_INFO("VCE initialized successfully.\n");
499 
500 	return 0;
501 }
502 
503 static int vce_v4_0_hw_fini(void *handle)
504 {
505 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
506 	int i;
507 
508 	/* vce_v4_0_wait_for_idle(handle); */
509 	vce_v4_0_stop(adev);
510 	for (i = 0; i < adev->vce.num_rings; i++)
511 		adev->vce.ring[i].ready = false;
512 
513 	return 0;
514 }
515 
516 static int vce_v4_0_suspend(void *handle)
517 {
518 	int r;
519 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
520 
521 	r = vce_v4_0_hw_fini(adev);
522 	if (r)
523 		return r;
524 
525 	return amdgpu_vce_suspend(adev);
526 }
527 
528 static int vce_v4_0_resume(void *handle)
529 {
530 	int r;
531 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
532 
533 	r = amdgpu_vce_resume(adev);
534 	if (r)
535 		return r;
536 
537 	return vce_v4_0_hw_init(adev);
538 }
539 
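/**
 * vce_v4_0_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the LMI and clock gating defaults, points the VCPU cache BARs
 * at the firmware image (PSP-loaded ucode or the driver's VCE BO) and
 * sets up the firmware, stack and data cache offsets and sizes.
 */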
540 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
541 {
542 	uint32_t offset, size;
543 
544 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
545 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
546 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
547 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
548 
549 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
550 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
551 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
552 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
553 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
554 
555 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
556 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
557 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
558 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
559 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
560 	} else {
561 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
562 			(adev->vce.gpu_addr >> 8));
563 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
564 			(adev->vce.gpu_addr >> 40) & 0xff);
565 	}
566 
567 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
568 	size = VCE_V4_0_FW_SIZE;
569 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
570 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
571 
572 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
573 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
574 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
575 	size = VCE_V4_0_STACK_SIZE;
576 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
577 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
578 
579 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
580 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
581 	offset += size;
582 	size = VCE_V4_0_DATA_SIZE;
583 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
584 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
585 
586 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
587 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
588 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
589 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
590 }
591 
592 static int vce_v4_0_set_clockgating_state(void *handle,
593 					  enum amd_clockgating_state state)
594 {
595 	/* needed for driver unload */
596 	return 0;
597 }
598 
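/* The handlers below (idle/soft-reset checks and full clock/power gating
 * control) are compiled out for now; vce_v4_0_ip_funcs references them
 * only as commented-out NULL entries further down.
 */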
599 #if 0
600 static bool vce_v4_0_is_idle(void *handle)
601 {
602 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
603 	u32 mask = 0;
604 
605 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
606 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
607 
608 	return !(RREG32(mmSRBM_STATUS2) & mask);
609 }
610 
611 static int vce_v4_0_wait_for_idle(void *handle)
612 {
613 	unsigned i;
614 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
615 
616 	for (i = 0; i < adev->usec_timeout; i++)
617 		if (vce_v4_0_is_idle(handle))
618 			return 0;
619 
620 	return -ETIMEDOUT;
621 }
622 
623 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
624 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
625 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
626 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
627 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
628 
629 static bool vce_v4_0_check_soft_reset(void *handle)
630 {
631 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
632 	u32 srbm_soft_reset = 0;
633 
634 	/* According to the VCE team, we should use VCE_STATUS instead of
635 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
636 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
637 	 * instance's registers are accessed
638 	 * (0 for 1st instance, 0x10 for 2nd instance).
639 	 *
640 	 * VCE_STATUS
641 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
642 	 * |----+----+-----------+----+----+----+----------+---------+----|
643 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
644 	 *
645 	 * The VCE team suggests using bits 3-6 for the busy status check.
646 	 */
647 	mutex_lock(&adev->grbm_idx_mutex);
648 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
649 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
650 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
651 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
652 	}
653 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
654 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
655 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
656 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
657 	}
658 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
659 	mutex_unlock(&adev->grbm_idx_mutex);
660 
661 	if (srbm_soft_reset) {
662 		adev->vce.srbm_soft_reset = srbm_soft_reset;
663 		return true;
664 	} else {
665 		adev->vce.srbm_soft_reset = 0;
666 		return false;
667 	}
668 }
669 
670 static int vce_v4_0_soft_reset(void *handle)
671 {
672 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
673 	u32 srbm_soft_reset;
674 
675 	if (!adev->vce.srbm_soft_reset)
676 		return 0;
677 	srbm_soft_reset = adev->vce.srbm_soft_reset;
678 
679 	if (srbm_soft_reset) {
680 		u32 tmp;
681 
682 		tmp = RREG32(mmSRBM_SOFT_RESET);
683 		tmp |= srbm_soft_reset;
684 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
685 		WREG32(mmSRBM_SOFT_RESET, tmp);
686 		tmp = RREG32(mmSRBM_SOFT_RESET);
687 
688 		udelay(50);
689 
690 		tmp &= ~srbm_soft_reset;
691 		WREG32(mmSRBM_SOFT_RESET, tmp);
692 		tmp = RREG32(mmSRBM_SOFT_RESET);
693 
694 		/* Wait a little for things to settle down */
695 		udelay(50);
696 	}
697 
698 	return 0;
699 }
700 
701 static int vce_v4_0_pre_soft_reset(void *handle)
702 {
703 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
704 
705 	if (!adev->vce.srbm_soft_reset)
706 		return 0;
707 
708 	mdelay(5);
709 
710 	return vce_v4_0_suspend(adev);
711 }
712 
713 
714 static int vce_v4_0_post_soft_reset(void *handle)
715 {
716 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
717 
718 	if (!adev->vce.srbm_soft_reset)
719 		return 0;
720 
721 	mdelay(5);
722 
723 	return vce_v4_0_resume(adev);
724 }
725 
726 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
727 {
728 	u32 tmp, data;
729 
730 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
731 	if (override)
732 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
733 	else
734 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
735 
736 	if (tmp != data)
737 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
738 }
739 
740 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
741 					     bool gated)
742 {
743 	u32 data;
744 
745 	/* Set Override to disable Clock Gating */
746 	vce_v4_0_override_vce_clock_gating(adev, true);
747 
748 	/* This function enables MGCG which is controlled by firmware.
749 	 * With the clocks in the gated state the core is still
750 	 * accessible but the firmware will throttle the clocks on the
751 	 * fly as necessary.
752 	 */
753 	if (gated) {
754 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
755 		data |= 0x1ff;
756 		data &= ~0xef0000;
757 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
758 
759 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
760 		data |= 0x3ff000;
761 		data &= ~0xffc00000;
762 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
763 
764 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
765 		data |= 0x2;
766 		data &= ~0x00010000;
767 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
768 
769 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
770 		data |= 0x37f;
771 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
772 
773 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
774 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
775 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
776 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
777 			0x8;
778 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
779 	} else {
780 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
781 		data &= ~0x80010;
782 		data |= 0xe70008;
783 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
784 
785 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
786 		data |= 0xffc00000;
787 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
788 
789 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
790 		data |= 0x10000;
791 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
792 
793 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
794 		data &= ~0xffc00000;
795 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
796 
797 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
798 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
799 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
800 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
801 			  0x8);
802 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
803 	}
804 	vce_v4_0_override_vce_clock_gating(adev, false);
805 }
806 
807 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
808 {
809 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
810 
811 	if (enable)
812 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
813 	else
814 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
815 
816 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
817 }
818 
819 static int vce_v4_0_set_clockgating_state(void *handle,
820 					  enum amd_clockgating_state state)
821 {
822 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
823 	bool enable = (state == AMD_CG_STATE_GATE);
824 	int i;
825 
826 	if ((adev->asic_type == CHIP_POLARIS10) ||
827 		(adev->asic_type == CHIP_TONGA) ||
828 		(adev->asic_type == CHIP_FIJI))
829 		vce_v4_0_set_bypass_mode(adev, enable);
830 
831 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
832 		return 0;
833 
834 	mutex_lock(&adev->grbm_idx_mutex);
835 	for (i = 0; i < 2; i++) {
836 		/* Program VCE Instance 0 or 1 if not harvested */
837 		if (adev->vce.harvest_config & (1 << i))
838 			continue;
839 
840 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
841 
842 		if (enable) {
843 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
844 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
845 			data &= ~(0xf | 0xff0);
846 			data |= ((0x0 << 0) | (0x04 << 4));
847 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
848 
849 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
850 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
851 			data &= ~(0xf | 0xff0);
852 			data |= ((0x0 << 0) | (0x04 << 4));
853 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
854 		}
855 
856 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
857 	}
858 
859 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
860 	mutex_unlock(&adev->grbm_idx_mutex);
861 
862 	return 0;
863 }
864 
865 static int vce_v4_0_set_powergating_state(void *handle,
866 					  enum amd_powergating_state state)
867 {
868 	/* This doesn't actually powergate the VCE block.
869 	 * That's done in the dpm code via the SMC.  This
870 	 * just re-inits the block as necessary.  The actual
871 	 * gating still happens in the dpm code.  We should
872 	 * revisit this when there is a cleaner line between
873 	 * the smc and the hw blocks
874 	 */
875 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
876 
877 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
878 		return 0;
879 
880 	if (state == AMD_PG_STATE_GATE)
881 		/* XXX do we need a vce_v4_0_stop()? */
882 		return 0;
883 	else
884 		return vce_v4_0_start(adev);
885 }
886 #endif
887 
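/**
 * vce_v4_0_ring_emit_ib - execute indirect buffer
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to run the IB under
 * @ctx_switch: unused by VCE
 *
 * Emits a 5-dword VCE_CMD_IB_VM packet, matching .emit_ib_size in
 * vce_v4_0_ring_vm_funcs below.
 */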
888 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
889 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
890 {
891 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
892 	amdgpu_ring_write(ring, vm_id);
893 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
894 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
895 	amdgpu_ring_write(ring, ib->length_dw);
896 }
897 
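/**
 * vce_v4_0_ring_emit_fence - emit a fence and a trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: fence address
 * @seq: fence sequence number (only the lower 32 bits are written)
 * @flags: AMDGPU_FENCE_FLAG_64BIT is not supported
 *
 * Writes the fence followed by a trap command; 5 dwords per fence,
 * matching the "5 + 5" entry in .emit_frame_size below.
 */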
898 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
899 			u64 seq, unsigned flags)
900 {
901 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
902 
903 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
904 	amdgpu_ring_write(ring, addr);
905 	amdgpu_ring_write(ring, upper_32_bits(addr));
906 	amdgpu_ring_write(ring, seq);
907 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
908 }
909 
910 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
911 {
912 	amdgpu_ring_write(ring, VCE_CMD_END);
913 }
914 
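/**
 * vce_v4_0_emit_vm_flush - flush the VM for this ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush for
 * @pd_addr: page directory base address (the valid bit is ORed in below)
 *
 * Writes the page directory address into the MMHUB ctx0 PTB registers,
 * waits for the write to land, then triggers and waits on the ring's VM
 * invalidation engine.  17 dwords total, matching .emit_frame_size below.
 */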
915 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
916 			 unsigned int vm_id, uint64_t pd_addr)
917 {
918 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
919 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
920 	unsigned eng = ring->vm_inv_eng;
921 
922 	pd_addr = pd_addr | 0x1; /* valid bit */
923 	/* now only use physical base address of PDE and valid */
924 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
925 
926 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
927 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
928 	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
929 
930 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
931 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
932 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
933 
934 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
935 	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
936 	amdgpu_ring_write(ring, 0xffffffff);
937 	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
938 
939 	/* flush TLB */
940 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
941 	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
942 	amdgpu_ring_write(ring, req);
943 
944 	/* wait for flush */
945 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
946 	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
947 	amdgpu_ring_write(ring, 1 << vm_id);
948 	amdgpu_ring_write(ring, 1 << vm_id);
949 }
950 
951 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
952 					struct amdgpu_irq_src *source,
953 					unsigned type,
954 					enum amdgpu_interrupt_state state)
955 {
956 	uint32_t val = 0;
957 
958 	if (state == AMDGPU_IRQ_STATE_ENABLE)
959 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
960 
961 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
962 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
963 	return 0;
964 }
965 
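/**
 * vce_v4_0_process_interrupt - acknowledge and route a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: IV ring entry
 *
 * Clears the trap interrupt status and forwards the event to fence
 * processing on the ring selected by src_data[0].
 */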
966 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
967 				      struct amdgpu_irq_src *source,
968 				      struct amdgpu_iv_entry *entry)
969 {
970 	DRM_DEBUG("IH: VCE\n");
971 
972 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
973 			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
974 			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
975 
976 	switch (entry->src_data[0]) {
977 	case 0:
978 	case 1:
979 	case 2:
980 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
981 		break;
982 	default:
983 		DRM_ERROR("Unhandled interrupt: %d %d\n",
984 			  entry->src_id, entry->src_data[0]);
985 		break;
986 	}
987 
988 	return 0;
989 }
990 
991 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
992 	.name = "vce_v4_0",
993 	.early_init = vce_v4_0_early_init,
994 	.late_init = NULL,
995 	.sw_init = vce_v4_0_sw_init,
996 	.sw_fini = vce_v4_0_sw_fini,
997 	.hw_init = vce_v4_0_hw_init,
998 	.hw_fini = vce_v4_0_hw_fini,
999 	.suspend = vce_v4_0_suspend,
1000 	.resume = vce_v4_0_resume,
1001 	.is_idle = NULL /* vce_v4_0_is_idle */,
1002 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1003 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1004 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1005 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1006 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1007 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1008 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1009 };
1010 
1011 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1012 	.type = AMDGPU_RING_TYPE_VCE,
1013 	.align_mask = 0x3f,
1014 	.nop = VCE_CMD_NO_OP,
1015 	.support_64bit_ptrs = false,
1016 	.vmhub = AMDGPU_MMHUB,
1017 	.get_rptr = vce_v4_0_ring_get_rptr,
1018 	.get_wptr = vce_v4_0_ring_get_wptr,
1019 	.set_wptr = vce_v4_0_ring_set_wptr,
1020 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1021 	.emit_frame_size =
1022 		17 + /* vce_v4_0_emit_vm_flush */
1023 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1024 		1, /* vce_v4_0_ring_insert_end */
1025 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1026 	.emit_ib = vce_v4_0_ring_emit_ib,
1027 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1028 	.emit_fence = vce_v4_0_ring_emit_fence,
1029 	.test_ring = amdgpu_vce_ring_test_ring,
1030 	.test_ib = amdgpu_vce_ring_test_ib,
1031 	.insert_nop = amdgpu_ring_insert_nop,
1032 	.insert_end = vce_v4_0_ring_insert_end,
1033 	.pad_ib = amdgpu_ring_generic_pad_ib,
1034 	.begin_use = amdgpu_vce_ring_begin_use,
1035 	.end_use = amdgpu_vce_ring_end_use,
1036 };
1037 
1038 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1039 {
1040 	int i;
1041 
1042 	for (i = 0; i < adev->vce.num_rings; i++)
1043 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1044 	DRM_INFO("VCE enabled in VM mode\n");
1045 }
1046 
1047 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1048 	.set = vce_v4_0_set_interrupt_state,
1049 	.process = vce_v4_0_process_interrupt,
1050 };
1051 
1052 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1053 {
1054 	adev->vce.irq.num_types = 1;
1055 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1056 }
1057 
1058 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1059 {
1060 	.type = AMD_IP_BLOCK_TYPE_VCE,
1061 	.major = 4,
1062 	.minor = 0,
1063 	.rev = 0,
1064 	.funcs = &vce_v4_0_ip_funcs,
1065 };
1066