xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision ea49432d184a6a09f84461604b7711a4e9f5ec9c)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drm_drv.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "soc15_common.h"
35 #include "mmsch_v1_0.h"
36 
37 #include "vce/vce_4_0_offset.h"
38 #include "vce/vce_4_0_default.h"
39 #include "vce/vce_4_0_sh_mask.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 
43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 
/* VCE_STATUS bit polled in this file to detect that the VCPU reports its firmware as loaded */
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02

/*
 * Region sizes programmed into VCE_VCPU_CACHE_SIZE0/1/2 (firmware, stack
 * and data respectively) and summed to size the VCPU buffer in sw_init.
 */
#define VCE_V4_0_FW_SIZE	(384 * 1024)
#define VCE_V4_0_STACK_SIZE	(64 * 1024)
#define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))

/* Forward declarations of helpers that are called before their definitions */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 
55 /**
56  * vce_v4_0_ring_get_rptr - get read pointer
57  *
58  * @ring: amdgpu_ring pointer
59  *
60  * Returns the current hardware read pointer
61  */
62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64 	struct amdgpu_device *adev = ring->adev;
65 
66 	if (ring->me == 0)
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 	else if (ring->me == 1)
69 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 	else
71 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 }
73 
74 /**
75  * vce_v4_0_ring_get_wptr - get write pointer
76  *
77  * @ring: amdgpu_ring pointer
78  *
79  * Returns the current hardware write pointer
80  */
81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 {
83 	struct amdgpu_device *adev = ring->adev;
84 
85 	if (ring->use_doorbell)
86 		return adev->wb.wb[ring->wptr_offs];
87 
88 	if (ring->me == 0)
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 	else if (ring->me == 1)
91 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 	else
93 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 }
95 
96 /**
97  * vce_v4_0_ring_set_wptr - set write pointer
98  *
99  * @ring: amdgpu_ring pointer
100  *
101  * Commits the write pointer to the hardware
102  */
103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 {
105 	struct amdgpu_device *adev = ring->adev;
106 
107 	if (ring->use_doorbell) {
108 		/* XXX check if swapping is necessary on BE */
109 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
110 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 		return;
112 	}
113 
114 	if (ring->me == 0)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 			lower_32_bits(ring->wptr));
117 	else if (ring->me == 1)
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 			lower_32_bits(ring->wptr));
120 	else
121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 			lower_32_bits(ring->wptr));
123 }
124 
125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126 {
127 	int i, j;
128 
129 	for (i = 0; i < 10; ++i) {
130 		for (j = 0; j < 100; ++j) {
131 			uint32_t status =
132 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133 
134 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 				return 0;
136 			mdelay(10);
137 		}
138 
139 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 		mdelay(10);
147 
148 	}
149 
150 	return -ETIMEDOUT;
151 }
152 
153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 				struct amdgpu_mm_table *table)
155 {
156 	uint32_t data = 0, loop;
157 	uint64_t addr = table->gpu_addr;
158 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 	uint32_t size;
160 
161 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
162 
163 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166 
167 	/* 2, update vmid of descriptor */
168 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172 
173 	/* 3, notify mmsch about the size of this descriptor */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175 
176 	/* 4, set resp to zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178 
179 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
181 	adev->vce.ring[0].wptr = 0;
182 	adev->vce.ring[0].wptr_old = 0;
183 
184 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186 
187 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 	loop = 1000;
189 	while ((data & 0x10000002) != 0x10000002) {
190 		udelay(10);
191 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 		loop--;
193 		if (!loop)
194 			break;
195 	}
196 
197 	if (!loop) {
198 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 		return -EBUSY;
200 	}
201 
202 	return 0;
203 }
204 
/**
 * vce_v4_0_sriov_start - build the VCE init table and start MMSCH
 *
 * @adev: amdgpu_device pointer
 *
 * When the shared MM table does not yet contain a VCE section (offset and
 * size both zero), fills in the header and appends the register programming
 * for ring 0, the memory controller setup and the VCPU start sequence as
 * MMSCH commands, terminated by an end packet. In every case the table is
 * then handed to vce_v4_0_mmsch_start().
 *
 * Returns the result of vce_v4_0_mmsch_start().
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	/*
	 * NOTE(review): the MMSCH_V1_0_INSERT_* macros are defined outside this
	 * file; they presumably use these command templates together with
	 * init_table and table_size by name -- do not rename the locals
	 * without checking the macro definitions.
	 */
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	/* only build the VCE section once; an existing one is reused as-is */
	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		/* header and table sizes/offsets are kept in 32-bit words */
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE section right after the header, or after the UVD section if present */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* ring buffer base and size for ring 0 */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGINNING OF MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			/* PSP-loaded firmware: BAR0 points at the TMR address recorded for the VCE ucode */
			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;

			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(tmr_mc_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
		} else {
			/* direct load: firmware lives in the VCE buffer at AMDGPU_VCE_FIRMWARE_OFFSET */
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
						offset & ~0x0f000000);

		}
		/* BAR1 and BAR2 always point at the VCE buffer */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP loading the stack starts at offset 0, otherwise right after the firmware */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		/* the data region follows the stack */
		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		/*
		 * NOTE(review): the mask/data argument order here differs from the
		 * "clear BUSY flag" entry below (~MASK, 0) -- confirm against the
		 * MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT definition that this sets
		 * JOB_BUSY as intended.
		 */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* wait for the VCPU to report that its firmware is loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
328 
329 /**
330  * vce_v4_0_start - start VCE block
331  *
332  * @adev: amdgpu_device pointer
333  *
334  * Setup and start the VCE block
335  */
336 static int vce_v4_0_start(struct amdgpu_device *adev)
337 {
338 	struct amdgpu_ring *ring;
339 	int r;
340 
341 	ring = &adev->vce.ring[0];
342 
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348 
349 	ring = &adev->vce.ring[1];
350 
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356 
357 	ring = &adev->vce.ring[2];
358 
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364 
365 	vce_v4_0_mc_resume(adev);
366 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 			~VCE_STATUS__JOB_BUSY_MASK);
368 
369 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370 
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 	mdelay(100);
374 
375 	r = vce_v4_0_firmware_loaded(adev);
376 
377 	/* clear BUSY flag */
378 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379 
380 	if (r) {
381 		DRM_ERROR("VCE not responding, giving up!!!\n");
382 		return r;
383 	}
384 
385 	return 0;
386 }
387 
388 static int vce_v4_0_stop(struct amdgpu_device *adev)
389 {
390 
391 	/* Disable VCPU */
392 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393 
394 	/* hold on ECPU */
395 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398 
399 	/* clear VCE_STATUS */
400 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401 
402 	/* Set Clock-Gating off */
403 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 	*/
406 
407 	return 0;
408 }
409 
410 static int vce_v4_0_early_init(void *handle)
411 {
412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 
414 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 		adev->vce.num_rings = 1;
416 	else
417 		adev->vce.num_rings = 3;
418 
419 	vce_v4_0_set_ring_funcs(adev);
420 	vce_v4_0_set_irq_funcs(adev);
421 
422 	return 0;
423 }
424 
425 static int vce_v4_0_sw_init(void *handle)
426 {
427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 	struct amdgpu_ring *ring;
429 
430 	unsigned size;
431 	int r, i;
432 
433 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 	if (r)
435 		return r;
436 
437 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 		size += VCE_V4_0_FW_SIZE;
440 
441 	r = amdgpu_vce_sw_init(adev, size);
442 	if (r)
443 		return r;
444 
445 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 		const struct common_firmware_header *hdr;
447 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 
449 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 		if (!adev->vce.saved_bo)
451 			return -ENOMEM;
452 
453 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 		adev->firmware.fw_size +=
457 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 		DRM_INFO("PSP loading VCE firmware\n");
459 	} else {
460 		r = amdgpu_vce_resume(adev);
461 		if (r)
462 			return r;
463 	}
464 
465 	for (i = 0; i < adev->vce.num_rings; i++) {
466 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467 
468 		ring = &adev->vce.ring[i];
469 		sprintf(ring->name, "vce%d", i);
470 		if (amdgpu_sriov_vf(adev)) {
471 			/* DOORBELL only works under SRIOV */
472 			ring->use_doorbell = true;
473 
474 			/* currently only use the first encoding ring for sriov,
475 			 * so set unused location for other unused rings.
476 			 */
477 			if (i == 0)
478 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
479 			else
480 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
481 		}
482 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
483 				     hw_prio, NULL);
484 		if (r)
485 			return r;
486 	}
487 
488 
489 	r = amdgpu_vce_entity_init(adev);
490 	if (r)
491 		return r;
492 
493 	r = amdgpu_virt_alloc_mm_table(adev);
494 	if (r)
495 		return r;
496 
497 	return r;
498 }
499 
500 static int vce_v4_0_sw_fini(void *handle)
501 {
502 	int r;
503 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
504 
505 	/* free MM table */
506 	amdgpu_virt_free_mm_table(adev);
507 
508 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
509 		kvfree(adev->vce.saved_bo);
510 		adev->vce.saved_bo = NULL;
511 	}
512 
513 	r = amdgpu_vce_suspend(adev);
514 	if (r)
515 		return r;
516 
517 	return amdgpu_vce_sw_fini(adev);
518 }
519 
520 static int vce_v4_0_hw_init(void *handle)
521 {
522 	int r, i;
523 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
524 
525 	if (amdgpu_sriov_vf(adev))
526 		r = vce_v4_0_sriov_start(adev);
527 	else
528 		r = vce_v4_0_start(adev);
529 	if (r)
530 		return r;
531 
532 	for (i = 0; i < adev->vce.num_rings; i++) {
533 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
534 		if (r)
535 			return r;
536 	}
537 
538 	DRM_INFO("VCE initialized successfully.\n");
539 
540 	return 0;
541 }
542 
543 static int vce_v4_0_hw_fini(void *handle)
544 {
545 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
546 
547 	cancel_delayed_work_sync(&adev->vce.idle_work);
548 
549 	if (!amdgpu_sriov_vf(adev)) {
550 		/* vce_v4_0_wait_for_idle(handle); */
551 		vce_v4_0_stop(adev);
552 	} else {
553 		/* full access mode, so don't touch any VCE register */
554 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
555 	}
556 
557 	return 0;
558 }
559 
560 static int vce_v4_0_suspend(void *handle)
561 {
562 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
563 	int r, idx;
564 
565 	if (adev->vce.vcpu_bo == NULL)
566 		return 0;
567 
568 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
569 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
570 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
571 			void *ptr = adev->vce.cpu_addr;
572 
573 			memcpy_fromio(adev->vce.saved_bo, ptr, size);
574 		}
575 		drm_dev_exit(idx);
576 	}
577 
578 	/*
579 	 * Proper cleanups before halting the HW engine:
580 	 *   - cancel the delayed idle work
581 	 *   - enable powergating
582 	 *   - enable clockgating
583 	 *   - disable dpm
584 	 *
585 	 * TODO: to align with the VCN implementation, move the
586 	 * jobs for clockgating/powergating/dpm setting to
587 	 * ->set_powergating_state().
588 	 */
589 	cancel_delayed_work_sync(&adev->vce.idle_work);
590 
591 	if (adev->pm.dpm_enabled) {
592 		amdgpu_dpm_enable_vce(adev, false);
593 	} else {
594 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
595 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
596 						       AMD_PG_STATE_GATE);
597 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
598 						       AMD_CG_STATE_GATE);
599 	}
600 
601 	r = vce_v4_0_hw_fini(adev);
602 	if (r)
603 		return r;
604 
605 	return amdgpu_vce_suspend(adev);
606 }
607 
608 static int vce_v4_0_resume(void *handle)
609 {
610 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
611 	int r, idx;
612 
613 	if (adev->vce.vcpu_bo == NULL)
614 		return -EINVAL;
615 
616 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
617 
618 		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
619 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
620 			void *ptr = adev->vce.cpu_addr;
621 
622 			memcpy_toio(ptr, adev->vce.saved_bo, size);
623 			drm_dev_exit(idx);
624 		}
625 	} else {
626 		r = amdgpu_vce_resume(adev);
627 		if (r)
628 			return r;
629 	}
630 
631 	return vce_v4_0_hw_init(adev);
632 }
633 
634 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
635 {
636 	uint32_t offset, size;
637 	uint64_t tmr_mc_addr;
638 
639 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
640 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
641 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
642 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
643 
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
645 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
646 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
647 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
648 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
649 
650 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
651 
652 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
653 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
654 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
655 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
656 			(tmr_mc_addr >> 8));
657 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
658 			(tmr_mc_addr >> 40) & 0xff);
659 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
660 	} else {
661 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
662 			(adev->vce.gpu_addr >> 8));
663 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
664 			(adev->vce.gpu_addr >> 40) & 0xff);
665 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
666 	}
667 
668 	size = VCE_V4_0_FW_SIZE;
669 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
670 
671 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
672 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
673 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
674 	size = VCE_V4_0_STACK_SIZE;
675 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
676 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
677 
678 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
679 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
680 	offset += size;
681 	size = VCE_V4_0_DATA_SIZE;
682 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
683 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
684 
685 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
686 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
687 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
688 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
689 }
690 
/**
 * vce_v4_0_set_clockgating_state - clock gating callback (no-op)
 *
 * @handle: amdgpu_device pointer (unused)
 * @state: requested clock gating state (unused)
 *
 * Placeholder that accepts any request without touching the hardware.
 *
 * Always returns 0.
 */
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* needed for driver unload*/
	return 0;
}
697 
698 #if 0
699 static bool vce_v4_0_is_idle(void *handle)
700 {
701 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
702 	u32 mask = 0;
703 
704 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
705 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
706 
707 	return !(RREG32(mmSRBM_STATUS2) & mask);
708 }
709 
710 static int vce_v4_0_wait_for_idle(void *handle)
711 {
712 	unsigned i;
713 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
714 
715 	for (i = 0; i < adev->usec_timeout; i++)
716 		if (vce_v4_0_is_idle(handle))
717 			return 0;
718 
719 	return -ETIMEDOUT;
720 }
721 
/*
 * VCE_STATUS busy bits used by the soft reset check.
 * NOTE(review): the bit layout table in vce_v4_0_check_soft_reset() labels
 * bit 3 as RB2 and bit 6 as AUTO ACTIVE, which does not match the AUTO_BUSY
 * value below -- confirm against the register specification.
 */
#define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
#define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
/* combined mask tested against VCE_STATUS; RB1_BUSY is not part of it */
#define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
727 
728 static bool vce_v4_0_check_soft_reset(void *handle)
729 {
730 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
731 	u32 srbm_soft_reset = 0;
732 
733 	/* According to VCE team , we should use VCE_STATUS instead
734 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
735 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
736 	 * instance's registers are accessed
737 	 * (0 for 1st instance, 10 for 2nd instance).
738 	 *
739 	 *VCE_STATUS
740 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
741 	 *|----+----+-----------+----+----+----+----------+---------+----|
742 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
743 	 *
744 	 * VCE team suggest use bit 3--bit 6 for busy status check
745 	 */
746 	mutex_lock(&adev->grbm_idx_mutex);
747 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
748 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
749 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
750 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
751 	}
752 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
753 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
754 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
755 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
756 	}
757 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
758 	mutex_unlock(&adev->grbm_idx_mutex);
759 
760 	if (srbm_soft_reset) {
761 		adev->vce.srbm_soft_reset = srbm_soft_reset;
762 		return true;
763 	} else {
764 		adev->vce.srbm_soft_reset = 0;
765 		return false;
766 	}
767 }
768 
769 static int vce_v4_0_soft_reset(void *handle)
770 {
771 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
772 	u32 srbm_soft_reset;
773 
774 	if (!adev->vce.srbm_soft_reset)
775 		return 0;
776 	srbm_soft_reset = adev->vce.srbm_soft_reset;
777 
778 	if (srbm_soft_reset) {
779 		u32 tmp;
780 
781 		tmp = RREG32(mmSRBM_SOFT_RESET);
782 		tmp |= srbm_soft_reset;
783 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
784 		WREG32(mmSRBM_SOFT_RESET, tmp);
785 		tmp = RREG32(mmSRBM_SOFT_RESET);
786 
787 		udelay(50);
788 
789 		tmp &= ~srbm_soft_reset;
790 		WREG32(mmSRBM_SOFT_RESET, tmp);
791 		tmp = RREG32(mmSRBM_SOFT_RESET);
792 
793 		/* Wait a little for things to settle down */
794 		udelay(50);
795 	}
796 
797 	return 0;
798 }
799 
800 static int vce_v4_0_pre_soft_reset(void *handle)
801 {
802 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
803 
804 	if (!adev->vce.srbm_soft_reset)
805 		return 0;
806 
807 	mdelay(5);
808 
809 	return vce_v4_0_suspend(adev);
810 }
811 
812 
813 static int vce_v4_0_post_soft_reset(void *handle)
814 {
815 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
816 
817 	if (!adev->vce.srbm_soft_reset)
818 		return 0;
819 
820 	mdelay(5);
821 
822 	return vce_v4_0_resume(adev);
823 }
824 
825 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
826 {
827 	u32 tmp, data;
828 
829 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
830 	if (override)
831 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
832 	else
833 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
834 
835 	if (tmp != data)
836 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
837 }
838 
839 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
840 					     bool gated)
841 {
842 	u32 data;
843 
844 	/* Set Override to disable Clock Gating */
845 	vce_v4_0_override_vce_clock_gating(adev, true);
846 
847 	/* This function enables MGCG which is controlled by firmware.
848 	   With the clocks in the gated state the core is still
849 	   accessible but the firmware will throttle the clocks on the
850 	   fly as necessary.
851 	*/
852 	if (gated) {
853 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
854 		data |= 0x1ff;
855 		data &= ~0xef0000;
856 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
857 
858 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
859 		data |= 0x3ff000;
860 		data &= ~0xffc00000;
861 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
862 
863 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
864 		data |= 0x2;
865 		data &= ~0x00010000;
866 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
867 
868 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
869 		data |= 0x37f;
870 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
871 
872 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
873 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
874 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
875 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
876 			0x8;
877 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
878 	} else {
879 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
880 		data &= ~0x80010;
881 		data |= 0xe70008;
882 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
883 
884 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
885 		data |= 0xffc00000;
886 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
887 
888 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
889 		data |= 0x10000;
890 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
891 
892 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
893 		data &= ~0xffc00000;
894 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
895 
896 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
897 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
898 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
899 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
900 			  0x8);
901 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
902 	}
903 	vce_v4_0_override_vce_clock_gating(adev, false);
904 }
905 
906 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
907 {
908 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
909 
910 	if (enable)
911 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
912 	else
913 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
914 
915 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
916 }
917 
918 static int vce_v4_0_set_clockgating_state(void *handle,
919 					  enum amd_clockgating_state state)
920 {
921 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
922 	bool enable = (state == AMD_CG_STATE_GATE);
923 	int i;
924 
925 	if ((adev->asic_type == CHIP_POLARIS10) ||
926 		(adev->asic_type == CHIP_TONGA) ||
927 		(adev->asic_type == CHIP_FIJI))
928 		vce_v4_0_set_bypass_mode(adev, enable);
929 
930 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
931 		return 0;
932 
933 	mutex_lock(&adev->grbm_idx_mutex);
934 	for (i = 0; i < 2; i++) {
935 		/* Program VCE Instance 0 or 1 if not harvested */
936 		if (adev->vce.harvest_config & (1 << i))
937 			continue;
938 
939 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
940 
941 		if (enable) {
942 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
943 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
944 			data &= ~(0xf | 0xff0);
945 			data |= ((0x0 << 0) | (0x04 << 4));
946 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
947 
948 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
949 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
950 			data &= ~(0xf | 0xff0);
951 			data |= ((0x0 << 0) | (0x04 << 4));
952 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
953 		}
954 
955 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
956 	}
957 
958 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
959 	mutex_unlock(&adev->grbm_idx_mutex);
960 
961 	return 0;
962 }
963 #endif
964 
965 static int vce_v4_0_set_powergating_state(void *handle,
966 					  enum amd_powergating_state state)
967 {
968 	/* This doesn't actually powergate the VCE block.
969 	 * That's done in the dpm code via the SMC.  This
970 	 * just re-inits the block as necessary.  The actual
971 	 * gating still happens in the dpm code.  We should
972 	 * revisit this when there is a cleaner line between
973 	 * the smc and the hw blocks
974 	 */
975 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
976 
977 	if (state == AMD_PG_STATE_GATE)
978 		return vce_v4_0_stop(adev);
979 	else
980 		return vce_v4_0_start(adev);
981 }
982 
983 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
984 					struct amdgpu_ib *ib, uint32_t flags)
985 {
986 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
987 
988 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
989 	amdgpu_ring_write(ring, vmid);
990 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
991 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
992 	amdgpu_ring_write(ring, ib->length_dw);
993 }
994 
995 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
996 			u64 seq, unsigned flags)
997 {
998 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
999 
1000 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
1001 	amdgpu_ring_write(ring, addr);
1002 	amdgpu_ring_write(ring, upper_32_bits(addr));
1003 	amdgpu_ring_write(ring, seq);
1004 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
1005 }
1006 
1007 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1008 {
1009 	amdgpu_ring_write(ring, VCE_CMD_END);
1010 }
1011 
1012 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1013 				   uint32_t val, uint32_t mask)
1014 {
1015 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1016 	amdgpu_ring_write(ring,	reg << 2);
1017 	amdgpu_ring_write(ring, mask);
1018 	amdgpu_ring_write(ring, val);
1019 }
1020 
1021 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1022 				   unsigned int vmid, uint64_t pd_addr)
1023 {
1024 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
1025 
1026 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1027 
1028 	/* wait for reg writes */
1029 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1030 			       vmid * hub->ctx_addr_distance,
1031 			       lower_32_bits(pd_addr), 0xffffffff);
1032 }
1033 
1034 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1035 			       uint32_t reg, uint32_t val)
1036 {
1037 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1038 	amdgpu_ring_write(ring,	reg << 2);
1039 	amdgpu_ring_write(ring, val);
1040 }
1041 
1042 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1043 					struct amdgpu_irq_src *source,
1044 					unsigned type,
1045 					enum amdgpu_interrupt_state state)
1046 {
1047 	uint32_t val = 0;
1048 
1049 	if (!amdgpu_sriov_vf(adev)) {
1050 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1051 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1052 
1053 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1054 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1055 	}
1056 	return 0;
1057 }
1058 
1059 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1060 				      struct amdgpu_irq_src *source,
1061 				      struct amdgpu_iv_entry *entry)
1062 {
1063 	DRM_DEBUG("IH: VCE\n");
1064 
1065 	switch (entry->src_data[0]) {
1066 	case 0:
1067 	case 1:
1068 	case 2:
1069 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1070 		break;
1071 	default:
1072 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1073 			  entry->src_id, entry->src_data[0]);
1074 		break;
1075 	}
1076 
1077 	return 0;
1078 }
1079 
1080 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1081 	.name = "vce_v4_0",
1082 	.early_init = vce_v4_0_early_init,
1083 	.late_init = NULL,
1084 	.sw_init = vce_v4_0_sw_init,
1085 	.sw_fini = vce_v4_0_sw_fini,
1086 	.hw_init = vce_v4_0_hw_init,
1087 	.hw_fini = vce_v4_0_hw_fini,
1088 	.suspend = vce_v4_0_suspend,
1089 	.resume = vce_v4_0_resume,
1090 	.is_idle = NULL /* vce_v4_0_is_idle */,
1091 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1092 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1093 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1094 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1095 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1096 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1097 	.set_powergating_state = vce_v4_0_set_powergating_state,
1098 };
1099 
1100 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1101 	.type = AMDGPU_RING_TYPE_VCE,
1102 	.align_mask = 0x3f,
1103 	.nop = VCE_CMD_NO_OP,
1104 	.support_64bit_ptrs = false,
1105 	.no_user_fence = true,
1106 	.vmhub = AMDGPU_MMHUB_0,
1107 	.get_rptr = vce_v4_0_ring_get_rptr,
1108 	.get_wptr = vce_v4_0_ring_get_wptr,
1109 	.set_wptr = vce_v4_0_ring_set_wptr,
1110 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1111 	.emit_frame_size =
1112 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1113 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1114 		4 + /* vce_v4_0_emit_vm_flush */
1115 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1116 		1, /* vce_v4_0_ring_insert_end */
1117 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1118 	.emit_ib = vce_v4_0_ring_emit_ib,
1119 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1120 	.emit_fence = vce_v4_0_ring_emit_fence,
1121 	.test_ring = amdgpu_vce_ring_test_ring,
1122 	.test_ib = amdgpu_vce_ring_test_ib,
1123 	.insert_nop = amdgpu_ring_insert_nop,
1124 	.insert_end = vce_v4_0_ring_insert_end,
1125 	.pad_ib = amdgpu_ring_generic_pad_ib,
1126 	.begin_use = amdgpu_vce_ring_begin_use,
1127 	.end_use = amdgpu_vce_ring_end_use,
1128 	.emit_wreg = vce_v4_0_emit_wreg,
1129 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1130 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1131 };
1132 
1133 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1134 {
1135 	int i;
1136 
1137 	for (i = 0; i < adev->vce.num_rings; i++) {
1138 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1139 		adev->vce.ring[i].me = i;
1140 	}
1141 	DRM_INFO("VCE enabled in VM mode\n");
1142 }
1143 
1144 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1145 	.set = vce_v4_0_set_interrupt_state,
1146 	.process = vce_v4_0_process_interrupt,
1147 };
1148 
1149 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1150 {
1151 	adev->vce.irq.num_types = 1;
1152 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1153 };
1154 
1155 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1156 {
1157 	.type = AMD_IP_BLOCK_TYPE_VCE,
1158 	.major = 4,
1159 	.minor = 0,
1160 	.rev = 0,
1161 	.funcs = &vce_v4_0_ip_funcs,
1162 };
1163