xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drm_drv.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_vce.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "soc15_common.h"
35 #include "mmsch_v1_0.h"
36 
37 #include "vce/vce_4_0_offset.h"
38 #include "vce/vce_4_0_default.h"
39 #include "vce/vce_4_0_sh_mask.h"
40 #include "mmhub/mmhub_1_0_offset.h"
41 #include "mmhub/mmhub_1_0_sh_mask.h"
42 
43 #include "ivsrcid/vce/irqsrcs_vce_4_0.h"
44 
45 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
46 
47 #define VCE_V4_0_FW_SIZE	(384 * 1024)
48 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
49 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
50 
51 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
52 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
53 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
54 
55 /**
56  * vce_v4_0_ring_get_rptr - get read pointer
57  *
58  * @ring: amdgpu_ring pointer
59  *
60  * Returns the current hardware read pointer
61  */
62 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
63 {
64 	struct amdgpu_device *adev = ring->adev;
65 
66 	if (ring->me == 0)
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
68 	else if (ring->me == 1)
69 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
70 	else
71 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
72 }
73 
74 /**
75  * vce_v4_0_ring_get_wptr - get write pointer
76  *
77  * @ring: amdgpu_ring pointer
78  *
79  * Returns the current hardware write pointer
80  */
81 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
82 {
83 	struct amdgpu_device *adev = ring->adev;
84 
85 	if (ring->use_doorbell)
86 		return *ring->wptr_cpu_addr;
87 
88 	if (ring->me == 0)
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
90 	else if (ring->me == 1)
91 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
92 	else
93 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
94 }
95 
96 /**
97  * vce_v4_0_ring_set_wptr - set write pointer
98  *
99  * @ring: amdgpu_ring pointer
100  *
101  * Commits the write pointer to the hardware
102  */
103 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
104 {
105 	struct amdgpu_device *adev = ring->adev;
106 
107 	if (ring->use_doorbell) {
108 		/* XXX check if swapping is necessary on BE */
109 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
110 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
111 		return;
112 	}
113 
114 	if (ring->me == 0)
115 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
116 			lower_32_bits(ring->wptr));
117 	else if (ring->me == 1)
118 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
119 			lower_32_bits(ring->wptr));
120 	else
121 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
122 			lower_32_bits(ring->wptr));
123 }
124 
125 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
126 {
127 	int i, j;
128 
129 	for (i = 0; i < 10; ++i) {
130 		for (j = 0; j < 100; ++j) {
131 			uint32_t status =
132 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
133 
134 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
135 				return 0;
136 			mdelay(10);
137 		}
138 
139 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
140 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
141 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
142 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
143 		mdelay(10);
144 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
145 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
146 		mdelay(10);
147 
148 	}
149 
150 	return -ETIMEDOUT;
151 }
152 
153 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
154 				struct amdgpu_mm_table *table)
155 {
156 	uint32_t data = 0, loop;
157 	uint64_t addr = table->gpu_addr;
158 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
159 	uint32_t size;
160 
161 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
162 
163 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
164 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
165 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
166 
167 	/* 2, update vmid of descriptor */
168 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
169 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
170 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
171 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
172 
173 	/* 3, notify mmsch about the size of this descriptor */
174 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
175 
176 	/* 4, set resp to zero */
177 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
178 
179 	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
180 	*adev->vce.ring[0].wptr_cpu_addr = 0;
181 	adev->vce.ring[0].wptr = 0;
182 	adev->vce.ring[0].wptr_old = 0;
183 
184 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
185 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
186 
187 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
188 	loop = 1000;
189 	while ((data & 0x10000002) != 0x10000002) {
190 		udelay(10);
191 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
192 		loop--;
193 		if (!loop)
194 			break;
195 	}
196 
197 	if (!loop) {
198 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
199 		return -EBUSY;
200 	}
201 
202 	return 0;
203 }
204 
205 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
206 {
207 	struct amdgpu_ring *ring;
208 	uint32_t offset, size;
209 	uint32_t table_size = 0;
210 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
211 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
212 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
213 	struct mmsch_v1_0_cmd_end end = { { 0 } };
214 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
215 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
216 
217 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
218 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
219 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
220 	end.cmd_header.command_type = MMSCH_COMMAND__END;
221 
222 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
223 		header->version = MMSCH_VERSION;
224 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
225 
226 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
227 			header->vce_table_offset = header->header_size;
228 		else
229 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
230 
231 		init_table += header->vce_table_offset;
232 
233 		ring = &adev->vce.ring[0];
234 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
235 					    lower_32_bits(ring->gpu_addr));
236 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
237 					    upper_32_bits(ring->gpu_addr));
238 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
239 					    ring->ring_size / 4);
240 
241 		/* BEGING OF MC_RESUME */
242 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
243 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
244 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
245 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
246 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
247 
248 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
249 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
250 			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
251 			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
252 			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
253 
254 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
255 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
256 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
257 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
258 						(tmr_mc_addr >> 40) & 0xff);
259 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
260 		} else {
261 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
262 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
263 						adev->vce.gpu_addr >> 8);
264 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
265 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
266 						(adev->vce.gpu_addr >> 40) & 0xff);
267 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
268 						offset & ~0x0f000000);
269 
270 		}
271 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
272 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
273 						adev->vce.gpu_addr >> 8);
274 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
275 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
276 						(adev->vce.gpu_addr >> 40) & 0xff);
277 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
278 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
279 						adev->vce.gpu_addr >> 8);
280 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
281 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
282 						(adev->vce.gpu_addr >> 40) & 0xff);
283 
284 		size = VCE_V4_0_FW_SIZE;
285 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
286 
287 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
288 		size = VCE_V4_0_STACK_SIZE;
289 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
290 					(offset & ~0x0f000000) | (1 << 24));
291 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
292 
293 		offset += size;
294 		size = VCE_V4_0_DATA_SIZE;
295 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
296 					(offset & ~0x0f000000) | (2 << 24));
297 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
298 
299 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
300 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
301 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
302 						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
303 
304 		/* end of MC_RESUME */
305 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
306 						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
307 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
308 						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
309 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
310 						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
311 
312 		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
313 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
314 					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
315 
316 		/* clear BUSY flag */
317 		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
318 						   ~VCE_STATUS__JOB_BUSY_MASK, 0);
319 
320 		/* add end packet */
321 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
322 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
323 		header->vce_table_size = table_size;
324 	}
325 
326 	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
327 }
328 
329 /**
330  * vce_v4_0_start - start VCE block
331  *
332  * @adev: amdgpu_device pointer
333  *
334  * Setup and start the VCE block
335  */
336 static int vce_v4_0_start(struct amdgpu_device *adev)
337 {
338 	struct amdgpu_ring *ring;
339 	int r;
340 
341 	ring = &adev->vce.ring[0];
342 
343 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
344 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
345 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
346 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
347 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
348 
349 	ring = &adev->vce.ring[1];
350 
351 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
352 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
353 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
354 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
355 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
356 
357 	ring = &adev->vce.ring[2];
358 
359 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
360 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
361 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
362 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
363 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
364 
365 	vce_v4_0_mc_resume(adev);
366 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
367 			~VCE_STATUS__JOB_BUSY_MASK);
368 
369 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
370 
371 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
372 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
373 	mdelay(100);
374 
375 	r = vce_v4_0_firmware_loaded(adev);
376 
377 	/* clear BUSY flag */
378 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
379 
380 	if (r) {
381 		DRM_ERROR("VCE not responding, giving up!!!\n");
382 		return r;
383 	}
384 
385 	return 0;
386 }
387 
388 static int vce_v4_0_stop(struct amdgpu_device *adev)
389 {
390 
391 	/* Disable VCPU */
392 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
393 
394 	/* hold on ECPU */
395 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
396 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
397 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
398 
399 	/* clear VCE_STATUS */
400 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0);
401 
402 	/* Set Clock-Gating off */
403 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
404 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
405 	*/
406 
407 	return 0;
408 }
409 
410 static int vce_v4_0_early_init(void *handle)
411 {
412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
413 
414 	if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */
415 		adev->vce.num_rings = 1;
416 	else
417 		adev->vce.num_rings = 3;
418 
419 	vce_v4_0_set_ring_funcs(adev);
420 	vce_v4_0_set_irq_funcs(adev);
421 
422 	return 0;
423 }
424 
425 static int vce_v4_0_sw_init(void *handle)
426 {
427 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
428 	struct amdgpu_ring *ring;
429 
430 	unsigned size;
431 	int r, i;
432 
433 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
434 	if (r)
435 		return r;
436 
437 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
438 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
439 		size += VCE_V4_0_FW_SIZE;
440 
441 	r = amdgpu_vce_sw_init(adev, size);
442 	if (r)
443 		return r;
444 
445 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
446 		const struct common_firmware_header *hdr;
447 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
448 
449 		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
450 		if (!adev->vce.saved_bo)
451 			return -ENOMEM;
452 
453 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
454 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
455 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
456 		adev->firmware.fw_size +=
457 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
458 		DRM_INFO("PSP loading VCE firmware\n");
459 	} else {
460 		r = amdgpu_vce_resume(adev);
461 		if (r)
462 			return r;
463 	}
464 
465 	for (i = 0; i < adev->vce.num_rings; i++) {
466 		enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i);
467 
468 		ring = &adev->vce.ring[i];
469 		ring->vm_hub = AMDGPU_MMHUB0(0);
470 		sprintf(ring->name, "vce%d", i);
471 		if (amdgpu_sriov_vf(adev)) {
472 			/* DOORBELL only works under SRIOV */
473 			ring->use_doorbell = true;
474 
475 			/* currently only use the first encoding ring for sriov,
476 			 * so set unused location for other unused rings.
477 			 */
478 			if (i == 0)
479 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2;
480 			else
481 				ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1;
482 		}
483 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0,
484 				     hw_prio, NULL);
485 		if (r)
486 			return r;
487 	}
488 
489 	r = amdgpu_virt_alloc_mm_table(adev);
490 	if (r)
491 		return r;
492 
493 	return r;
494 }
495 
496 static int vce_v4_0_sw_fini(void *handle)
497 {
498 	int r;
499 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
500 
501 	/* free MM table */
502 	amdgpu_virt_free_mm_table(adev);
503 
504 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
505 		kvfree(adev->vce.saved_bo);
506 		adev->vce.saved_bo = NULL;
507 	}
508 
509 	r = amdgpu_vce_suspend(adev);
510 	if (r)
511 		return r;
512 
513 	return amdgpu_vce_sw_fini(adev);
514 }
515 
516 static int vce_v4_0_hw_init(void *handle)
517 {
518 	int r, i;
519 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
520 
521 	if (amdgpu_sriov_vf(adev))
522 		r = vce_v4_0_sriov_start(adev);
523 	else
524 		r = vce_v4_0_start(adev);
525 	if (r)
526 		return r;
527 
528 	for (i = 0; i < adev->vce.num_rings; i++) {
529 		r = amdgpu_ring_test_helper(&adev->vce.ring[i]);
530 		if (r)
531 			return r;
532 	}
533 
534 	DRM_INFO("VCE initialized successfully.\n");
535 
536 	return 0;
537 }
538 
539 static int vce_v4_0_hw_fini(void *handle)
540 {
541 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
542 
543 	cancel_delayed_work_sync(&adev->vce.idle_work);
544 
545 	if (!amdgpu_sriov_vf(adev)) {
546 		/* vce_v4_0_wait_for_idle(handle); */
547 		vce_v4_0_stop(adev);
548 	} else {
549 		/* full access mode, so don't touch any VCE register */
550 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
551 	}
552 
553 	return 0;
554 }
555 
556 static int vce_v4_0_suspend(void *handle)
557 {
558 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
559 	int r, idx;
560 
561 	if (adev->vce.vcpu_bo == NULL)
562 		return 0;
563 
564 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
565 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
566 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
567 			void *ptr = adev->vce.cpu_addr;
568 
569 			memcpy_fromio(adev->vce.saved_bo, ptr, size);
570 		}
571 		drm_dev_exit(idx);
572 	}
573 
574 	/*
575 	 * Proper cleanups before halting the HW engine:
576 	 *   - cancel the delayed idle work
577 	 *   - enable powergating
578 	 *   - enable clockgating
579 	 *   - disable dpm
580 	 *
581 	 * TODO: to align with the VCN implementation, move the
582 	 * jobs for clockgating/powergating/dpm setting to
583 	 * ->set_powergating_state().
584 	 */
585 	cancel_delayed_work_sync(&adev->vce.idle_work);
586 
587 	if (adev->pm.dpm_enabled) {
588 		amdgpu_dpm_enable_vce(adev, false);
589 	} else {
590 		amdgpu_asic_set_vce_clocks(adev, 0, 0);
591 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
592 						       AMD_PG_STATE_GATE);
593 		amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
594 						       AMD_CG_STATE_GATE);
595 	}
596 
597 	r = vce_v4_0_hw_fini(adev);
598 	if (r)
599 		return r;
600 
601 	return amdgpu_vce_suspend(adev);
602 }
603 
604 static int vce_v4_0_resume(void *handle)
605 {
606 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
607 	int r, idx;
608 
609 	if (adev->vce.vcpu_bo == NULL)
610 		return -EINVAL;
611 
612 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
613 
614 		if (drm_dev_enter(adev_to_drm(adev), &idx)) {
615 			unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
616 			void *ptr = adev->vce.cpu_addr;
617 
618 			memcpy_toio(ptr, adev->vce.saved_bo, size);
619 			drm_dev_exit(idx);
620 		}
621 	} else {
622 		r = amdgpu_vce_resume(adev);
623 		if (r)
624 			return r;
625 	}
626 
627 	return vce_v4_0_hw_init(adev);
628 }
629 
630 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
631 {
632 	uint32_t offset, size;
633 	uint64_t tmr_mc_addr;
634 
635 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
636 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
637 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
638 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
639 
640 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
641 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
642 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
643 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
644 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
645 
646 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
647 
648 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
649 		tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
650 										adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
651 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
652 			(tmr_mc_addr >> 8));
653 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
654 			(tmr_mc_addr >> 40) & 0xff);
655 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
656 	} else {
657 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
658 			(adev->vce.gpu_addr >> 8));
659 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
660 			(adev->vce.gpu_addr >> 40) & 0xff);
661 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
662 	}
663 
664 	size = VCE_V4_0_FW_SIZE;
665 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
666 
667 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
668 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
669 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
670 	size = VCE_V4_0_STACK_SIZE;
671 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
672 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
673 
674 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
675 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
676 	offset += size;
677 	size = VCE_V4_0_DATA_SIZE;
678 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
679 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
680 
681 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
682 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
683 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
684 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
685 }
686 
687 static int vce_v4_0_set_clockgating_state(void *handle,
688 					  enum amd_clockgating_state state)
689 {
690 	/* needed for driver unload*/
691 	return 0;
692 }
693 
694 #if 0
695 static bool vce_v4_0_is_idle(void *handle)
696 {
697 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
698 	u32 mask = 0;
699 
700 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
701 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
702 
703 	return !(RREG32(mmSRBM_STATUS2) & mask);
704 }
705 
706 static int vce_v4_0_wait_for_idle(void *handle)
707 {
708 	unsigned i;
709 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
710 
711 	for (i = 0; i < adev->usec_timeout; i++)
712 		if (vce_v4_0_is_idle(handle))
713 			return 0;
714 
715 	return -ETIMEDOUT;
716 }
717 
718 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
719 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
720 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
721 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
722 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
723 
724 static bool vce_v4_0_check_soft_reset(void *handle)
725 {
726 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
727 	u32 srbm_soft_reset = 0;
728 
729 	/* According to VCE team , we should use VCE_STATUS instead
730 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
731 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
732 	 * instance's registers are accessed
733 	 * (0 for 1st instance, 10 for 2nd instance).
734 	 *
735 	 *VCE_STATUS
736 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
737 	 *|----+----+-----------+----+----+----+----------+---------+----|
738 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
739 	 *
740 	 * VCE team suggest use bit 3--bit 6 for busy status check
741 	 */
742 	mutex_lock(&adev->grbm_idx_mutex);
743 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
744 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
745 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
746 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
747 	}
748 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
749 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
750 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
751 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
752 	}
753 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
754 	mutex_unlock(&adev->grbm_idx_mutex);
755 
756 	if (srbm_soft_reset) {
757 		adev->vce.srbm_soft_reset = srbm_soft_reset;
758 		return true;
759 	} else {
760 		adev->vce.srbm_soft_reset = 0;
761 		return false;
762 	}
763 }
764 
765 static int vce_v4_0_soft_reset(void *handle)
766 {
767 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
768 	u32 srbm_soft_reset;
769 
770 	if (!adev->vce.srbm_soft_reset)
771 		return 0;
772 	srbm_soft_reset = adev->vce.srbm_soft_reset;
773 
774 	if (srbm_soft_reset) {
775 		u32 tmp;
776 
777 		tmp = RREG32(mmSRBM_SOFT_RESET);
778 		tmp |= srbm_soft_reset;
779 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
780 		WREG32(mmSRBM_SOFT_RESET, tmp);
781 		tmp = RREG32(mmSRBM_SOFT_RESET);
782 
783 		udelay(50);
784 
785 		tmp &= ~srbm_soft_reset;
786 		WREG32(mmSRBM_SOFT_RESET, tmp);
787 		tmp = RREG32(mmSRBM_SOFT_RESET);
788 
789 		/* Wait a little for things to settle down */
790 		udelay(50);
791 	}
792 
793 	return 0;
794 }
795 
796 static int vce_v4_0_pre_soft_reset(void *handle)
797 {
798 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
799 
800 	if (!adev->vce.srbm_soft_reset)
801 		return 0;
802 
803 	mdelay(5);
804 
805 	return vce_v4_0_suspend(adev);
806 }
807 
808 
809 static int vce_v4_0_post_soft_reset(void *handle)
810 {
811 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
812 
813 	if (!adev->vce.srbm_soft_reset)
814 		return 0;
815 
816 	mdelay(5);
817 
818 	return vce_v4_0_resume(adev);
819 }
820 
821 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
822 {
823 	u32 tmp, data;
824 
825 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
826 	if (override)
827 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
828 	else
829 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
830 
831 	if (tmp != data)
832 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
833 }
834 
835 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
836 					     bool gated)
837 {
838 	u32 data;
839 
840 	/* Set Override to disable Clock Gating */
841 	vce_v4_0_override_vce_clock_gating(adev, true);
842 
843 	/* This function enables MGCG which is controlled by firmware.
844 	   With the clocks in the gated state the core is still
845 	   accessible but the firmware will throttle the clocks on the
846 	   fly as necessary.
847 	*/
848 	if (gated) {
849 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
850 		data |= 0x1ff;
851 		data &= ~0xef0000;
852 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
853 
854 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
855 		data |= 0x3ff000;
856 		data &= ~0xffc00000;
857 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
858 
859 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
860 		data |= 0x2;
861 		data &= ~0x00010000;
862 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
863 
864 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
865 		data |= 0x37f;
866 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
867 
868 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
869 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
870 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
871 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
872 			0x8;
873 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
874 	} else {
875 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
876 		data &= ~0x80010;
877 		data |= 0xe70008;
878 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
879 
880 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
881 		data |= 0xffc00000;
882 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
883 
884 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
885 		data |= 0x10000;
886 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
887 
888 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
889 		data &= ~0xffc00000;
890 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
891 
892 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
893 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
894 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
895 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
896 			  0x8);
897 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
898 	}
899 	vce_v4_0_override_vce_clock_gating(adev, false);
900 }
901 
902 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
903 {
904 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
905 
906 	if (enable)
907 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
908 	else
909 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
910 
911 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
912 }
913 
914 static int vce_v4_0_set_clockgating_state(void *handle,
915 					  enum amd_clockgating_state state)
916 {
917 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
918 	bool enable = (state == AMD_CG_STATE_GATE);
919 	int i;
920 
921 	if ((adev->asic_type == CHIP_POLARIS10) ||
922 		(adev->asic_type == CHIP_TONGA) ||
923 		(adev->asic_type == CHIP_FIJI))
924 		vce_v4_0_set_bypass_mode(adev, enable);
925 
926 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
927 		return 0;
928 
929 	mutex_lock(&adev->grbm_idx_mutex);
930 	for (i = 0; i < 2; i++) {
931 		/* Program VCE Instance 0 or 1 if not harvested */
932 		if (adev->vce.harvest_config & (1 << i))
933 			continue;
934 
935 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
936 
937 		if (enable) {
938 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
939 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
940 			data &= ~(0xf | 0xff0);
941 			data |= ((0x0 << 0) | (0x04 << 4));
942 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
943 
944 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
945 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
946 			data &= ~(0xf | 0xff0);
947 			data |= ((0x0 << 0) | (0x04 << 4));
948 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
949 		}
950 
951 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
952 	}
953 
954 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
955 	mutex_unlock(&adev->grbm_idx_mutex);
956 
957 	return 0;
958 }
959 #endif
960 
961 static int vce_v4_0_set_powergating_state(void *handle,
962 					  enum amd_powergating_state state)
963 {
964 	/* This doesn't actually powergate the VCE block.
965 	 * That's done in the dpm code via the SMC.  This
966 	 * just re-inits the block as necessary.  The actual
967 	 * gating still happens in the dpm code.  We should
968 	 * revisit this when there is a cleaner line between
969 	 * the smc and the hw blocks
970 	 */
971 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
972 
973 	if (state == AMD_PG_STATE_GATE)
974 		return vce_v4_0_stop(adev);
975 	else
976 		return vce_v4_0_start(adev);
977 }
978 
979 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
980 					struct amdgpu_ib *ib, uint32_t flags)
981 {
982 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
983 
984 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
985 	amdgpu_ring_write(ring, vmid);
986 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
987 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
988 	amdgpu_ring_write(ring, ib->length_dw);
989 }
990 
991 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
992 			u64 seq, unsigned flags)
993 {
994 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
995 
996 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
997 	amdgpu_ring_write(ring, addr);
998 	amdgpu_ring_write(ring, upper_32_bits(addr));
999 	amdgpu_ring_write(ring, seq);
1000 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
1001 }
1002 
1003 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
1004 {
1005 	amdgpu_ring_write(ring, VCE_CMD_END);
1006 }
1007 
1008 static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1009 				   uint32_t val, uint32_t mask)
1010 {
1011 	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1012 	amdgpu_ring_write(ring,	reg << 2);
1013 	amdgpu_ring_write(ring, mask);
1014 	amdgpu_ring_write(ring, val);
1015 }
1016 
1017 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
1018 				   unsigned int vmid, uint64_t pd_addr)
1019 {
1020 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
1021 
1022 	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1023 
1024 	/* wait for reg writes */
1025 	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 +
1026 			       vmid * hub->ctx_addr_distance,
1027 			       lower_32_bits(pd_addr), 0xffffffff);
1028 }
1029 
1030 static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
1031 			       uint32_t reg, uint32_t val)
1032 {
1033 	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1034 	amdgpu_ring_write(ring,	reg << 2);
1035 	amdgpu_ring_write(ring, val);
1036 }
1037 
1038 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1039 					struct amdgpu_irq_src *source,
1040 					unsigned type,
1041 					enum amdgpu_interrupt_state state)
1042 {
1043 	uint32_t val = 0;
1044 
1045 	if (!amdgpu_sriov_vf(adev)) {
1046 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1047 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1048 
1049 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1050 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1051 	}
1052 	return 0;
1053 }
1054 
1055 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1056 				      struct amdgpu_irq_src *source,
1057 				      struct amdgpu_iv_entry *entry)
1058 {
1059 	DRM_DEBUG("IH: VCE\n");
1060 
1061 	switch (entry->src_data[0]) {
1062 	case 0:
1063 	case 1:
1064 	case 2:
1065 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1066 		break;
1067 	default:
1068 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1069 			  entry->src_id, entry->src_data[0]);
1070 		break;
1071 	}
1072 
1073 	return 0;
1074 }
1075 
1076 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1077 	.name = "vce_v4_0",
1078 	.early_init = vce_v4_0_early_init,
1079 	.late_init = NULL,
1080 	.sw_init = vce_v4_0_sw_init,
1081 	.sw_fini = vce_v4_0_sw_fini,
1082 	.hw_init = vce_v4_0_hw_init,
1083 	.hw_fini = vce_v4_0_hw_fini,
1084 	.suspend = vce_v4_0_suspend,
1085 	.resume = vce_v4_0_resume,
1086 	.is_idle = NULL /* vce_v4_0_is_idle */,
1087 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1088 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1089 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1090 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1091 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1092 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1093 	.set_powergating_state = vce_v4_0_set_powergating_state,
1094 };
1095 
1096 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1097 	.type = AMDGPU_RING_TYPE_VCE,
1098 	.align_mask = 0x3f,
1099 	.nop = VCE_CMD_NO_OP,
1100 	.support_64bit_ptrs = false,
1101 	.no_user_fence = true,
1102 	.get_rptr = vce_v4_0_ring_get_rptr,
1103 	.get_wptr = vce_v4_0_ring_get_wptr,
1104 	.set_wptr = vce_v4_0_ring_set_wptr,
1105 	.patch_cs_in_place = amdgpu_vce_ring_parse_cs_vm,
1106 	.emit_frame_size =
1107 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1108 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1109 		4 + /* vce_v4_0_emit_vm_flush */
1110 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1111 		1, /* vce_v4_0_ring_insert_end */
1112 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1113 	.emit_ib = vce_v4_0_ring_emit_ib,
1114 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1115 	.emit_fence = vce_v4_0_ring_emit_fence,
1116 	.test_ring = amdgpu_vce_ring_test_ring,
1117 	.test_ib = amdgpu_vce_ring_test_ib,
1118 	.insert_nop = amdgpu_ring_insert_nop,
1119 	.insert_end = vce_v4_0_ring_insert_end,
1120 	.pad_ib = amdgpu_ring_generic_pad_ib,
1121 	.begin_use = amdgpu_vce_ring_begin_use,
1122 	.end_use = amdgpu_vce_ring_end_use,
1123 	.emit_wreg = vce_v4_0_emit_wreg,
1124 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
1125 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1126 };
1127 
1128 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1129 {
1130 	int i;
1131 
1132 	for (i = 0; i < adev->vce.num_rings; i++) {
1133 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1134 		adev->vce.ring[i].me = i;
1135 	}
1136 	DRM_INFO("VCE enabled in VM mode\n");
1137 }
1138 
1139 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1140 	.set = vce_v4_0_set_interrupt_state,
1141 	.process = vce_v4_0_process_interrupt,
1142 };
1143 
1144 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1145 {
1146 	adev->vce.irq.num_types = 1;
1147 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1148 };
1149 
1150 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1151 {
1152 	.type = AMD_IP_BLOCK_TYPE_VCE,
1153 	.major = 4,
1154 	.minor = 0,
1155 	.rev = 0,
1156 	.funcs = &vce_v4_0_ip_funcs,
1157 };
1158