xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision e3b9f1e81de2083f359bacd2a94bf1c024f2ede0)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vce/vce_4_0_offset.h"
36 #include "vce/vce_4_0_default.h"
37 #include "vce/vce_4_0_sh_mask.h"
38 #include "mmhub/mmhub_1_0_offset.h"
39 #include "mmhub/mmhub_1_0_sh_mask.h"
40 
41 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
42 
43 #define VCE_V4_0_FW_SIZE	(384 * 1024)
44 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
45 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
46 
47 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
48 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
49 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
50 
51 /**
52  * vce_v4_0_ring_get_rptr - get read pointer
53  *
54  * @ring: amdgpu_ring pointer
55  *
56  * Returns the current hardware read pointer
57  */
58 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
59 {
60 	struct amdgpu_device *adev = ring->adev;
61 
62 	if (ring == &adev->vce.ring[0])
63 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
64 	else if (ring == &adev->vce.ring[1])
65 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
66 	else
67 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
68 }
69 
70 /**
71  * vce_v4_0_ring_get_wptr - get write pointer
72  *
73  * @ring: amdgpu_ring pointer
74  *
75  * Returns the current hardware write pointer
76  */
77 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
78 {
79 	struct amdgpu_device *adev = ring->adev;
80 
81 	if (ring->use_doorbell)
82 		return adev->wb.wb[ring->wptr_offs];
83 
84 	if (ring == &adev->vce.ring[0])
85 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
86 	else if (ring == &adev->vce.ring[1])
87 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
88 	else
89 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
90 }
91 
92 /**
93  * vce_v4_0_ring_set_wptr - set write pointer
94  *
95  * @ring: amdgpu_ring pointer
96  *
97  * Commits the write pointer to the hardware
98  */
99 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
100 {
101 	struct amdgpu_device *adev = ring->adev;
102 
103 	if (ring->use_doorbell) {
104 		/* XXX check if swapping is necessary on BE */
105 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
106 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
107 		return;
108 	}
109 
110 	if (ring == &adev->vce.ring[0])
111 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
112 			lower_32_bits(ring->wptr));
113 	else if (ring == &adev->vce.ring[1])
114 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
115 			lower_32_bits(ring->wptr));
116 	else
117 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
118 			lower_32_bits(ring->wptr));
119 }
120 
/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to come up
 *
 * @adev: amdgpu_device pointer
 *
 * Polls VCE_STATUS for the FW_LOADED bit.  If the firmware does not
 * report within ~1s, the ECPU is soft-reset and polling restarts,
 * up to 10 attempts.
 *
 * Returns 0 once the firmware reports loaded, -ETIMEDOUT otherwise.
 */
static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < 10; ++i) {
		/* poll up to 100 * 10ms per reset attempt */
		for (j = 0; j < 100; ++j) {
			uint32_t status =
				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));

			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
				return 0;
			mdelay(10);
		}

		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
		/* pulse the ECPU soft reset, then let it run again */
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);
		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
		mdelay(10);

	}

	return -ETIMEDOUT;
}
148 
/**
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: MM table holding the mmsch_v1_0 init header and engine tables
 *
 * Programs the MM scheduler (used under SRIOV) with the GPU address and
 * size of the descriptor table, kicks it off via the mailbox, and
 * busy-waits for the acknowledge bits in the mailbox response register.
 *
 * Returns 0 on success, -EBUSY if the MMSCH does not respond in time.
 */
static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
	uint32_t size;

	/* total descriptor size in dwords: header plus both engine tables */
	size = header->header_size + header->vce_table_size + header->uvd_table_size;

	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);

	/* 4, set resp to zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);

	/* reset ring 0 software/writeback state before the MMSCH takes over */
	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
	adev->wb.wb[adev->vce.ring[0].wptr_offs] = 0;
	adev->vce.ring[0].wptr = 0;
	adev->vce.ring[0].wptr_old = 0;

	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);

	/* poll for the ack bits; ~10ms total (1000 * 10us) */
	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
	loop = 1000;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(10);
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
		return -EBUSY;
	}

	return 0;
}
200 
/**
 * vce_v4_0_sriov_start - start VCE under SRIOV via the MM scheduler
 *
 * @adev: amdgpu_device pointer
 *
 * Instead of programming registers directly (the VF may not have
 * access), this builds a table of direct-write / read-modify-write /
 * polling commands mirroring the bare-metal start + MC resume sequence,
 * appends it to the shared MM table, and asks the MMSCH to replay it.
 * The table is only built once (guarded by vce_table_offset/size == 0).
 *
 * Returns the result of vce_v4_0_mmsch_start().
 */
static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
		header->version = MMSCH_VERSION;
		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;

		/* place the VCE table after the UVD table if one exists */
		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
			header->vce_table_offset = header->header_size;
		else
			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;

		init_table += header->vce_table_offset;

		/* only ring 0 is used under SRIOV */
		ring = &adev->vce.ring[0];
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO),
					    lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI),
					    upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE),
					    ring->ring_size / 4);

		/* BEGIN OF MC_RESUME (mirrors vce_v4_0_mc_resume()) */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

		/* BAR0 points at the firmware image: PSP-loaded ucode or the VCPU BO */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
						adev->vce.gpu_addr >> 8);
			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
						(adev->vce.gpu_addr >> 40) & 0xff);
		}
		/* BAR1/BAR2 (stack/data) always live in the VCPU BO */
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR1),
						(adev->vce.gpu_addr >> 40) & 0xff);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_40BIT_BAR2),
						adev->vce.gpu_addr >> 8);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
						(adev->vce.gpu_addr >> 40) & 0xff);

		/* cache windows: fw image, then stack, then data */
		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
		size = VCE_V4_0_FW_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
					offset & ~0x0f000000);
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

		/* with PSP loading the fw is not in the BO, so stack starts at 0 */
		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
		size = VCE_V4_0_STACK_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1),
					(offset & ~0x0f000000) | (1 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

		offset += size;
		size = VCE_V4_0_DATA_SIZE;
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2),
					(offset & ~0x0f000000) | (2 << 24));
		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
						   VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);

		/* end of MC_RESUME */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
						   ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
						   ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);

		/* have the MMSCH poll until the firmware reports loaded */
		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
					      VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);

		/* clear BUSY flag */
		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
						   ~VCE_STATUS__JOB_BUSY_MASK, 0);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		header->vce_table_size = table_size;
	}

	return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
}
319 
320 /**
321  * vce_v4_0_start - start VCE block
322  *
323  * @adev: amdgpu_device pointer
324  *
325  * Setup and start the VCE block
326  */
static int vce_v4_0_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	int r;

	/* program base/size/pointers for all three rings */
	ring = &adev->vce.ring[0];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);

	ring = &adev->vce.ring[1];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);

	ring = &adev->vce.ring[2];

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);

	/* set up the LMI/cache windows, then flag the block busy */
	vce_v4_0_mc_resume(adev);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
			~VCE_STATUS__JOB_BUSY_MASK);

	/* enable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);

	/* release the ECPU from soft reset and let the firmware boot */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
	mdelay(100);

	r = vce_v4_0_firmware_loaded(adev);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	if (r) {
		DRM_ERROR("VCE not responding, giving up!!!\n");
		return r;
	}

	return 0;
}
378 
/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Gates the VCPU clock, holds the ECPU in soft reset and clears the
 * busy flag.  Always returns 0.
 */
static int vce_v4_0_stop(struct amdgpu_device *adev)
{

	/* disable the VCPU clock */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);

	/* hold on ECPU */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);

	/* clear BUSY flag */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);

	/* Set Clock-Gating off */
	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
		vce_v4_0_set_vce_sw_clock_gating(adev, false);
	*/

	return 0;
}
399 
/**
 * vce_v4_0_early_init - early IP-block init
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Picks the number of VCE rings and installs the ring/irq callbacks.
 */
static int vce_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) /* SRIOV currently supports only one VCE encode ring */
		adev->vce.num_rings = 1;
	else
		adev->vce.num_rings = 3;

	vce_v4_0_set_ring_funcs(adev);
	vce_v4_0_set_irq_funcs(adev);

	return 0;
}
414 
415 static int vce_v4_0_sw_init(void *handle)
416 {
417 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
418 	struct amdgpu_ring *ring;
419 	unsigned size;
420 	int r, i;
421 
422 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
423 	if (r)
424 		return r;
425 
426 	size  = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE;
427 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
428 		size += VCE_V4_0_FW_SIZE;
429 
430 	r = amdgpu_vce_sw_init(adev, size);
431 	if (r)
432 		return r;
433 
434 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
435 		const struct common_firmware_header *hdr;
436 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
437 
438 		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
439 		if (!adev->vce.saved_bo)
440 			return -ENOMEM;
441 
442 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
443 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
444 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
445 		adev->firmware.fw_size +=
446 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
447 		DRM_INFO("PSP loading VCE firmware\n");
448 	} else {
449 		r = amdgpu_vce_resume(adev);
450 		if (r)
451 			return r;
452 	}
453 
454 	for (i = 0; i < adev->vce.num_rings; i++) {
455 		ring = &adev->vce.ring[i];
456 		sprintf(ring->name, "vce%d", i);
457 		if (amdgpu_sriov_vf(adev)) {
458 			/* DOORBELL only works under SRIOV */
459 			ring->use_doorbell = true;
460 
461 			/* currently only use the first encoding ring for sriov,
462 			 * so set unused location for other unused rings.
463 			 */
464 			if (i == 0)
465 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2;
466 			else
467 				ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1;
468 		}
469 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
470 		if (r)
471 			return r;
472 	}
473 
474 	r = amdgpu_virt_alloc_mm_table(adev);
475 	if (r)
476 		return r;
477 
478 	return r;
479 }
480 
/**
 * vce_v4_0_sw_fini - software-side teardown for the VCE block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Frees the MM table, the PSP suspend scratch buffer and the common
 * VCE state.  Returns 0 on success, negative errno on failure.
 */
static int vce_v4_0_sw_fini(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* free MM table */
	amdgpu_virt_free_mm_table(adev);

	/* saved_bo is only allocated on the PSP firmware-load path */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		kfree(adev->vce.saved_bo);
		adev->vce.saved_bo = NULL;
	}

	r = amdgpu_vce_suspend(adev);
	if (r)
		return r;

	return amdgpu_vce_sw_fini(adev);
}
500 
/**
 * vce_v4_0_hw_init - bring up the VCE hardware
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Starts the block (via MMSCH under SRIOV, directly otherwise) and
 * runs a ring test on every ring, marking them ready on success.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vce_v4_0_hw_init(void *handle)
{
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		r = vce_v4_0_sriov_start(adev);
	else
		r = vce_v4_0_start(adev);
	if (r)
		return r;

	/* mark all rings not-ready until each passes its ring test */
	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	for (i = 0; i < adev->vce.num_rings; i++) {
		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
		if (r)
			return r;
		else
			adev->vce.ring[i].ready = true;
	}

	DRM_INFO("VCE initialized successfully.\n");

	return 0;
}
528 
/**
 * vce_v4_0_hw_fini - shut down the VCE hardware
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Stops the block on bare metal; under SRIOV the host owns the
 * registers so nothing is written.  Rings are marked not ready.
 */
static int vce_v4_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	if (!amdgpu_sriov_vf(adev)) {
		/* vce_v4_0_wait_for_idle(handle); */
		vce_v4_0_stop(adev);
	} else {
		/* full access mode, so don't touch any VCE register */
		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
	}

	for (i = 0; i < adev->vce.num_rings; i++)
		adev->vce.ring[i].ready = false;

	return 0;
}
547 
/**
 * vce_v4_0_suspend - suspend the VCE block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * On the PSP firmware-load path the VCPU BO contents are copied out to
 * the saved_bo scratch buffer (the BO may lose its contents across
 * suspend), then the hardware is brought down.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vce_v4_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return 0;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		/* BO may be mapped through IO space, hence memcpy_fromio */
		memcpy_fromio(adev->vce.saved_bo, ptr, size);
	}

	r = vce_v4_0_hw_fini(adev);
	if (r)
		return r;

	return amdgpu_vce_suspend(adev);
}
569 
/**
 * vce_v4_0_resume - resume the VCE block
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Restores the VCPU BO contents (PSP path) or re-uploads the firmware
 * (direct path), then re-initializes the hardware.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int vce_v4_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
		void *ptr = adev->vce.cpu_addr;

		/* counterpart of the memcpy_fromio() done in suspend */
		memcpy_toio(ptr, adev->vce.saved_bo, size);
	} else {
		r = amdgpu_vce_resume(adev);
		if (r)
			return r;
	}

	return vce_v4_0_hw_init(adev);
}
591 
/**
 * vce_v4_0_mc_resume - program memory-controller side of the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Sets up clock gating defaults, the LMI (local memory interface)
 * controls, and the three VCPU cache windows (firmware image, stack,
 * data) pointing into the VCPU BO / PSP-loaded ucode.  Mirrors the
 * SRIOV sequence built in vce_v4_0_sriov_start().
 */
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
	uint32_t offset, size;

	/* clock gating setup; mask values match vce_v4_0_set_vce_sw_clock_gating() */
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);

	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);

	/* BAR0 points at the firmware image: PSP-loaded ucode or the VCPU BO */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
	} else {
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
			(adev->vce.gpu_addr >> 8));
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
			(adev->vce.gpu_addr >> 40) & 0xff);
	}

	/* cache window 0: firmware image */
	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
	size = VCE_V4_0_FW_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);

	/* cache window 1: stack (offset 0 when PSP loads the firmware) */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
	size = VCE_V4_0_STACK_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);

	/* cache window 2: data, immediately after the stack */
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
	offset += size;
	size = VCE_V4_0_DATA_SIZE;
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);

	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
}
643 
static int vce_v4_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	/* Intentionally a no-op; the callback must still exist so the
	 * common IP-block code can invoke it during driver unload.
	 */
	return 0;
}
650 
651 #if 0
/* NOTE(review): compiled out (#if 0); references pre-SOC15 register
 * mmSRBM_STATUS2, which is not provided by the headers included above.
 */
static bool vce_v4_0_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 mask = 0;

	/* only consider instances that are not harvested */
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;

	return !(RREG32(mmSRBM_STATUS2) & mask);
}
662 
/* Busy-poll vce_v4_0_is_idle() up to adev->usec_timeout iterations.
 * Returns 0 when idle, -ETIMEDOUT otherwise.  (Compiled out, #if 0.)
 */
static int vce_v4_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++)
		if (vce_v4_0_is_idle(handle))
			return 0;

	return -ETIMEDOUT;
}
674 
675 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
676 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
677 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
678 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
679 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
680 
681 static bool vce_v4_0_check_soft_reset(void *handle)
682 {
683 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
684 	u32 srbm_soft_reset = 0;
685 
686 	/* According to VCE team , we should use VCE_STATUS instead
687 	 * SRBM_STATUS.VCE_BUSY bit for busy status checking.
688 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
689 	 * instance's registers are accessed
690 	 * (0 for 1st instance, 10 for 2nd instance).
691 	 *
692 	 *VCE_STATUS
693 	 *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
694 	 *|----+----+-----------+----+----+----+----------+---------+----|
695 	 *|bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
696 	 *
697 	 * VCE team suggest use bit 3--bit 6 for busy status check
698 	 */
699 	mutex_lock(&adev->grbm_idx_mutex);
700 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
701 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
702 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
703 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
704 	}
705 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
706 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) {
707 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
708 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
709 	}
710 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
711 	mutex_unlock(&adev->grbm_idx_mutex);
712 
713 	if (srbm_soft_reset) {
714 		adev->vce.srbm_soft_reset = srbm_soft_reset;
715 		return true;
716 	} else {
717 		adev->vce.srbm_soft_reset = 0;
718 		return false;
719 	}
720 }
721 
/* Pulse the SRBM soft-reset bits recorded by check_soft_reset(), with
 * settle delays around the register toggles.  (Compiled out, #if 0.)
 */
static int vce_v4_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 srbm_soft_reset;

	if (!adev->vce.srbm_soft_reset)
		return 0;
	srbm_soft_reset = adev->vce.srbm_soft_reset;

	if (srbm_soft_reset) {
		u32 tmp;

		/* assert the reset bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		/* Wait a little for things to settle down */
		udelay(50);
	}

	return 0;
}
752 
/* Suspend VCE before a pending soft reset is executed.
 * No-op when no reset was flagged.  (Compiled out, #if 0.)
 */
static int vce_v4_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_suspend(adev);
}
764 
765 
/* Resume VCE after a soft reset was executed.
 * No-op when no reset was flagged.  (Compiled out, #if 0.)
 */
static int vce_v4_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (!adev->vce.srbm_soft_reset)
		return 0;

	mdelay(5);

	return vce_v4_0_resume(adev);
}
777 
778 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
779 {
780 	u32 tmp, data;
781 
782 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
783 	if (override)
784 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
785 	else
786 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
787 
788 	if (tmp != data)
789 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
790 }
791 
/**
 * vce_v4_0_set_vce_sw_clock_gating - toggle software-driven MGCG
 *
 * @adev: amdgpu_device pointer
 * @gated: true to enable clock gating, false to disable
 *
 * (Compiled out, #if 0.)  The exact bit meanings of the magic masks
 * below are not defined in the included headers — presumably they
 * mirror the hardware MGCG control fields; verify against the
 * register spec before re-enabling this code.
 */
static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
					     bool gated)
{
	u32 data;

	/* Set Override to disable Clock Gating */
	vce_v4_0_override_vce_clock_gating(adev, true);

	/* This function enables MGCG which is controlled by firmware.
	   With the clocks in the gated state the core is still
	   accessible but the firmware will throttle the clocks on the
	   fly as necessary.
	*/
	if (gated) {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data |= 0x1ff;
		data &= ~0xef0000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0x3ff000;
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x2;
		data &= ~0x00010000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data |= 0x37f;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			0x8;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	} else {
		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
		data &= ~0x80010;
		data |= 0xe70008;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
		data |= 0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
		data |= 0x10000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
		data &= ~0xffc00000;
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);

		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
			  0x8);
		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
	}
	/* release the override again */
	vce_v4_0_override_vce_clock_gating(adev, false);
}
858 
859 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
860 {
861 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
862 
863 	if (enable)
864 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
865 	else
866 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
867 
868 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
869 }
870 
871 static int vce_v4_0_set_clockgating_state(void *handle,
872 					  enum amd_clockgating_state state)
873 {
874 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
875 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
876 	int i;
877 
878 	if ((adev->asic_type == CHIP_POLARIS10) ||
879 		(adev->asic_type == CHIP_TONGA) ||
880 		(adev->asic_type == CHIP_FIJI))
881 		vce_v4_0_set_bypass_mode(adev, enable);
882 
883 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
884 		return 0;
885 
886 	mutex_lock(&adev->grbm_idx_mutex);
887 	for (i = 0; i < 2; i++) {
888 		/* Program VCE Instance 0 or 1 if not harvested */
889 		if (adev->vce.harvest_config & (1 << i))
890 			continue;
891 
892 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
893 
894 		if (enable) {
895 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
896 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A);
897 			data &= ~(0xf | 0xff0);
898 			data |= ((0x0 << 0) | (0x04 << 4));
899 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data);
900 
901 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
902 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING);
903 			data &= ~(0xf | 0xff0);
904 			data |= ((0x0 << 0) | (0x04 << 4));
905 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data);
906 		}
907 
908 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
909 	}
910 
911 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
912 	mutex_unlock(&adev->grbm_idx_mutex);
913 
914 	return 0;
915 }
916 
917 static int vce_v4_0_set_powergating_state(void *handle,
918 					  enum amd_powergating_state state)
919 {
920 	/* This doesn't actually powergate the VCE block.
921 	 * That's done in the dpm code via the SMC.  This
922 	 * just re-inits the block as necessary.  The actual
923 	 * gating still happens in the dpm code.  We should
924 	 * revisit this when there is a cleaner line between
925 	 * the smc and the hw blocks
926 	 */
927 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
928 
929 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
930 		return 0;
931 
932 	if (state == AMD_PG_STATE_GATE)
933 		/* XXX do we need a vce_v4_0_stop()? */
934 		return 0;
935 	else
936 		return vce_v4_0_start(adev);
937 }
938 #endif
939 
940 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
941 		struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch)
942 {
943 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
944 	amdgpu_ring_write(ring, vmid);
945 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
946 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
947 	amdgpu_ring_write(ring, ib->length_dw);
948 }
949 
950 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
951 			u64 seq, unsigned flags)
952 {
953 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
954 
955 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
956 	amdgpu_ring_write(ring, addr);
957 	amdgpu_ring_write(ring, upper_32_bits(addr));
958 	amdgpu_ring_write(ring, seq);
959 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
960 }
961 
/* Terminate the current command stream with an END packet. */
static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, VCE_CMD_END);
}
966 
/* Emit the VM flush sequence for @vmid on the VCE ring:
 * program the page-directory base for the vmid's context, wait for the
 * low dword to land, then trigger the hub's TLB invalidation engine and
 * wait for its ack.  All register accesses go through VCE_CMD_REG_WRITE /
 * VCE_CMD_REG_WAIT packets against the ring's vmhub registers.
 */
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
			 unsigned int vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
	uint64_t flags = AMDGPU_PTE_VALID;
	unsigned eng = ring->vm_inv_eng;

	/* convert pd_addr into the PDE form expected by the hub and mark it valid */
	amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
	pd_addr |= flags;

	/* write the high dword of the page-table base for this vmid */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, upper_32_bits(pd_addr));

	/* write the low dword of the page-table base */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* wait until the low dword reads back as written */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, lower_32_bits(pd_addr));

	/* flush TLB */
	amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
	amdgpu_ring_write(ring, req);

	/* wait for flush */
	amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
	amdgpu_ring_write(ring, 1 << vmid);
	amdgpu_ring_write(ring, 1 << vmid);
}
1002 
1003 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1004 					struct amdgpu_irq_src *source,
1005 					unsigned type,
1006 					enum amdgpu_interrupt_state state)
1007 {
1008 	uint32_t val = 0;
1009 
1010 	if (!amdgpu_sriov_vf(adev)) {
1011 		if (state == AMDGPU_IRQ_STATE_ENABLE)
1012 			val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1013 
1014 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1015 				~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1016 	}
1017 	return 0;
1018 }
1019 
1020 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1021 				      struct amdgpu_irq_src *source,
1022 				      struct amdgpu_iv_entry *entry)
1023 {
1024 	DRM_DEBUG("IH: VCE\n");
1025 
1026 	switch (entry->src_data[0]) {
1027 	case 0:
1028 	case 1:
1029 	case 2:
1030 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1031 		break;
1032 	default:
1033 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1034 			  entry->src_id, entry->src_data[0]);
1035 		break;
1036 	}
1037 
1038 	return 0;
1039 }
1040 
/* IP-block callbacks for VCE 4.0.  Idle/reset hooks are intentionally
 * stubbed out (NULL); powergating is handled elsewhere (see the disabled
 * vce_v4_0_set_powergating_state above).
 */
const struct amd_ip_funcs vce_v4_0_ip_funcs = {
	.name = "vce_v4_0",
	.early_init = vce_v4_0_early_init,
	.late_init = NULL,
	.sw_init = vce_v4_0_sw_init,
	.sw_fini = vce_v4_0_sw_fini,
	.hw_init = vce_v4_0_hw_init,
	.hw_fini = vce_v4_0_hw_fini,
	.suspend = vce_v4_0_suspend,
	.resume = vce_v4_0_resume,
	.is_idle = NULL /* vce_v4_0_is_idle */,
	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
	.soft_reset = NULL /* vce_v4_0_soft_reset */,
	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
	.set_clockgating_state = vce_v4_0_set_clockgating_state,
	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
};
1060 
/* Ring callbacks for the VCE 4.0 VM-mode rings.  emit_frame_size and
 * emit_ib_size are dword budgets matching the emit functions above.
 */
static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCE,
	.align_mask = 0x3f,
	.nop = VCE_CMD_NO_OP,
	.support_64bit_ptrs = false,
	.vmhub = AMDGPU_MMHUB,
	.get_rptr = vce_v4_0_ring_get_rptr,
	.get_wptr = vce_v4_0_ring_get_wptr,
	.set_wptr = vce_v4_0_ring_set_wptr,
	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
	.emit_frame_size =
		17 + /* vce_v4_0_emit_vm_flush */
		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
		1, /* vce_v4_0_ring_insert_end */
	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
	.emit_ib = vce_v4_0_ring_emit_ib,
	.emit_vm_flush = vce_v4_0_emit_vm_flush,
	.emit_fence = vce_v4_0_ring_emit_fence,
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vce_v4_0_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vce_ring_begin_use,
	.end_use = amdgpu_vce_ring_end_use,
};
1087 
1088 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1089 {
1090 	int i;
1091 
1092 	for (i = 0; i < adev->vce.num_rings; i++)
1093 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1094 	DRM_INFO("VCE enabled in VM mode\n");
1095 }
1096 
/* Interrupt source callbacks: enable/disable and dispatch. */
static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
	.set = vce_v4_0_set_interrupt_state,
	.process = vce_v4_0_process_interrupt,
};
1101 
1102 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1103 {
1104 	adev->vce.irq.num_types = 1;
1105 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1106 };
1107 
/* IP block descriptor exported to the amdgpu device init tables. */
const struct amdgpu_ip_block_version vce_v4_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_VCE,
	.major = 4,
	.minor = 0,
	.rev = 0,
	.funcs = &vce_v4_0_ip_funcs,
};
1116