xref: /linux/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c (revision 593043d35ddff8ab033546c2a89bb1d4080d03e1)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  */
26 
27 #include <linux/firmware.h>
28 #include <drm/drmP.h>
29 #include "amdgpu.h"
30 #include "amdgpu_vce.h"
31 #include "soc15d.h"
32 #include "soc15_common.h"
33 #include "mmsch_v1_0.h"
34 
35 #include "vega10/soc15ip.h"
36 #include "vega10/VCE/vce_4_0_offset.h"
37 #include "vega10/VCE/vce_4_0_default.h"
38 #include "vega10/VCE/vce_4_0_sh_mask.h"
39 #include "vega10/MMHUB/mmhub_1_0_offset.h"
40 #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
41 
42 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
43 
44 #define VCE_V4_0_FW_SIZE	(384 * 1024)
45 #define VCE_V4_0_STACK_SIZE	(64 * 1024)
46 #define VCE_V4_0_DATA_SIZE	((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
47 
48 static void vce_v4_0_mc_resume(struct amdgpu_device *adev);
49 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev);
50 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev);
51 
52 static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt,
53 					  uint32_t *init_table,
54 					  uint32_t reg_offset,
55 					  uint32_t value)
56 {
57 	direct_wt->cmd_header.reg_offset = reg_offset;
58 	direct_wt->reg_value = value;
59 	memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write));
60 }
61 
62 static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt,
63 						 uint32_t *init_table,
64 						 uint32_t reg_offset,
65 						 uint32_t mask, uint32_t data)
66 {
67 	direct_rd_mod_wt->cmd_header.reg_offset = reg_offset;
68 	direct_rd_mod_wt->mask_value = mask;
69 	direct_rd_mod_wt->write_data = data;
70 	memcpy((void *)init_table, direct_rd_mod_wt,
71 	       sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write));
72 }
73 
74 static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll,
75 					    uint32_t *init_table,
76 					    uint32_t reg_offset,
77 					    uint32_t mask, uint32_t wait)
78 {
79 	direct_poll->cmd_header.reg_offset = reg_offset;
80 	direct_poll->mask_value = mask;
81 	direct_poll->wait_value = wait;
82 	memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling));
83 }
84 
85 #define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \
86 	mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \
87 				      init_table, (reg), \
88 				      (mask), (data)); \
89 	init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
90 	table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \
91 }
92 
93 #define INSERT_DIRECT_WT(reg, value) { \
94 	mmsch_insert_direct_wt(&direct_wt, \
95 			       init_table, (reg), \
96 			       (value)); \
97 	init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
98 	table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \
99 }
100 
101 #define INSERT_DIRECT_POLL(reg, mask, wait) { \
102 	mmsch_insert_direct_poll(&direct_poll, \
103 				 init_table, (reg), \
104 				 (mask), (wait)); \
105 	init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
106 	table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \
107 }
108 
109 /**
110  * vce_v4_0_ring_get_rptr - get read pointer
111  *
112  * @ring: amdgpu_ring pointer
113  *
114  * Returns the current hardware read pointer
115  */
116 static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
117 {
118 	struct amdgpu_device *adev = ring->adev;
119 
120 	if (ring == &adev->vce.ring[0])
121 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
122 	else if (ring == &adev->vce.ring[1])
123 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
124 	else
125 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
126 }
127 
128 /**
129  * vce_v4_0_ring_get_wptr - get write pointer
130  *
131  * @ring: amdgpu_ring pointer
132  *
133  * Returns the current hardware write pointer
134  */
135 static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
136 {
137 	struct amdgpu_device *adev = ring->adev;
138 
139 	if (ring->use_doorbell)
140 		return adev->wb.wb[ring->wptr_offs];
141 
142 	if (ring == &adev->vce.ring[0])
143 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
144 	else if (ring == &adev->vce.ring[1])
145 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
146 	else
147 		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
148 }
149 
150 /**
151  * vce_v4_0_ring_set_wptr - set write pointer
152  *
153  * @ring: amdgpu_ring pointer
154  *
155  * Commits the write pointer to the hardware
156  */
157 static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
158 {
159 	struct amdgpu_device *adev = ring->adev;
160 
161 	if (ring->use_doorbell) {
162 		/* XXX check if swapping is necessary on BE */
163 		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
164 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
165 		return;
166 	}
167 
168 	if (ring == &adev->vce.ring[0])
169 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
170 			lower_32_bits(ring->wptr));
171 	else if (ring == &adev->vce.ring[1])
172 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
173 			lower_32_bits(ring->wptr));
174 	else
175 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3),
176 			lower_32_bits(ring->wptr));
177 }
178 
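/**
 * vce_v4_0_firmware_loaded - wait for the VCE firmware to signal readiness
 *
 * @adev: amdgpu_device pointer
 *
 * Poll VCE_STATUS for the FW_LOADED bit, soft resetting the ECPU between
 * retries.  Returns 0 once the firmware reports loaded, -ETIMEDOUT otherwise.
 */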
179 static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev)
180 {
181 	int i, j;
182 
183 	for (i = 0; i < 10; ++i) {
184 		for (j = 0; j < 100; ++j) {
185 			uint32_t status =
186 				RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS));
187 
188 			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
189 				return 0;
190 			mdelay(10);
191 		}
192 
193 		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
194 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
195 				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
196 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
197 		mdelay(10);
198 		WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
199 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
200 		mdelay(10);
201 
202 	}
203 
204 	return -ETIMEDOUT;
205 }
206 
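/**
 * vce_v4_0_mmsch_start - hand the init descriptor table to the MMSCH
 *
 * @adev: amdgpu_device pointer
 * @table: memory descriptor table holding the MMSCH init header and commands
 *
 * Program the MMSCH with the table address, VMID and size, kick off the
 * initialization and wait for the mailbox response.  Returns 0 on success,
 * -EBUSY if the MMSCH does not acknowledge in time.
 */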
207 static int vce_v4_0_mmsch_start(struct amdgpu_device *adev,
208 				struct amdgpu_mm_table *table)
209 {
210 	uint32_t data = 0, loop;
211 	uint64_t addr = table->gpu_addr;
212 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
213 	uint32_t size;
214 
215 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
216 
217 	/* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */
218 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr));
219 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr));
220 
221 	/* 2, update vmid of descriptor */
222 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID));
223 	data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK;
224 	data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */
225 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data);
226 
227 	/* 3, notify mmsch about the size of this descriptor */
228 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size);
229 
230 	/* 4, set resp to zero */
231 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0);
232 
233 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
234 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001);
235 
236 	data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
237 	loop = 1000;
238 	while ((data & 0x10000002) != 0x10000002) {
239 		udelay(10);
240 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP));
241 		loop--;
242 		if (!loop)
243 			break;
244 	}
245 
246 	if (!loop) {
247 		dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
248 		return -EBUSY;
249 	}
250 
251 	return 0;
252 }
253 
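/**
 * vce_v4_0_sriov_start - start VCE under SR-IOV via the MMSCH
 *
 * @adev: amdgpu_device pointer
 *
 * Build the MMSCH init table (ring setup, MC resume and VCPU start
 * programming) and submit it to the MMSCH.  Returns -EINVAL if the VCE
 * table has already been initialized.
 */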
254 static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
255 {
256 	struct amdgpu_ring *ring;
257 	uint32_t offset, size;
258 	uint32_t table_size = 0;
259 	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
260 	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
261 	struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
262 	struct mmsch_v1_0_cmd_end end = { { 0 } };
263 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
264 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
265 
266 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
267 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
268 	direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
269 	end.cmd_header.command_type = MMSCH_COMMAND__END;
270 
271 	if (header->vce_table_offset == 0 && header->vce_table_size == 0) {
272 		header->version = MMSCH_VERSION;
273 		header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2;
274 
275 		if (header->uvd_table_offset == 0 && header->uvd_table_size == 0)
276 			header->vce_table_offset = header->header_size;
277 		else
278 			header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset;
279 
280 		init_table += header->vce_table_offset;
281 
282 		ring = &adev->vce.ring[0];
283 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr);
284 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr);
285 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr));
286 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
287 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
288 
289 		/* begin of MC_RESUME */
290 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0);
291 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000);
292 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F);
293 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
294 
295 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000);
296 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0);
297 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
298 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
299 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
300 
301 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8);
302 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8);
303 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8);
304 
305 		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
306 		size = VCE_V4_0_FW_SIZE;
307 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF);
308 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
309 
310 		offset += size;
311 		size = VCE_V4_0_STACK_SIZE;
312 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF);
313 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
314 
315 		offset += size;
316 		size = VCE_V4_0_DATA_SIZE;
317 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF);
318 		INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
319 
320 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0);
321 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
322 				0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
323 
324 		/* end of MC_RESUME */
325 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL),
326 				~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK);
327 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
328 				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0);
329 
330 		INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
331 				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK,
332 				VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK);
333 
334 		/* clear BUSY flag */
335 		INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS),
336 				~VCE_STATUS__JOB_BUSY_MASK, 0);
337 
338 		/* add end packet */
339 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
340 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
341 		header->vce_table_size = table_size;
342 
343 		return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table);
344 	}
345 
346 	return -EINVAL; /* already initialized? */
347 }
348 
349 /**
350  * vce_v4_0_start - start VCE block
351  *
352  * @adev: amdgpu_device pointer
353  *
354  * Setup and start the VCE block
355  */
356 static int vce_v4_0_start(struct amdgpu_device *adev)
357 {
358 	struct amdgpu_ring *ring;
359 	int r;
360 
361 	ring = &adev->vce.ring[0];
362 
363 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr));
364 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr));
365 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr);
366 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
367 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4);
368 
369 	ring = &adev->vce.ring[1];
370 
371 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr));
372 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr));
373 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr);
374 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr));
375 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4);
376 
377 	ring = &adev->vce.ring[2];
378 
379 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr));
380 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr));
381 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr);
382 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr));
383 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4);
384 
385 	vce_v4_0_mc_resume(adev);
386 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK,
387 			~VCE_STATUS__JOB_BUSY_MASK);
388 
389 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001);
390 
391 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0,
392 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
393 	mdelay(100);
394 
395 	r = vce_v4_0_firmware_loaded(adev);
396 
397 	/* clear BUSY flag */
398 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
399 
400 	if (r) {
401 		DRM_ERROR("VCE not responding, giving up!!!\n");
402 		return r;
403 	}
404 
405 	return 0;
406 }
407 
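/**
 * vce_v4_0_stop - stop the VCE block
 *
 * @adev: amdgpu_device pointer
 *
 * Disable the VCPU clock, hold the ECPU in soft reset and clear the
 * BUSY flag.
 */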
408 static int vce_v4_0_stop(struct amdgpu_device *adev)
409 {
410 
411 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001);
412 
413 	/* hold on ECPU */
414 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET),
415 			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
416 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
417 
418 	/* clear BUSY flag */
419 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK);
420 
421 	/* Set Clock-Gating off */
422 	/* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
423 		vce_v4_0_set_vce_sw_clock_gating(adev, false);
424 	*/
425 
426 	return 0;
427 }
428 
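/**
 * vce_v4_0_early_init - set the ring count and hook up ring/IRQ functions
 *
 * @handle: amdgpu_device pointer
 */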
429 static int vce_v4_0_early_init(void *handle)
430 {
431 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
432 
433 	if (amdgpu_sriov_vf(adev)) /* currently only VCE0 supports SR-IOV */
434 		adev->vce.num_rings = 1;
435 	else
436 		adev->vce.num_rings = 3;
437 
438 	vce_v4_0_set_ring_funcs(adev);
439 	vce_v4_0_set_irq_funcs(adev);
440 
441 	return 0;
442 }
443 
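/**
 * vce_v4_0_sw_init - software init for the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Register the interrupt source, allocate the firmware/stack/data BO,
 * initialize the rings and, under SR-IOV, allocate the MMSCH MM table.
 */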
444 static int vce_v4_0_sw_init(void *handle)
445 {
446 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
447 	struct amdgpu_ring *ring;
448 	unsigned size;
449 	int r, i;
450 
451 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
452 	if (r)
453 		return r;
454 
455 	size  = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2;
456 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
457 		size += VCE_V4_0_FW_SIZE;
458 
459 	r = amdgpu_vce_sw_init(adev, size);
460 	if (r)
461 		return r;
462 
463 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
464 		const struct common_firmware_header *hdr;
465 		hdr = (const struct common_firmware_header *)adev->vce.fw->data;
466 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE;
467 		adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw;
468 		adev->firmware.fw_size +=
469 			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
470 		DRM_INFO("PSP loading VCE firmware\n");
471 	}
472 
473 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
474 		r = amdgpu_vce_resume(adev);
475 		if (r)
476 			return r;
477 	}
478 
479 	for (i = 0; i < adev->vce.num_rings; i++) {
480 		ring = &adev->vce.ring[i];
481 		sprintf(ring->name, "vce%d", i);
482 		if (amdgpu_sriov_vf(adev)) {
483 			/* DOORBELL only works under SRIOV */
484 			ring->use_doorbell = true;
485 			if (i == 0)
486 				ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2;
487 			else if (i == 1)
488 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2;
489 			else
490 				ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1;
491 		}
492 		r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0);
493 		if (r)
494 			return r;
495 	}
496 
497 	if (amdgpu_sriov_vf(adev)) {
498 		r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
499 					    AMDGPU_GEM_DOMAIN_VRAM,
500 					    &adev->virt.mm_table.bo,
501 					    &adev->virt.mm_table.gpu_addr,
502 					    (void *)&adev->virt.mm_table.cpu_addr);
503 		if (!r) {
504 			memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
505 			DRM_INFO("mm table gpu addr = 0x%llx, cpu addr = %p\n",
506 			       adev->virt.mm_table.gpu_addr,
507 			       adev->virt.mm_table.cpu_addr);
508 		}
509 		return r;
510 	}
511 
512 	return r;
513 }
514 
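/**
 * vce_v4_0_sw_fini - software teardown for the VCE block
 *
 * @handle: amdgpu_device pointer
 *
 * Free the SR-IOV MM table, suspend VCE and release the common VCE state.
 */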
515 static int vce_v4_0_sw_fini(void *handle)
516 {
517 	int r;
518 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
519 
520 	/* free MM table */
521 	if (amdgpu_sriov_vf(adev))
522 		amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
523 				      &adev->virt.mm_table.gpu_addr,
524 				      (void *)&adev->virt.mm_table.cpu_addr);
525 
526 	r = amdgpu_vce_suspend(adev);
527 	if (r)
528 		return r;
529 
530 	return amdgpu_vce_sw_fini(adev);
531 }
532 
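/**
 * vce_v4_0_hw_init - start the VCE block and test the rings
 *
 * @handle: amdgpu_device pointer
 */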
533 static int vce_v4_0_hw_init(void *handle)
534 {
535 	int r, i;
536 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
537 
538 	if (amdgpu_sriov_vf(adev))
539 		r = vce_v4_0_sriov_start(adev);
540 	else
541 		r = vce_v4_0_start(adev);
542 	if (r)
543 		return r;
544 
545 	for (i = 0; i < adev->vce.num_rings; i++)
546 		adev->vce.ring[i].ready = false;
547 
548 	for (i = 0; i < adev->vce.num_rings; i++) {
549 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
550 		if (r)
551 			return r;
552 		else
553 			adev->vce.ring[i].ready = true;
554 	}
555 
556 	DRM_INFO("VCE initialized successfully.\n");
557 
558 	return 0;
559 }
560 
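/**
 * vce_v4_0_hw_fini - stop the VCE block and mark the rings as not ready
 *
 * @handle: amdgpu_device pointer
 */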
561 static int vce_v4_0_hw_fini(void *handle)
562 {
563 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
564 	int i;
565 
566 	/* vce_v4_0_wait_for_idle(handle); */
567 	vce_v4_0_stop(adev);
568 	for (i = 0; i < adev->vce.num_rings; i++)
569 		adev->vce.ring[i].ready = false;
570 
571 	return 0;
572 }
573 
574 static int vce_v4_0_suspend(void *handle)
575 {
576 	int r;
577 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
578 
579 	r = vce_v4_0_hw_fini(adev);
580 	if (r)
581 		return r;
582 
583 	return amdgpu_vce_suspend(adev);
584 }
585 
586 static int vce_v4_0_resume(void *handle)
587 {
588 	int r;
589 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
590 
591 	r = amdgpu_vce_resume(adev);
592 	if (r)
593 		return r;
594 
595 	return vce_v4_0_hw_init(adev);
596 }
597 
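/**
 * vce_v4_0_mc_resume - program the VCE memory controller
 *
 * @adev: amdgpu_device pointer
 *
 * Set up clock gating, the LMI and the VCPU cache registers so the
 * firmware, stack and data regions are visible to the VCPU.
 */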
598 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
599 {
600 	uint32_t offset, size;
601 
602 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
603 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
604 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F);
605 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF);
606 
607 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000);
608 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1);
609 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0);
610 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
611 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
612 
613 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
614 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
615 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
616 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
617 			(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
618 	} else {
619 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
620 			(adev->vce.gpu_addr >> 8));
621 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
622 			(adev->vce.gpu_addr >> 40) & 0xff);
623 	}
624 
625 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
626 	size = VCE_V4_0_FW_SIZE;
627 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
628 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
629 
630 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
631 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff);
632 	offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
633 	size = VCE_V4_0_STACK_SIZE;
634 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24));
635 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size);
636 
637 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8));
638 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff);
639 	offset += size;
640 	size = VCE_V4_0_DATA_SIZE;
641 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24));
642 	WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size);
643 
644 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100);
645 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN),
646 			VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK,
647 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
648 }
649 
650 static int vce_v4_0_set_clockgating_state(void *handle,
651 					  enum amd_clockgating_state state)
652 {
653 	/* needed for driver unload */
654 	return 0;
655 }
656 
657 #if 0
658 static bool vce_v4_0_is_idle(void *handle)
659 {
660 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
661 	u32 mask = 0;
662 
663 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK;
664 	mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK;
665 
666 	return !(RREG32(mmSRBM_STATUS2) & mask);
667 }
668 
669 static int vce_v4_0_wait_for_idle(void *handle)
670 {
671 	unsigned i;
672 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
673 
674 	for (i = 0; i < adev->usec_timeout; i++)
675 		if (vce_v4_0_is_idle(handle))
676 			return 0;
677 
678 	return -ETIMEDOUT;
679 }
680 
681 #define  VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK  0x00000008L   /* AUTO_BUSY */
682 #define  VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK   0x00000010L   /* RB0_BUSY */
683 #define  VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK   0x00000020L   /* RB1_BUSY */
684 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
685 				      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
686 
687 static bool vce_v4_0_check_soft_reset(void *handle)
688 {
689 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
690 	u32 srbm_soft_reset = 0;
691 
692 	/* According to the VCE team, we should use VCE_STATUS instead of
693 	 * the SRBM_STATUS.VCE_BUSY bit for busy status checking.
694 	 * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE
695 	 * instance's registers are accessed
696 	 * (0 for the 1st instance, 0x10 for the 2nd instance).
697 	 *
698 	 * VCE_STATUS
699 	 * |UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 |          |FW_LOADED|JOB |
700 	 * |----+----+-----------+----+----+----+----------+---------+----|
701 	 * |bit8|bit7|    bit6   |bit5|bit4|bit3|   bit2   |  bit1   |bit0|
702 	 *
703 	 * The VCE team suggests using bits 3 to 6 for the busy status check.
704 	 */
705 	mutex_lock(&adev->grbm_idx_mutex);
706 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
707 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
708 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
709 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
710 	}
711 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10);
712 	if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)) & AMDGPU_VCE_STATUS_BUSY_MASK) {
713 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
714 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
715 	}
716 	WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
717 	mutex_unlock(&adev->grbm_idx_mutex);
718 
719 	if (srbm_soft_reset) {
720 		adev->vce.srbm_soft_reset = srbm_soft_reset;
721 		return true;
722 	} else {
723 		adev->vce.srbm_soft_reset = 0;
724 		return false;
725 	}
726 }
727 
728 static int vce_v4_0_soft_reset(void *handle)
729 {
730 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
731 	u32 srbm_soft_reset;
732 
733 	if (!adev->vce.srbm_soft_reset)
734 		return 0;
735 	srbm_soft_reset = adev->vce.srbm_soft_reset;
736 
737 	if (srbm_soft_reset) {
738 		u32 tmp;
739 
740 		tmp = RREG32(mmSRBM_SOFT_RESET);
741 		tmp |= srbm_soft_reset;
742 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
743 		WREG32(mmSRBM_SOFT_RESET, tmp);
744 		tmp = RREG32(mmSRBM_SOFT_RESET);
745 
746 		udelay(50);
747 
748 		tmp &= ~srbm_soft_reset;
749 		WREG32(mmSRBM_SOFT_RESET, tmp);
750 		tmp = RREG32(mmSRBM_SOFT_RESET);
751 
752 		/* Wait a little for things to settle down */
753 		udelay(50);
754 	}
755 
756 	return 0;
757 }
758 
759 static int vce_v4_0_pre_soft_reset(void *handle)
760 {
761 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
762 
763 	if (!adev->vce.srbm_soft_reset)
764 		return 0;
765 
766 	mdelay(5);
767 
768 	return vce_v4_0_suspend(adev);
769 }
770 
771 
772 static int vce_v4_0_post_soft_reset(void *handle)
773 {
774 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
775 
776 	if (!adev->vce.srbm_soft_reset)
777 		return 0;
778 
779 	mdelay(5);
780 
781 	return vce_v4_0_resume(adev);
782 }
783 
784 static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override)
785 {
786 	u32 tmp, data;
787 
788 	tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL));
789 	if (override)
790 		data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
791 	else
792 		data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK;
793 
794 	if (tmp != data)
795 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data);
796 }
797 
798 static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
799 					     bool gated)
800 {
801 	u32 data;
802 
803 	/* Set Override to disable Clock Gating */
804 	vce_v4_0_override_vce_clock_gating(adev, true);
805 
806 	/* This function enables MGCG which is controlled by firmware.
807 	 * With the clocks in the gated state the core is still
808 	 * accessible but the firmware will throttle the clocks on the
809 	 * fly as necessary.
810 	 */
811 	if (gated) {
812 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
813 		data |= 0x1ff;
814 		data &= ~0xef0000;
815 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
816 
817 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
818 		data |= 0x3ff000;
819 		data &= ~0xffc00000;
820 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
821 
822 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
823 		data |= 0x2;
824 		data &= ~0x00010000;
825 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
826 
827 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
828 		data |= 0x37f;
829 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
830 
831 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
832 		data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
833 			VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
834 			VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
835 			0x8;
836 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
837 	} else {
838 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B));
839 		data &= ~0x80010;
840 		data |= 0xe70008;
841 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data);
842 
843 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
844 		data |= 0xffc00000;
845 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
846 
847 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2));
848 		data |= 0x10000;
849 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data);
850 
851 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING));
852 		data &= ~0xffc00000;
853 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data);
854 
855 		data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL));
856 		data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK |
857 			  VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK |
858 			  VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK  |
859 			  0x8);
860 		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data);
861 	}
862 	vce_v4_0_override_vce_clock_gating(adev, false);
863 }
864 
865 static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
866 {
867 	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
868 
869 	if (enable)
870 		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
871 	else
872 		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
873 
874 	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
875 }
876 
877 static int vce_v4_0_set_clockgating_state(void *handle,
878 					  enum amd_clockgating_state state)
879 {
880 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
881 	bool enable = (state == AMD_CG_STATE_GATE);
882 	int i;
883 
884 	if ((adev->asic_type == CHIP_POLARIS10) ||
885 		(adev->asic_type == CHIP_TONGA) ||
886 		(adev->asic_type == CHIP_FIJI))
887 		vce_v4_0_set_bypass_mode(adev, enable);
888 
889 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
890 		return 0;
891 
892 	mutex_lock(&adev->grbm_idx_mutex);
893 	for (i = 0; i < 2; i++) {
894 		/* Program VCE Instance 0 or 1 if not harvested */
895 		if (adev->vce.harvest_config & (1 << i))
896 			continue;
897 
898 		WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i);
899 
900 		if (enable) {
901 			/* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */
902 			uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A));
903 			data &= ~(0xf | 0xff0);
904 			data |= ((0x0 << 0) | (0x04 << 4));
905 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), data);
906 
907 			/* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */
908 			data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING));
909 			data &= ~(0xf | 0xff0);
910 			data |= ((0x0 << 0) | (0x04 << 4));
911 			WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data);
912 		}
913 
914 		vce_v4_0_set_vce_sw_clock_gating(adev, enable);
915 	}
916 
917 	WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0);
918 	mutex_unlock(&adev->grbm_idx_mutex);
919 
920 	return 0;
921 }
922 
923 static int vce_v4_0_set_powergating_state(void *handle,
924 					  enum amd_powergating_state state)
925 {
926 	/* This doesn't actually powergate the VCE block.
927 	 * That's done in the dpm code via the SMC.  This
928 	 * just re-inits the block as necessary.  The actual
929 	 * gating still happens in the dpm code.  We should
930 	 * revisit this when there is a cleaner line between
931 	 * the smc and the hw blocks
932 	 */
933 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
934 
935 	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
936 		return 0;
937 
938 	if (state == AMD_PG_STATE_GATE)
939 		/* XXX do we need a vce_v4_0_stop()? */
940 		return 0;
941 	else
942 		return vce_v4_0_start(adev);
943 }
944 #endif
945 
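/**
 * vce_v4_0_ring_emit_ib - emit an indirect buffer on the ring
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 * @vm_id: VM id to run the IB under
 * @ctx_switch: context switch flag (unused here)
 */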
946 static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
947 		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
948 {
949 	amdgpu_ring_write(ring, VCE_CMD_IB_VM);
950 	amdgpu_ring_write(ring, vm_id);
951 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
952 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
953 	amdgpu_ring_write(ring, ib->length_dw);
954 }
955 
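/**
 * vce_v4_0_ring_emit_fence - emit a fence command followed by a trap
 *
 * @ring: amdgpu_ring pointer
 * @addr: fence GPU address
 * @seq: fence sequence number
 * @flags: fence flags (64 bit sequence numbers are not supported)
 */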
956 static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
957 			u64 seq, unsigned flags)
958 {
959 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
960 
961 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
962 	amdgpu_ring_write(ring, addr);
963 	amdgpu_ring_write(ring, upper_32_bits(addr));
964 	amdgpu_ring_write(ring, seq);
965 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
966 }
967 
968 static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
969 {
970 	amdgpu_ring_write(ring, VCE_CMD_END);
971 }
972 
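/**
 * vce_v4_0_emit_vm_flush - emit a VM flush on the ring
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: page directory base address
 *
 * Write the page directory address to both VM hubs, trigger a TLB flush
 * through the invalidation engine and wait for it to be acknowledged.
 */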
973 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
974 			 unsigned int vm_id, uint64_t pd_addr)
975 {
976 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
977 	unsigned eng = ring->idx;
978 	unsigned i;
979 
980 	pd_addr = pd_addr | 0x1; /* valid bit */
981 	/* now only use physical base address of PDE and valid */
982 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
983 
984 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
985 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
986 
987 		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
988 		amdgpu_ring_write(ring,
989 			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
990 		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
991 
992 		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
993 		amdgpu_ring_write(ring,
994 			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
995 		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
996 
997 		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
998 		amdgpu_ring_write(ring,
999 			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
1000 		amdgpu_ring_write(ring, 0xffffffff);
1001 		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
1002 
1003 		/* flush TLB */
1004 		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
1005 		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
1006 		amdgpu_ring_write(ring, req);
1007 
1008 		/* wait for flush */
1009 		amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
1010 		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
1011 		amdgpu_ring_write(ring, 1 << vm_id);
1012 		amdgpu_ring_write(ring, 1 << vm_id);
1013 	}
1014 }
1015 
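/**
 * vce_v4_0_set_interrupt_state - enable or disable the VCE system interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type (unused)
 * @state: requested interrupt state
 */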
1016 static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
1017 					struct amdgpu_irq_src *source,
1018 					unsigned type,
1019 					enum amdgpu_interrupt_state state)
1020 {
1021 	uint32_t val = 0;
1022 
1023 	if (state == AMDGPU_IRQ_STATE_ENABLE)
1024 		val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK;
1025 
1026 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val,
1027 			~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK);
1028 	return 0;
1029 }
1030 
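/**
 * vce_v4_0_process_interrupt - handle a VCE interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Acknowledge the trap interrupt and process the fence of the ring
 * selected by the source data.
 */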
1031 static int vce_v4_0_process_interrupt(struct amdgpu_device *adev,
1032 				      struct amdgpu_irq_src *source,
1033 				      struct amdgpu_iv_entry *entry)
1034 {
1035 	DRM_DEBUG("IH: VCE\n");
1036 
1037 	WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS),
1038 			VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
1039 			~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
1040 
1041 	switch (entry->src_data[0]) {
1042 	case 0:
1043 	case 1:
1044 	case 2:
1045 		amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]);
1046 		break;
1047 	default:
1048 		DRM_ERROR("Unhandled interrupt: %d %d\n",
1049 			  entry->src_id, entry->src_data[0]);
1050 		break;
1051 	}
1052 
1053 	return 0;
1054 }
1055 
1056 const struct amd_ip_funcs vce_v4_0_ip_funcs = {
1057 	.name = "vce_v4_0",
1058 	.early_init = vce_v4_0_early_init,
1059 	.late_init = NULL,
1060 	.sw_init = vce_v4_0_sw_init,
1061 	.sw_fini = vce_v4_0_sw_fini,
1062 	.hw_init = vce_v4_0_hw_init,
1063 	.hw_fini = vce_v4_0_hw_fini,
1064 	.suspend = vce_v4_0_suspend,
1065 	.resume = vce_v4_0_resume,
1066 	.is_idle = NULL /* vce_v4_0_is_idle */,
1067 	.wait_for_idle = NULL /* vce_v4_0_wait_for_idle */,
1068 	.check_soft_reset = NULL /* vce_v4_0_check_soft_reset */,
1069 	.pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */,
1070 	.soft_reset = NULL /* vce_v4_0_soft_reset */,
1071 	.post_soft_reset = NULL /* vce_v4_0_post_soft_reset */,
1072 	.set_clockgating_state = vce_v4_0_set_clockgating_state,
1073 	.set_powergating_state = NULL /* vce_v4_0_set_powergating_state */,
1074 };
1075 
1076 static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
1077 	.type = AMDGPU_RING_TYPE_VCE,
1078 	.align_mask = 0x3f,
1079 	.nop = VCE_CMD_NO_OP,
1080 	.support_64bit_ptrs = false,
1081 	.get_rptr = vce_v4_0_ring_get_rptr,
1082 	.get_wptr = vce_v4_0_ring_get_wptr,
1083 	.set_wptr = vce_v4_0_ring_set_wptr,
1084 	.parse_cs = amdgpu_vce_ring_parse_cs_vm,
1085 	.emit_frame_size =
1086 		17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */
1087 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1088 		1, /* vce_v4_0_ring_insert_end */
1089 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
1090 	.emit_ib = vce_v4_0_ring_emit_ib,
1091 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
1092 	.emit_fence = vce_v4_0_ring_emit_fence,
1093 	.test_ring = amdgpu_vce_ring_test_ring,
1094 	.test_ib = amdgpu_vce_ring_test_ib,
1095 	.insert_nop = amdgpu_ring_insert_nop,
1096 	.insert_end = vce_v4_0_ring_insert_end,
1097 	.pad_ib = amdgpu_ring_generic_pad_ib,
1098 	.begin_use = amdgpu_vce_ring_begin_use,
1099 	.end_use = amdgpu_vce_ring_end_use,
1100 };
1101 
1102 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
1103 {
1104 	int i;
1105 
1106 	for (i = 0; i < adev->vce.num_rings; i++)
1107 		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
1108 	DRM_INFO("VCE enabled in VM mode\n");
1109 }
1110 
1111 static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = {
1112 	.set = vce_v4_0_set_interrupt_state,
1113 	.process = vce_v4_0_process_interrupt,
1114 };
1115 
1116 static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev)
1117 {
1118 	adev->vce.irq.num_types = 1;
1119 	adev->vce.irq.funcs = &vce_v4_0_irq_funcs;
1120 }
1121 
1122 const struct amdgpu_ip_block_version vce_v4_0_ip_block =
1123 {
1124 	.type = AMD_IP_BLOCK_TYPE_VCE,
1125 	.major = 4,
1126 	.minor = 0,
1127 	.rev = 0,
1128 	.funcs = &vce_v4_0_ip_funcs,
1129 };
1130