1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
20  *
21  * The above copyright notice and this permission notice (including the
22  * next paragraph) shall be included in all copies or substantial portions
23  * of the Software.
24  *
25  * Authors: Christian König <christian.koenig@amd.com>
26  */
27 
28 #include <linux/firmware.h>
29 #include <linux/module.h>
30 
31 #include <drm/drm.h>
32 #include <drm/drm_drv.h>
33 
34 #include "amdgpu.h"
35 #include "amdgpu_pm.h"
36 #include "amdgpu_vce.h"
37 #include "cikd.h"
38 
39 /* 1 second timeout */
40 #define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
41 
42 /* Firmware Names */
43 #ifdef CONFIG_DRM_AMDGPU_CIK
44 #define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin"
45 #define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin"
46 #define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin"
47 #define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin"
48 #define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"
49 #endif
50 #define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"
51 #define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin"
52 #define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
53 #define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
54 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
55 #define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
56 #define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
57 #define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"
58 
59 #define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
60 #define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
61 #define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"
62 
63 #ifdef CONFIG_DRM_AMDGPU_CIK
64 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
65 MODULE_FIRMWARE(FIRMWARE_KABINI);
66 MODULE_FIRMWARE(FIRMWARE_KAVERI);
67 MODULE_FIRMWARE(FIRMWARE_HAWAII);
68 MODULE_FIRMWARE(FIRMWARE_MULLINS);
69 #endif
70 MODULE_FIRMWARE(FIRMWARE_TONGA);
71 MODULE_FIRMWARE(FIRMWARE_CARRIZO);
72 MODULE_FIRMWARE(FIRMWARE_FIJI);
73 MODULE_FIRMWARE(FIRMWARE_STONEY);
74 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
75 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
76 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
77 MODULE_FIRMWARE(FIRMWARE_VEGAM);
78 
79 MODULE_FIRMWARE(FIRMWARE_VEGA10);
80 MODULE_FIRMWARE(FIRMWARE_VEGA12);
81 MODULE_FIRMWARE(FIRMWARE_VEGA20);
82 
83 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
84 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
85 				     struct dma_fence **fence);
86 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
87 				      bool direct, struct dma_fence **fence);
88 
89 /**
90  * amdgpu_vce_sw_init - allocate memory, load VCE firmware
91  *
92  * @adev: amdgpu_device pointer
93  * @size: size for the new BO
94  *
95  * First step to get VCE online, allocate memory and load the firmware
96  */
97 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
98 {
99 	const char *fw_name;
100 	const struct common_firmware_header *hdr;
101 	unsigned ucode_version, version_major, version_minor, binary_id;
102 	int i, r;
103 
104 	switch (adev->asic_type) {
105 #ifdef CONFIG_DRM_AMDGPU_CIK
106 	case CHIP_BONAIRE:
107 		fw_name = FIRMWARE_BONAIRE;
108 		break;
109 	case CHIP_KAVERI:
110 		fw_name = FIRMWARE_KAVERI;
111 		break;
112 	case CHIP_KABINI:
113 		fw_name = FIRMWARE_KABINI;
114 		break;
115 	case CHIP_HAWAII:
116 		fw_name = FIRMWARE_HAWAII;
117 		break;
118 	case CHIP_MULLINS:
119 		fw_name = FIRMWARE_MULLINS;
120 		break;
121 #endif
122 	case CHIP_TONGA:
123 		fw_name = FIRMWARE_TONGA;
124 		break;
125 	case CHIP_CARRIZO:
126 		fw_name = FIRMWARE_CARRIZO;
127 		break;
128 	case CHIP_FIJI:
129 		fw_name = FIRMWARE_FIJI;
130 		break;
131 	case CHIP_STONEY:
132 		fw_name = FIRMWARE_STONEY;
133 		break;
134 	case CHIP_POLARIS10:
135 		fw_name = FIRMWARE_POLARIS10;
136 		break;
137 	case CHIP_POLARIS11:
138 		fw_name = FIRMWARE_POLARIS11;
139 		break;
140 	case CHIP_POLARIS12:
141 		fw_name = FIRMWARE_POLARIS12;
142 		break;
143 	case CHIP_VEGAM:
144 		fw_name = FIRMWARE_VEGAM;
145 		break;
146 	case CHIP_VEGA10:
147 		fw_name = FIRMWARE_VEGA10;
148 		break;
149 	case CHIP_VEGA12:
150 		fw_name = FIRMWARE_VEGA12;
151 		break;
152 	case CHIP_VEGA20:
153 		fw_name = FIRMWARE_VEGA20;
154 		break;
155 
156 	default:
157 		return -EINVAL;
158 	}
159 
160 	r = request_firmware(&adev->vce.fw, fw_name, adev->dev);
161 	if (r) {
162 		dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n",
163 			fw_name);
164 		return r;
165 	}
166 
167 	r = amdgpu_ucode_validate(adev->vce.fw);
168 	if (r) {
169 		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n",
170 			fw_name);
171 		release_firmware(adev->vce.fw);
172 		adev->vce.fw = NULL;
173 		return r;
174 	}
175 
176 	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
177 
178 	ucode_version = le32_to_cpu(hdr->ucode_version);
179 	version_major = (ucode_version >> 20) & 0xfff;
180 	version_minor = (ucode_version >> 8) & 0xfff;
181 	binary_id = ucode_version & 0xff;
182 	DRM_INFO("Found VCE firmware Version: %d.%d Binary ID: %d\n",
183 		version_major, version_minor, binary_id);
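	/* Pack major/minor/binary id into fw_version so later checks such as
	 * (adev->vce.fw_version >> 24) >= 52 can compare the major version.
	 */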
184 	adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) |
185 				(binary_id << 8));
186 
187 	r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
188 				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo,
189 				    &adev->vce.gpu_addr, &adev->vce.cpu_addr);
190 	if (r) {
191 		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
192 		return r;
193 	}
194 
195 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
196 		atomic_set(&adev->vce.handles[i], 0);
197 		adev->vce.filp[i] = NULL;
198 	}
199 
200 	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
201 	mutex_init(&adev->vce.idle_mutex);
202 
203 	return 0;
204 }
205 
206 /**
207  * amdgpu_vce_sw_fini - free memory
208  *
209  * @adev: amdgpu_device pointer
210  *
211  * Last step of VCE teardown, free the VCPU BO, tear down the rings and release the firmware
212  */
213 int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
214 {
215 	unsigned i;
216 
217 	if (adev->vce.vcpu_bo == NULL)
218 		return 0;
219 
220 	drm_sched_entity_destroy(&adev->vce.entity);
221 
222 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
223 		(void **)&adev->vce.cpu_addr);
224 
225 	for (i = 0; i < adev->vce.num_rings; i++)
226 		amdgpu_ring_fini(&adev->vce.ring[i]);
227 
228 	release_firmware(adev->vce.fw);
229 	mutex_destroy(&adev->vce.idle_mutex);
230 
231 	return 0;
232 }
233 
234 /**
235  * amdgpu_vce_entity_init - init entity
236  *
237  * @adev: amdgpu_device pointer
238  *
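 * Initialize the drm scheduler entity on VCE ring 0 that is used for
 * delayed command submission.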
239  */
240 int amdgpu_vce_entity_init(struct amdgpu_device *adev)
241 {
242 	struct amdgpu_ring *ring;
243 	struct drm_gpu_scheduler *sched;
244 	int r;
245 
246 	ring = &adev->vce.ring[0];
247 	sched = &ring->sched;
248 	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
249 				  &sched, 1, NULL);
250 	if (r != 0) {
251 		DRM_ERROR("Failed setting up VCE run queue.\n");
252 		return r;
253 	}
254 
255 	return 0;
256 }
257 
258 /**
259  * amdgpu_vce_suspend - unpin VCE fw memory
260  *
261  * @adev: amdgpu_device pointer
262  *
263  */
264 int amdgpu_vce_suspend(struct amdgpu_device *adev)
265 {
266 	int i;
267 
268 	cancel_delayed_work_sync(&adev->vce.idle_work);
269 
270 	if (adev->vce.vcpu_bo == NULL)
271 		return 0;
272 
273 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
274 		if (atomic_read(&adev->vce.handles[i]))
275 			break;
276 
277 	if (i == AMDGPU_MAX_VCE_HANDLES)
278 		return 0;
279 
280 	/* TODO: suspending running encoding sessions isn't supported */
281 	return -EINVAL;
282 }
283 
284 /**
285  * amdgpu_vce_resume - pin VCE fw memory
286  *
287  * @adev: amdgpu_device pointer
288  *
289  */
290 int amdgpu_vce_resume(struct amdgpu_device *adev)
291 {
292 	void *cpu_addr;
293 	const struct common_firmware_header *hdr;
294 	unsigned offset;
295 	int r, idx;
296 
297 	if (adev->vce.vcpu_bo == NULL)
298 		return -EINVAL;
299 
300 	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
301 	if (r) {
302 		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
303 		return r;
304 	}
305 
306 	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
307 	if (r) {
308 		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
309 		dev_err(adev->dev, "(%d) VCE map failed\n", r);
310 		return r;
311 	}
312 
313 	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
314 	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
315 
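	/* skip the MMIO copy when the device has already been unplugged */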
316 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
317 		memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
318 			    adev->vce.fw->size - offset);
319 		drm_dev_exit(idx);
320 	}
321 
322 	amdgpu_bo_kunmap(adev->vce.vcpu_bo);
323 
324 	amdgpu_bo_unreserve(adev->vce.vcpu_bo);
325 
326 	return 0;
327 }
328 
329 /**
330  * amdgpu_vce_idle_work_handler - power off VCE
331  *
332  * @work: pointer to work structure
333  *
334  * Power off VCE when it's not used anymore
335  */
336 static void amdgpu_vce_idle_work_handler(struct work_struct *work)
337 {
338 	struct amdgpu_device *adev =
339 		container_of(work, struct amdgpu_device, vce.idle_work.work);
340 	unsigned i, count = 0;
341 
342 	for (i = 0; i < adev->vce.num_rings; i++)
343 		count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
344 
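	/* with no fences pending either let DPM manage VCE power or gate
	 * clocks and power manually; otherwise check again after the timeout
	 */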
345 	if (count == 0) {
346 		if (adev->pm.dpm_enabled) {
347 			amdgpu_dpm_enable_vce(adev, false);
348 		} else {
349 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
350 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
351 							       AMD_PG_STATE_GATE);
352 			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
353 							       AMD_CG_STATE_GATE);
354 		}
355 	} else {
356 		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
357 	}
358 }
359 
360 /**
361  * amdgpu_vce_ring_begin_use - power up VCE
362  *
363  * @ring: amdgpu ring
364  *
365  * Make sure VCE is powered up when we want to use it
366  */
367 void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
368 {
369 	struct amdgpu_device *adev = ring->adev;
370 	bool set_clocks;
371 
372 	if (amdgpu_sriov_vf(adev))
373 		return;
374 
375 	mutex_lock(&adev->vce.idle_mutex);
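	/* if the idle work was not pending, VCE may have been power gated in
	 * the meantime and has to be brought back up before submitting work
	 */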
376 	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
377 	if (set_clocks) {
378 		if (adev->pm.dpm_enabled) {
379 			amdgpu_dpm_enable_vce(adev, true);
380 		} else {
381 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
382 			amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
383 							       AMD_CG_STATE_UNGATE);
384 			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
385 							       AMD_PG_STATE_UNGATE);
386 
387 		}
388 	}
389 	mutex_unlock(&adev->vce.idle_mutex);
390 }
391 
392 /**
393  * amdgpu_vce_ring_end_use - power VCE down
394  *
395  * @ring: amdgpu ring
396  *
397  * Schedule work to power VCE down again
398  */
399 void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
400 {
401 	if (!amdgpu_sriov_vf(ring->adev))
402 		schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
403 }
404 
405 /**
406  * amdgpu_vce_free_handles - free still open VCE handles
407  *
408  * @adev: amdgpu_device pointer
409  * @filp: drm file pointer
410  *
411  * Close all VCE handles still open by this file pointer
412  */
413 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
414 {
415 	struct amdgpu_ring *ring = &adev->vce.ring[0];
416 	int i, r;
417 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
418 		uint32_t handle = atomic_read(&adev->vce.handles[i]);
419 
420 		if (!handle || adev->vce.filp[i] != filp)
421 			continue;
422 
423 		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
424 		if (r)
425 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
426 
427 		adev->vce.filp[i] = NULL;
428 		atomic_set(&adev->vce.handles[i], 0);
429 	}
430 }
431 
432 /**
433  * amdgpu_vce_get_create_msg - generate a VCE create msg
434  *
435  * @ring: ring we should submit the msg to
436  * @handle: VCE session handle to use
437  * @fence: optional fence to return
438  *
439  * Open up a stream for HW test
440  */
441 static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
442 				     struct dma_fence **fence)
443 {
444 	const unsigned ib_size_dw = 1024;
445 	struct amdgpu_job *job;
446 	struct amdgpu_ib *ib;
447 	struct amdgpu_ib ib_msg;
448 	struct dma_fence *f = NULL;
449 	uint64_t addr;
450 	int i, r;
451 
452 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
453 				     AMDGPU_IB_POOL_DIRECT, &job);
454 	if (r)
455 		return r;
456 
457 	memset(&ib_msg, 0, sizeof(ib_msg));
458 	/* only one gpu page is needed, alloc +1 page to make addr aligned. */
459 	r = amdgpu_ib_get(ring->adev, NULL, AMDGPU_GPU_PAGE_SIZE * 2,
460 			  AMDGPU_IB_POOL_DIRECT,
461 			  &ib_msg);
462 	if (r)
463 		goto err;
464 
465 	ib = &job->ibs[0];
466 	/* let addr point to page boundary */
467 	addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg.gpu_addr);
468 
469 	/* stitch together a VCE create msg */
470 	ib->length_dw = 0;
471 	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
472 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
473 	ib->ptr[ib->length_dw++] = handle;
474 
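	/* firmware with major version >= 52 uses a larger create command */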
475 	if ((ring->adev->vce.fw_version >> 24) >= 52)
476 		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
477 	else
478 		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
479 	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
480 	ib->ptr[ib->length_dw++] = 0x00000000;
481 	ib->ptr[ib->length_dw++] = 0x00000042;
482 	ib->ptr[ib->length_dw++] = 0x0000000a;
483 	ib->ptr[ib->length_dw++] = 0x00000001;
484 	ib->ptr[ib->length_dw++] = 0x00000080;
485 	ib->ptr[ib->length_dw++] = 0x00000060;
486 	ib->ptr[ib->length_dw++] = 0x00000100;
487 	ib->ptr[ib->length_dw++] = 0x00000100;
488 	ib->ptr[ib->length_dw++] = 0x0000000c;
489 	ib->ptr[ib->length_dw++] = 0x00000000;
490 	if ((ring->adev->vce.fw_version >> 24) >= 52) {
491 		ib->ptr[ib->length_dw++] = 0x00000000;
492 		ib->ptr[ib->length_dw++] = 0x00000000;
493 		ib->ptr[ib->length_dw++] = 0x00000000;
494 		ib->ptr[ib->length_dw++] = 0x00000000;
495 	}
496 
497 	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
498 	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
499 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
500 	ib->ptr[ib->length_dw++] = addr;
501 	ib->ptr[ib->length_dw++] = 0x00000001;
502 
503 	for (i = ib->length_dw; i < ib_size_dw; ++i)
504 		ib->ptr[i] = 0x0;
505 
506 	r = amdgpu_job_submit_direct(job, ring, &f);
507 	amdgpu_ib_free(ring->adev, &ib_msg, f);
508 	if (r)
509 		goto err;
510 
511 	if (fence)
512 		*fence = dma_fence_get(f);
513 	dma_fence_put(f);
514 	return 0;
515 
516 err:
517 	amdgpu_job_free(job);
518 	return r;
519 }
520 
521 /**
522  * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
523  *
524  * @ring: ring we should submit the msg to
525  * @handle: VCE session handle to use
526  * @direct: direct or delayed pool
527  * @fence: optional fence to return
528  *
529  * Close up a stream for HW test or if userspace failed to do so
530  */
531 static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
532 				      bool direct, struct dma_fence **fence)
533 {
534 	const unsigned ib_size_dw = 1024;
535 	struct amdgpu_job *job;
536 	struct amdgpu_ib *ib;
537 	struct dma_fence *f = NULL;
538 	int i, r;
539 
540 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
541 				     direct ? AMDGPU_IB_POOL_DIRECT :
542 				     AMDGPU_IB_POOL_DELAYED, &job);
543 	if (r)
544 		return r;
545 
546 	ib = &job->ibs[0];
547 
548 	/* stitch together a VCE destroy msg */
549 	ib->length_dw = 0;
550 	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
551 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
552 	ib->ptr[ib->length_dw++] = handle;
553 
554 	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
555 	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
556 	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if none */
557 	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
558 	ib->ptr[ib->length_dw++] = 0x00000000;
559 	ib->ptr[ib->length_dw++] = 0x00000000;
560 	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
561 	ib->ptr[ib->length_dw++] = 0x00000000;
562 
563 	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
564 	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
565 
566 	for (i = ib->length_dw; i < ib_size_dw; ++i)
567 		ib->ptr[i] = 0x0;
568 
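	/* the IB test submits directly, handle cleanup goes through the scheduler entity */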
569 	if (direct)
570 		r = amdgpu_job_submit_direct(job, ring, &f);
571 	else
572 		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
573 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
574 	if (r)
575 		goto err;
576 
577 	if (fence)
578 		*fence = dma_fence_get(f);
579 	dma_fence_put(f);
580 	return 0;
581 
582 err:
583 	amdgpu_job_free(job);
584 	return r;
585 }
586 
587 /**
588  * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary
589  *
590  * @p: parser context
591  * @ib_idx: indirect buffer to use
592  * @lo: address of lower dword
593  * @hi: address of higher dword
594  * @size: minimum size
595  * @index: bs/fb index
596  *
597  * Make sure that no BO crosses a 4GB boundary.
598  */
599 static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx,
600 				  int lo, int hi, unsigned size, int32_t index)
601 {
602 	int64_t offset = ((uint64_t)size) * ((int64_t)index);
603 	struct ttm_operation_ctx ctx = { false, false };
604 	struct amdgpu_bo_va_mapping *mapping;
605 	unsigned i, fpfn, lpfn;
606 	struct amdgpu_bo *bo;
607 	uint64_t addr;
608 	int r;
609 
610 	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
611 	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
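	/* restrict the allowed placement so the referenced range stays within
	 * a single 4GB segment
	 */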
612 	if (index >= 0) {
613 		addr += offset;
614 		fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT;
615 		lpfn = 0x100000000ULL >> PAGE_SHIFT;
616 	} else {
617 		fpfn = 0;
618 		lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT;
619 	}
620 
621 	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
622 	if (r) {
623 		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
624 			  addr, lo, hi, size, index);
625 		return r;
626 	}
627 
628 	for (i = 0; i < bo->placement.num_placement; ++i) {
629 		bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn);
630 		bo->placements[i].lpfn = bo->placements[i].lpfn ?
631 			min(bo->placements[i].lpfn, lpfn) : lpfn;
632 	}
633 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
634 }
635 
636 
637 /**
638  * amdgpu_vce_cs_reloc - command submission relocation
639  *
640  * @p: parser context
641  * @ib_idx: indirect buffer to use
642  * @lo: address of lower dword
643  * @hi: address of higher dword
644  * @size: minimum size
645  * @index: bs/fb index
646  *
647  * Patch relocation inside command stream with real buffer address
648  */
649 static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
650 			       int lo, int hi, unsigned size, uint32_t index)
651 {
652 	struct amdgpu_bo_va_mapping *mapping;
653 	struct amdgpu_bo *bo;
654 	uint64_t addr;
655 	int r;
656 
657 	if (index == 0xffffffff)
658 		index = 0;
659 
660 	addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
661 	       ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
662 	addr += ((uint64_t)size) * ((uint64_t)index);
663 
664 	r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping);
665 	if (r) {
666 		DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
667 			  addr, lo, hi, size, index);
668 		return r;
669 	}
670 
671 	if ((addr + (uint64_t)size) >
672 	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
673 		DRM_ERROR("BO too small for addr 0x%010Lx %d %d\n",
674 			  addr, lo, hi);
675 		return -EINVAL;
676 	}
677 
678 	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
679 	addr += amdgpu_bo_gpu_offset(bo);
680 	addr -= ((uint64_t)size) * ((uint64_t)index);
681 
682 	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
683 	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
684 
685 	return 0;
686 }
687 
688 /**
689  * amdgpu_vce_validate_handle - validate stream handle
690  *
691  * @p: parser context
692  * @handle: handle to validate
693  * @allocated: allocated a new handle?
694  *
695  * Validates the handle and returns the found session index, or -EINVAL
696  * if we don't have another free session index.
697  */
698 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
699 				      uint32_t handle, uint32_t *allocated)
700 {
701 	unsigned i;
702 
703 	/* validate the handle */
704 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
705 		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
706 			if (p->adev->vce.filp[i] != p->filp) {
707 				DRM_ERROR("VCE handle collision detected!\n");
708 				return -EINVAL;
709 			}
710 			return i;
711 		}
712 	}
713 
714 	/* handle not found, try to alloc a new one */
715 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
716 		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
717 			p->adev->vce.filp[i] = p->filp;
718 			p->adev->vce.img_size[i] = 0;
719 			*allocated |= 1 << i;
720 			return i;
721 		}
722 	}
723 
724 	DRM_ERROR("No more free VCE handles!\n");
725 	return -EINVAL;
726 }
727 
728 /**
729  * amdgpu_vce_ring_parse_cs - parse and validate the command stream
730  *
731  * @p: parser context
732  * @ib_idx: indirect buffer to use
733  */
734 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
735 {
736 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
737 	unsigned fb_idx = 0, bs_idx = 0;
738 	int session_idx = -1;
739 	uint32_t destroyed = 0;
740 	uint32_t created = 0;
741 	uint32_t allocated = 0;
742 	uint32_t tmp, handle = 0;
743 	uint32_t *size = &tmp;
744 	unsigned idx;
745 	int i, r = 0;
746 
747 	p->job->vm = NULL;
748 	ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
749 
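	/* first pass: validate BO placement so no buffer crosses a 4GB boundary */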
750 	for (idx = 0; idx < ib->length_dw;) {
751 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
752 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
753 
754 		if ((len < 8) || (len & 3)) {
755 			DRM_ERROR("invalid VCE command length (%d)!\n", len);
756 			r = -EINVAL;
757 			goto out;
758 		}
759 
760 		switch (cmd) {
761 		case 0x00000002: /* task info */
762 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
763 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
764 			break;
765 
766 		case 0x03000001: /* encode */
767 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10,
768 						   idx + 9, 0, 0);
769 			if (r)
770 				goto out;
771 
772 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12,
773 						   idx + 11, 0, 0);
774 			if (r)
775 				goto out;
776 			break;
777 
778 		case 0x05000001: /* context buffer */
779 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
780 						   idx + 2, 0, 0);
781 			if (r)
782 				goto out;
783 			break;
784 
785 		case 0x05000004: /* video bitstream buffer */
786 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
787 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
788 						   tmp, bs_idx);
789 			if (r)
790 				goto out;
791 			break;
792 
793 		case 0x05000005: /* feedback buffer */
794 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2,
795 						   4096, fb_idx);
796 			if (r)
797 				goto out;
798 			break;
799 
800 		case 0x0500000d: /* MV buffer */
801 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
802 							idx + 2, 0, 0);
803 			if (r)
804 				goto out;
805 
806 			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
807 							idx + 7, 0, 0);
808 			if (r)
809 				goto out;
810 			break;
811 		}
812 
813 		idx += len / 4;
814 	}
815 
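	/* second pass: validate session handles and patch in the real buffer addresses */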
816 	for (idx = 0; idx < ib->length_dw;) {
817 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
818 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
819 
820 		switch (cmd) {
821 		case 0x00000001: /* session */
822 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
823 			session_idx = amdgpu_vce_validate_handle(p, handle,
824 								 &allocated);
825 			if (session_idx < 0) {
826 				r = session_idx;
827 				goto out;
828 			}
829 			size = &p->adev->vce.img_size[session_idx];
830 			break;
831 
832 		case 0x00000002: /* task info */
833 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
834 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
835 			break;
836 
837 		case 0x01000001: /* create */
838 			created |= 1 << session_idx;
839 			if (destroyed & (1 << session_idx)) {
840 				destroyed &= ~(1 << session_idx);
841 				allocated |= 1 << session_idx;
842 
843 			} else if (!(allocated & (1 << session_idx))) {
844 				DRM_ERROR("Handle already in use!\n");
845 				r = -EINVAL;
846 				goto out;
847 			}
848 
849 			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
850 				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
851 				8 * 3 / 2;
852 			break;
853 
854 		case 0x04000001: /* config extension */
855 		case 0x04000002: /* pic control */
856 		case 0x04000005: /* rate control */
857 		case 0x04000007: /* motion estimation */
858 		case 0x04000008: /* rdo */
859 		case 0x04000009: /* vui */
860 		case 0x05000002: /* auxiliary buffer */
861 		case 0x05000009: /* clock table */
862 			break;
863 
864 		case 0x0500000c: /* hw config */
865 			switch (p->adev->asic_type) {
866 #ifdef CONFIG_DRM_AMDGPU_CIK
867 			case CHIP_KAVERI:
868 			case CHIP_MULLINS:
869 #endif
870 			case CHIP_CARRIZO:
871 				break;
872 			default:
873 				r = -EINVAL;
874 				goto out;
875 			}
876 			break;
877 
878 		case 0x03000001: /* encode */
879 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
880 						*size, 0);
881 			if (r)
882 				goto out;
883 
884 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
885 						*size / 3, 0);
886 			if (r)
887 				goto out;
888 			break;
889 
890 		case 0x02000001: /* destroy */
891 			destroyed |= 1 << session_idx;
892 			break;
893 
894 		case 0x05000001: /* context buffer */
895 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
896 						*size * 2, 0);
897 			if (r)
898 				goto out;
899 			break;
900 
901 		case 0x05000004: /* video bitstream buffer */
902 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
903 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
904 						tmp, bs_idx);
905 			if (r)
906 				goto out;
907 			break;
908 
909 		case 0x05000005: /* feedback buffer */
910 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
911 						4096, fb_idx);
912 			if (r)
913 				goto out;
914 			break;
915 
916 		case 0x0500000d: /* MV buffer */
917 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
918 							idx + 2, *size, 0);
919 			if (r)
920 				goto out;
921 
922 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
923 							idx + 7, *size / 12, 0);
924 			if (r)
925 				goto out;
926 			break;
927 
928 		default:
929 			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
930 			r = -EINVAL;
931 			goto out;
932 		}
933 
934 		if (session_idx == -1) {
935 			DRM_ERROR("no session command at start of IB\n");
936 			r = -EINVAL;
937 			goto out;
938 		}
939 
940 		idx += len / 4;
941 	}
942 
943 	if (allocated & ~created) {
944 		DRM_ERROR("New session without create command!\n");
945 		r = -ENOENT;
946 	}
947 
948 out:
949 	if (!r) {
950 		/* No error, free all destroyed handle slots */
951 		tmp = destroyed;
952 	} else {
953 		/* Error during parsing, free all allocated handle slots */
954 		tmp = allocated;
955 	}
956 
957 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
958 		if (tmp & (1 << i))
959 			atomic_set(&p->adev->vce.handles[i], 0);
960 
961 	return r;
962 }
963 
964 /**
965  * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode
966  *
967  * @p: parser context
968  * @ib_idx: indirect buffer to use
969  */
970 int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx)
971 {
972 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
973 	int session_idx = -1;
974 	uint32_t destroyed = 0;
975 	uint32_t created = 0;
976 	uint32_t allocated = 0;
977 	uint32_t tmp, handle = 0;
978 	int i, r = 0, idx = 0;
979 
980 	while (idx < ib->length_dw) {
981 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
982 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
983 
984 		if ((len < 8) || (len & 3)) {
985 			DRM_ERROR("invalid VCE command length (%d)!\n", len);
986 			r = -EINVAL;
987 			goto out;
988 		}
989 
990 		switch (cmd) {
991 		case 0x00000001: /* session */
992 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
993 			session_idx = amdgpu_vce_validate_handle(p, handle,
994 								 &allocated);
995 			if (session_idx < 0) {
996 				r = session_idx;
997 				goto out;
998 			}
999 			break;
1000 
1001 		case 0x01000001: /* create */
1002 			created |= 1 << session_idx;
1003 			if (destroyed & (1 << session_idx)) {
1004 				destroyed &= ~(1 << session_idx);
1005 				allocated |= 1 << session_idx;
1006 
1007 			} else if (!(allocated & (1 << session_idx))) {
1008 				DRM_ERROR("Handle already in use!\n");
1009 				r = -EINVAL;
1010 				goto out;
1011 			}
1012 
1013 			break;
1014 
1015 		case 0x02000001: /* destroy */
1016 			destroyed |= 1 << session_idx;
1017 			break;
1018 
1019 		default:
1020 			break;
1021 		}
1022 
1023 		if (session_idx == -1) {
1024 			DRM_ERROR("no session command at start of IB\n");
1025 			r = -EINVAL;
1026 			goto out;
1027 		}
1028 
1029 		idx += len / 4;
1030 	}
1031 
1032 	if (allocated & ~created) {
1033 		DRM_ERROR("New session without create command!\n");
1034 		r = -ENOENT;
1035 	}
1036 
1037 out:
1038 	if (!r) {
1039 		/* No error, free all destroyed handle slots */
1040 		tmp = destroyed;
1041 		amdgpu_ib_free(p->adev, ib, NULL);
1042 	} else {
1043 		/* Error during parsing, free all allocated handle slots */
1044 		tmp = allocated;
1045 	}
1046 
1047 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
1048 		if (tmp & (1 << i))
1049 			atomic_set(&p->adev->vce.handles[i], 0);
1050 
1051 	return r;
1052 }
1053 
1054 /**
1055  * amdgpu_vce_ring_emit_ib - execute indirect buffer
1056  *
1057  * @ring: engine to use
1058  * @job: job to retrieve vmid from
1059  * @ib: the IB to execute
1060  * @flags: unused
1061  *
1062  */
1063 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
1064 				struct amdgpu_job *job,
1065 				struct amdgpu_ib *ib,
1066 				uint32_t flags)
1067 {
1068 	amdgpu_ring_write(ring, VCE_CMD_IB);
1069 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
1070 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
1071 	amdgpu_ring_write(ring, ib->length_dw);
1072 }
1073 
1074 /**
1075  * amdgpu_vce_ring_emit_fence - add a fence command to the ring
1076  *
1077  * @ring: engine to use
1078  * @addr: address
1079  * @seq: sequence number
1080  * @flags: fence related flags
1081  *
1082  */
1083 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
1084 				unsigned flags)
1085 {
1086 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
1087 
1088 	amdgpu_ring_write(ring, VCE_CMD_FENCE);
1089 	amdgpu_ring_write(ring, addr);
1090 	amdgpu_ring_write(ring, upper_32_bits(addr));
1091 	amdgpu_ring_write(ring, seq);
1092 	amdgpu_ring_write(ring, VCE_CMD_TRAP);
1093 	amdgpu_ring_write(ring, VCE_CMD_END);
1094 }
1095 
1096 /**
1097  * amdgpu_vce_ring_test_ring - test if VCE ring is working
1098  *
1099  * @ring: the engine to test on
1100  *
1101  */
1102 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
1103 {
1104 	struct amdgpu_device *adev = ring->adev;
1105 	uint32_t rptr;
1106 	unsigned i;
1107 	int r, timeout = adev->usec_timeout;
1108 
1109 	/* skip ring test for sriov */
1110 	if (amdgpu_sriov_vf(adev))
1111 		return 0;
1112 
1113 	r = amdgpu_ring_alloc(ring, 16);
1114 	if (r)
1115 		return r;
1116 
1117 	rptr = amdgpu_ring_get_rptr(ring);
1118 
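	/* submit a single VCE_CMD_END and wait for the read pointer to advance */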
1119 	amdgpu_ring_write(ring, VCE_CMD_END);
1120 	amdgpu_ring_commit(ring);
1121 
1122 	for (i = 0; i < timeout; i++) {
1123 		if (amdgpu_ring_get_rptr(ring) != rptr)
1124 			break;
1125 		udelay(1);
1126 	}
1127 
1128 	if (i >= timeout)
1129 		r = -ETIMEDOUT;
1130 
1131 	return r;
1132 }
1133 
1134 /**
1135  * amdgpu_vce_ring_test_ib - test if VCE IBs are working
1136  *
1137  * @ring: the engine to test on
1138  * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1139  *
1140  */
1141 int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1142 {
1143 	struct dma_fence *fence = NULL;
1144 	long r;
1145 
1146 	/* skip vce ring1/2 ib test for now, since it's not reliable */
1147 	if (ring != &ring->adev->vce.ring[0])
1148 		return 0;
1149 
1150 	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
1151 	if (r)
1152 		goto error;
1153 
1154 	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
1155 	if (r)
1156 		goto error;
1157 
1158 	r = dma_fence_wait_timeout(fence, false, timeout);
1159 	if (r == 0)
1160 		r = -ETIMEDOUT;
1161 	else if (r > 0)
1162 		r = 0;
1163 
1164 error:
1165 	dma_fence_put(fence);
1166 	return r;
1167 }
1168 
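/**
 * amdgpu_vce_get_ring_prio - map a VCE ring index to a scheduler priority level
 *
 * @ring: VCE ring index
 */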
1169 enum amdgpu_ring_priority_level amdgpu_vce_get_ring_prio(int ring)
1170 {
1171 	switch (ring) {
1172 	case 0:
1173 		return AMDGPU_RING_PRIO_0;
1174 	case 1:
1175 		return AMDGPU_RING_PRIO_1;
1176 	case 2:
1177 		return AMDGPU_RING_PRIO_2;
1178 	default:
1179 		return AMDGPU_RING_PRIO_0;
1180 	}
1181 }
1182