xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c (revision 727b77df826b44853476d6e8690fec4cf5515eca)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2023 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/kref.h>
26 #include <linux/slab.h>
27 #include <linux/dma-fence-unwrap.h>
28 
29 #include <drm/drm_exec.h>
30 #include <drm/drm_syncobj.h>
31 
32 #include "amdgpu.h"
33 #include "amdgpu_userq_fence.h"
34 
35 static const struct dma_fence_ops amdgpu_userq_fence_ops;
36 static struct kmem_cache *amdgpu_userq_fence_slab;
37 
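/**
 * amdgpu_userq_fence_slab_init - create the slab cache for user queue fences
 *
 * Called once at module load; every struct amdgpu_userq_fence is allocated
 * from this cache on the submission path.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */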
38 int amdgpu_userq_fence_slab_init(void)
39 {
40 	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
41 						    sizeof(struct amdgpu_userq_fence),
42 						    0,
43 						    SLAB_HWCACHE_ALIGN,
44 						    NULL);
45 	if (!amdgpu_userq_fence_slab)
46 		return -ENOMEM;
47 
48 	return 0;
49 }
50 
51 void amdgpu_userq_fence_slab_fini(void)
52 {
53 	rcu_barrier();
54 	kmem_cache_destroy(amdgpu_userq_fence_slab);
55 }
56 
57 static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
58 {
59 	if (!f || f->ops != &amdgpu_userq_fence_ops)
60 		return NULL;
61 
62 	return container_of(f, struct amdgpu_userq_fence, base);
63 }
64 
65 static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
66 {
67 	return le64_to_cpu(*fence_drv->cpu_addr);
68 }
69 
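/**
 * amdgpu_userq_fence_driver_alloc - allocate the per-queue fence driver
 * @adev: amdgpu device pointer
 * @userq: user mode queue this fence driver belongs to
 *
 * Allocates a refcounted fence driver, reserves a seq64 slot that holds the
 * queue's fence RPTR value, allocates a new dma_fence context and registers
 * the driver in adev->userq_xa under the queue's doorbell index.
 *
 * Returns 0 on success, negative error code on failure.
 */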
70 int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
71 				    struct amdgpu_usermode_queue *userq)
72 {
73 	struct amdgpu_userq_fence_driver *fence_drv;
74 	unsigned long flags;
75 	int r;
76 
77 	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
78 	if (!fence_drv)
79 		return -ENOMEM;
80 
81 	/* Acquire seq64 memory */
82 	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
83 			       &fence_drv->cpu_addr);
84 	if (r)
85 		goto free_fence_drv;
86 
87 	memset(fence_drv->cpu_addr, 0, sizeof(u64));
88 
89 	kref_init(&fence_drv->refcount);
90 	INIT_LIST_HEAD(&fence_drv->fences);
91 	spin_lock_init(&fence_drv->fence_list_lock);
92 
93 	fence_drv->adev = adev;
94 	fence_drv->context = dma_fence_context_alloc(1);
95 	get_task_comm(fence_drv->timeline_name, current);
96 
97 	xa_lock_irqsave(&adev->userq_xa, flags);
98 	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
99 			      fence_drv, GFP_KERNEL));
100 	xa_unlock_irqrestore(&adev->userq_xa, flags);
101 	if (r)
102 		goto free_seq64;
103 
104 	userq->fence_drv = fence_drv;
105 
106 	return 0;
107 
108 free_seq64:
109 	amdgpu_seq64_free(adev, fence_drv->va);
110 free_fence_drv:
111 	kfree(fence_drv);
112 
113 	return r;
114 }
115 
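/*
 * Drop every fence driver reference that was stashed in the given xarray by
 * the wait IOCTL but never transferred to a signal fence.
 */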
116 static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
117 {
118 	struct amdgpu_userq_fence_driver *fence_drv;
119 	unsigned long index;
120 
121 	if (xa_empty(xa))
122 		return;
123 
124 	xa_lock(xa);
125 	xa_for_each(xa, index, fence_drv) {
126 		__xa_erase(xa, index);
127 		amdgpu_userq_fence_driver_put(fence_drv);
128 	}
129 
130 	xa_unlock(xa);
131 }
132 
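/**
 * amdgpu_userq_fence_driver_free - release a queue's fence driver references
 * @userq: user mode queue being destroyed
 *
 * Drops the references collected in userq->fence_drv_xa and the reference
 * the queue itself holds on its fence driver.
 */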
133 void
134 amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
135 {
136 	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
137 	xa_destroy(&userq->fence_drv_xa);
138 	/* Drop the fence_drv reference held by user queue */
139 	amdgpu_userq_fence_driver_put(userq->fence_drv);
140 }
141 
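/**
 * amdgpu_userq_fence_driver_process - signal completed user queue fences
 * @fence_drv: fence driver to process
 *
 * Reads the current RPTR value from the seq64 slot and signals every pending
 * fence whose sequence number has been reached, dropping the fence driver
 * references attached to it and its pending-list reference. Typically invoked
 * from the user queue fence interrupt path, hence the plain spin_lock() here
 * versus the irqsave variant used on the submission side.
 */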
142 void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
143 {
144 	struct amdgpu_userq_fence *userq_fence, *tmp;
145 	struct dma_fence *fence;
146 	u64 rptr;
147 	int i;
148 
149 	if (!fence_drv)
150 		return;
151 
152 	rptr = amdgpu_userq_fence_read(fence_drv);
153 
154 	spin_lock(&fence_drv->fence_list_lock);
155 	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
156 		fence = &userq_fence->base;
157 
158 		if (rptr < fence->seqno)
159 			break;
160 
161 		dma_fence_signal(fence);
162 
163 		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
164 			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
165 
166 		list_del(&userq_fence->link);
167 		dma_fence_put(fence);
168 	}
169 	spin_unlock(&fence_drv->fence_list_lock);
170 }
171 
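/**
 * amdgpu_userq_fence_driver_destroy - kref release callback for a fence driver
 * @ref: refcount that just dropped to zero
 *
 * Cancels (signals with -ECANCELED) any fences still pending on the driver,
 * removes the driver from adev->userq_xa, frees its seq64 slot and finally
 * the driver itself. Called through amdgpu_userq_fence_driver_put().
 */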
172 void amdgpu_userq_fence_driver_destroy(struct kref *ref)
173 {
174 	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
175 					 struct amdgpu_userq_fence_driver,
176 					 refcount);
177 	struct amdgpu_userq_fence_driver *xa_fence_drv;
178 	struct amdgpu_device *adev = fence_drv->adev;
179 	struct amdgpu_userq_fence *fence, *tmp;
180 	struct xarray *xa = &adev->userq_xa;
181 	unsigned long index, flags;
182 	struct dma_fence *f;
183 
184 	spin_lock(&fence_drv->fence_list_lock);
185 	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
186 		f = &fence->base;
187 
188 		if (!dma_fence_is_signaled(f)) {
189 			dma_fence_set_error(f, -ECANCELED);
190 			dma_fence_signal(f);
191 		}
192 
193 		list_del(&fence->link);
194 		dma_fence_put(f);
195 	}
196 	spin_unlock(&fence_drv->fence_list_lock);
197 
198 	xa_lock_irqsave(xa, flags);
199 	xa_for_each(xa, index, xa_fence_drv)
200 		if (xa_fence_drv == fence_drv)
201 			__xa_erase(xa, index);
202 	xa_unlock_irqrestore(xa, flags);
203 
204 	/* Free seq64 memory */
205 	amdgpu_seq64_free(adev, fence_drv->va);
206 	kfree(fence_drv);
207 }
208 
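/*
 * Reference counting helpers for the fence driver; the final put releases it
 * through amdgpu_userq_fence_driver_destroy().
 */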
209 void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
210 {
211 	kref_get(&fence_drv->refcount);
212 }
213 
214 void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
215 {
216 	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
217 }
218 
219 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
220 static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
221 {
222 	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
223 	return *userq_fence ? 0 : -ENOMEM;
224 }
225 
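/**
 * amdgpu_userq_fence_create - initialize and arm a user queue fence
 * @userq: user mode queue the fence belongs to
 * @userq_fence: preallocated fence object
 * @seq: sequence number, i.e. the WPTR value of the submission
 * @f: filled with a pointer to the initialized dma_fence
 *
 * Initializes the dma_fence, takes over any fence driver references stashed
 * in userq->fence_drv_xa and queues the fence on the driver's pending list,
 * unless the hardware has already passed its sequence number in which case
 * the fence is signaled immediately.
 *
 * Returns 0 on success, -EINVAL if the queue has no fence driver.
 */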
226 static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
227 				     struct amdgpu_userq_fence *userq_fence,
228 				     u64 seq, struct dma_fence **f)
229 {
230 	struct amdgpu_userq_fence_driver *fence_drv;
231 	struct dma_fence *fence;
232 	unsigned long flags;
233 
234 	fence_drv = userq->fence_drv;
235 	if (!fence_drv)
236 		return -EINVAL;
237 
238 	spin_lock_init(&userq_fence->lock);
239 	INIT_LIST_HEAD(&userq_fence->link);
240 	fence = &userq_fence->base;
241 	userq_fence->fence_drv = fence_drv;
242 
243 	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
244 		       fence_drv->context, seq);
245 
246 	amdgpu_userq_fence_driver_get(fence_drv);
247 	dma_fence_get(fence);
248 
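	/*
	 * Take over the fence driver references that the wait IOCTL stashed in
	 * fence_drv_xa; they are dropped again when this fence is processed in
	 * amdgpu_userq_fence_driver_process(). The allocation below uses
	 * GFP_ATOMIC because the xarray lock is held across it.
	 */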
249 	if (!xa_empty(&userq->fence_drv_xa)) {
250 		struct amdgpu_userq_fence_driver *stored_fence_drv;
251 		unsigned long index, count = 0;
252 		int i = 0;
253 
254 		xa_lock(&userq->fence_drv_xa);
255 		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
256 			count++;
257 
258 		userq_fence->fence_drv_array =
259 			kvmalloc_array(count,
260 				       sizeof(struct amdgpu_userq_fence_driver *),
261 				       GFP_ATOMIC);
262 
263 		if (userq_fence->fence_drv_array) {
264 			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
265 				userq_fence->fence_drv_array[i] = stored_fence_drv;
266 				__xa_erase(&userq->fence_drv_xa, index);
267 				i++;
268 			}
269 		}
270 
271 		userq_fence->fence_drv_array_count = i;
272 		xa_unlock(&userq->fence_drv_xa);
273 	} else {
274 		userq_fence->fence_drv_array = NULL;
275 		userq_fence->fence_drv_array_count = 0;
276 	}
277 
278 	/* Check if hardware has already processed the job */
279 	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
280 	if (!dma_fence_is_signaled_locked(fence))
281 		list_add_tail(&userq_fence->link, &fence_drv->fences);
282 	else
283 		dma_fence_put(fence);
284 
285 	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
286 
287 	*f = fence;
288 
289 	return 0;
290 }
291 #endif
292 
293 static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
294 {
295 	return "amdgpu_userq_fence";
296 }
297 
298 static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
299 {
300 	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
301 
302 	return fence->fence_drv->timeline_name;
303 }
304 
305 static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
306 {
307 	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
308 	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
309 	u64 rptr, wptr;
310 
311 	rptr = amdgpu_userq_fence_read(fence_drv);
312 	wptr = fence->base.seqno;
313 
314 	if (rptr >= wptr)
315 		return true;
316 
317 	return false;
318 }
319 
320 static void amdgpu_userq_fence_free(struct rcu_head *rcu)
321 {
322 	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
323 	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
324 	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
325 
326 	/* Release the fence driver reference */
327 	amdgpu_userq_fence_driver_put(fence_drv);
328 
329 	kvfree(userq_fence->fence_drv_array);
330 	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
331 }
332 
333 static void amdgpu_userq_fence_release(struct dma_fence *f)
334 {
335 	call_rcu(&f->rcu, amdgpu_userq_fence_free);
336 }
337 
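/*
 * User queue fences use 64-bit sequence numbers: a fence counts as signaled
 * once the RPTR value the queue writes to its seq64 slot is >= the fence's
 * WPTR-derived seqno, so no wrap handling is needed (hence use_64bit_seqno).
 */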
338 static const struct dma_fence_ops amdgpu_userq_fence_ops = {
339 	.use_64bit_seqno = true,
340 	.get_driver_name = amdgpu_userq_fence_get_driver_name,
341 	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
342 	.signaled = amdgpu_userq_fence_signaled,
343 	.release = amdgpu_userq_fence_release,
344 };
345 
346 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
347 /**
348  * amdgpu_userq_fence_read_wptr - Read the userq wptr value
349  *
350  * @queue: user mode queue structure pointer
351  * @wptr: write pointer value
352  *
353  * Read the wptr value from userq's MQD. The userq signal IOCTL
354  * creates a dma_fence for the shared buffers that is considered
355  * signaled once the RPTR value written to the seq64 memory is >= WPTR.
356  *
357  * Returns 0 on success and stores the wptr value in @wptr, negative error code on failure.
358  */
359 static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
360 					u64 *wptr)
361 {
362 	struct amdgpu_bo_va_mapping *mapping;
363 	struct amdgpu_bo *bo;
364 	u64 addr, *ptr;
365 	int r;
366 
367 	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
368 	if (r)
369 		return r;
370 
371 	addr = queue->userq_prop->wptr_gpu_addr;
372 	addr &= AMDGPU_GMC_HOLE_MASK;
373 
374 	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
375 	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
376 		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
377 		return -EINVAL;
378 	}
379 
380 	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
381 	amdgpu_bo_unreserve(queue->vm->root.bo);
382 	r = amdgpu_bo_reserve(bo, true);
383 	if (r) {
384 		DRM_ERROR("Failed to reserve userqueue wptr bo\n");
385 		return r;
386 	}
387 
388 	r = amdgpu_bo_kmap(bo, (void **)&ptr);
389 	if (r) {
390 		DRM_ERROR("Failed to map the userqueue wptr bo\n");
391 		goto map_error;
392 	}
393 
394 	*wptr = le64_to_cpu(*ptr);
395 
396 	amdgpu_bo_kunmap(bo);
397 	amdgpu_bo_unreserve(bo);
398 	amdgpu_bo_unref(&bo);
399 
400 	return 0;
401 
402 map_error:
403 	amdgpu_bo_unreserve(bo);
404 	amdgpu_bo_unref(&bo);
405 
406 	return r;
407 }
408 
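/* Error-path helper: drop the reference taken when the fence was created */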
409 static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
410 {
411 	dma_fence_put(fence);
412 }
413 
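/**
 * amdgpu_userq_signal_ioctl - IOCTL handler for signalling a user queue submission
 * @dev: drm device
 * @data: ioctl payload, a struct drm_amdgpu_userq_signal
 * @filp: drm file the ioctl was issued on
 *
 * Creates a dma_fence for the queue's current WPTR and attaches it to the
 * supplied syncobjs as well as to the reservation objects of the read and
 * write BOs, so that other submissions and CPU waiters can synchronize with
 * the user mode queue.
 *
 * Returns 0 on success, negative error code on failure.
 */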
414 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
415 			      struct drm_file *filp)
416 {
417 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
418 	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
419 	struct drm_amdgpu_userq_signal *args = data;
420 	struct drm_gem_object **gobj_write = NULL;
421 	struct drm_gem_object **gobj_read = NULL;
422 	struct amdgpu_usermode_queue *queue;
423 	struct amdgpu_userq_fence *userq_fence;
424 	struct drm_syncobj **syncobj = NULL;
425 	u32 *bo_handles_write, num_write_bo_handles;
426 	u32 *syncobj_handles, num_syncobj_handles;
427 	u32 *bo_handles_read, num_read_bo_handles;
428 	int r, i, entry, rentry, wentry;
429 	struct dma_fence *fence;
430 	struct drm_exec exec;
431 	u64 wptr;
432 
433 	num_syncobj_handles = args->num_syncobj_handles;
434 	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
435 				      sizeof(u32) * num_syncobj_handles);
436 	if (IS_ERR(syncobj_handles))
437 		return PTR_ERR(syncobj_handles);
438 
439 	/* Array of pointers to the looked up syncobjs */
440 	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
441 	if (!syncobj) {
442 		r = -ENOMEM;
443 		goto free_syncobj_handles;
444 	}
445 
446 	for (entry = 0; entry < num_syncobj_handles; entry++) {
447 		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
448 		if (!syncobj[entry]) {
449 			r = -ENOENT;
450 			goto free_syncobj;
451 		}
452 	}
453 
454 	num_read_bo_handles = args->num_bo_read_handles;
455 	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
456 				      sizeof(u32) * num_read_bo_handles);
457 	if (IS_ERR(bo_handles_read)) {
458 		r = PTR_ERR(bo_handles_read);
459 		goto free_syncobj;
460 	}
461 
462 	/* Array of pointers to the GEM read objects */
463 	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
464 	if (!gobj_read) {
465 		r = -ENOMEM;
466 		goto free_bo_handles_read;
467 	}
468 
469 	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
470 		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
471 		if (!gobj_read[rentry]) {
472 			r = -ENOENT;
473 			goto put_gobj_read;
474 		}
475 	}
476 
477 	num_write_bo_handles = args->num_bo_write_handles;
478 	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
479 				       sizeof(u32) * num_write_bo_handles);
480 	if (IS_ERR(bo_handles_write)) {
481 		r = PTR_ERR(bo_handles_write);
482 		goto put_gobj_read;
483 	}
484 
485 	/* Array of pointers to the GEM write objects */
486 	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
487 	if (!gobj_write) {
488 		r = -ENOMEM;
489 		goto free_bo_handles_write;
490 	}
491 
492 	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
493 		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
494 		if (!gobj_write[wentry]) {
495 			r = -ENOENT;
496 			goto put_gobj_write;
497 		}
498 	}
499 
500 	/* Retrieve the user queue */
501 	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
502 	if (!queue) {
503 		r = -ENOENT;
504 		goto put_gobj_write;
505 	}
506 
507 	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
508 	if (r)
509 		goto put_gobj_write;
510 
511 	r = amdgpu_userq_fence_alloc(&userq_fence);
512 	if (r)
513 		goto put_gobj_write;
514 
515 	/*
	 * The queue is active at this point, so make sure its eviction fence is
	 * valid. amdgpu_userq_ensure_ev_fence() returns with
	 * userq_mgr->userq_mutex held; it is dropped below once the new fence
	 * has been published.
	 */
516 	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
517 
518 	/* Create a new fence */
519 	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
520 	if (r) {
521 		mutex_unlock(&userq_mgr->userq_mutex);
522 		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
523 		goto put_gobj_write;
524 	}
525 
526 	dma_fence_put(queue->last_fence);
527 	queue->last_fence = dma_fence_get(fence);
528 	mutex_unlock(&userq_mgr->userq_mutex);
529 
530 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
531 		      (num_read_bo_handles + num_write_bo_handles));
532 
533 	/* Lock all BOs with retry handling */
534 	drm_exec_until_all_locked(&exec) {
535 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
536 		drm_exec_retry_on_contention(&exec);
537 		if (r) {
538 			amdgpu_userq_fence_cleanup(fence);
539 			goto exec_fini;
540 		}
541 
542 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
543 		drm_exec_retry_on_contention(&exec);
544 		if (r) {
545 			amdgpu_userq_fence_cleanup(fence);
546 			goto exec_fini;
547 		}
548 	}
549 
550 	for (i = 0; i < num_read_bo_handles; i++) {
551 		if (!gobj_read || !gobj_read[i]->resv)
552 			continue;
553 
554 		dma_resv_add_fence(gobj_read[i]->resv, fence,
555 				   DMA_RESV_USAGE_READ);
556 	}
557 
558 	for (i = 0; i < num_write_bo_handles; i++) {
559 		if (!gobj_write || !gobj_write[i]->resv)
560 			continue;
561 
562 		dma_resv_add_fence(gobj_write[i]->resv, fence,
563 				   DMA_RESV_USAGE_WRITE);
564 	}
565 
566 	/* Attach the created fence to each syncobj */
567 	for (i = 0; i < num_syncobj_handles; i++)
568 		drm_syncobj_replace_fence(syncobj[i], fence);
569 
570 	/* drop the reference acquired in fence creation function */
571 	dma_fence_put(fence);
572 
573 exec_fini:
574 	drm_exec_fini(&exec);
575 put_gobj_write:
576 	while (wentry-- > 0)
577 		drm_gem_object_put(gobj_write[wentry]);
578 	kfree(gobj_write);
579 free_bo_handles_write:
580 	kfree(bo_handles_write);
581 put_gobj_read:
582 	while (rentry-- > 0)
583 		drm_gem_object_put(gobj_read[rentry]);
584 	kfree(gobj_read);
585 free_bo_handles_read:
586 	kfree(bo_handles_read);
587 free_syncobj:
588 	while (entry-- > 0)
589 		if (syncobj[entry])
590 			drm_syncobj_put(syncobj[entry]);
591 	kfree(syncobj);
592 free_syncobj_handles:
593 	kfree(syncobj_handles);
594 
595 	return r;
596 }
597 #else
598 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
599 			      struct drm_file *filp)
600 {
601 	return -EOPNOTSUPP;
602 }
603 #endif
604 
605 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
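/**
 * amdgpu_userq_wait_ioctl - IOCTL handler for waiting on user queue fences
 * @dev: drm device
 * @data: ioctl payload, a struct drm_amdgpu_userq_wait
 * @filp: drm file the ioctl was issued on
 *
 * Collects the fences behind the supplied syncobjs, timeline points and
 * read/write BOs. Fences from other drivers are waited on directly, while
 * for user queue fences the seq64 GPU address and expected value are
 * returned so the user mode queue can wait on them itself. Userspace
 * typically calls this twice: first with num_fences == 0 to query how many
 * entries it needs, then again with an out_fences array of that size.
 *
 * Returns 0 on success, negative error code on failure.
 */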
606 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
607 			    struct drm_file *filp)
608 {
609 	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
610 	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
611 	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
612 	struct drm_amdgpu_userq_wait *wait_info = data;
613 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
614 	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
615 	struct amdgpu_usermode_queue *waitq;
616 	struct drm_gem_object **gobj_write;
617 	struct drm_gem_object **gobj_read;
618 	struct dma_fence **fences = NULL;
619 	u16 num_points, num_fences = 0;
620 	int r, i, rentry, wentry, cnt;
621 	struct drm_exec exec;
622 
623 	num_read_bo_handles = wait_info->num_bo_read_handles;
624 	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
625 				      sizeof(u32) * num_read_bo_handles);
626 	if (IS_ERR(bo_handles_read))
627 		return PTR_ERR(bo_handles_read);
628 
629 	num_write_bo_handles = wait_info->num_bo_write_handles;
630 	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
631 				       sizeof(u32) * num_write_bo_handles);
632 	if (IS_ERR(bo_handles_write)) {
633 		r = PTR_ERR(bo_handles_write);
634 		goto free_bo_handles_read;
635 	}
636 
637 	num_syncobj = wait_info->num_syncobj_handles;
638 	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
639 				      sizeof(u32) * num_syncobj);
640 	if (IS_ERR(syncobj_handles)) {
641 		r = PTR_ERR(syncobj_handles);
642 		goto free_bo_handles_write;
643 	}
644 
645 	num_points = wait_info->num_syncobj_timeline_handles;
646 	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
647 				       sizeof(u32) * num_points);
648 	if (IS_ERR(timeline_handles)) {
649 		r = PTR_ERR(timeline_handles);
650 		goto free_syncobj_handles;
651 	}
652 
653 	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
654 				      sizeof(u32) * num_points);
655 	if (IS_ERR(timeline_points)) {
656 		r = PTR_ERR(timeline_points);
657 		goto free_timeline_handles;
658 	}
659 
660 	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
661 	if (!gobj_read) {
662 		r = -ENOMEM;
663 		goto free_timeline_points;
664 	}
665 
666 	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
667 		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
668 		if (!gobj_read[rentry]) {
669 			r = -ENOENT;
670 			goto put_gobj_read;
671 		}
672 	}
673 
674 	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
675 	if (!gobj_write) {
676 		r = -ENOMEM;
677 		goto put_gobj_read;
678 	}
679 
680 	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
681 		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
682 		if (!gobj_write[wentry]) {
683 			r = -ENOENT;
684 			goto put_gobj_write;
685 		}
686 	}
687 
688 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
689 		      (num_read_bo_handles + num_write_bo_handles));
690 
691 	/* Lock all BOs with retry handling */
692 	drm_exec_until_all_locked(&exec) {
693 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
694 		drm_exec_retry_on_contention(&exec);
695 		if (r) {
696 			drm_exec_fini(&exec);
697 			goto put_gobj_write;
698 		}
699 
700 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
701 		drm_exec_retry_on_contention(&exec);
702 		if (r) {
703 			drm_exec_fini(&exec);
704 			goto put_gobj_write;
705 		}
706 	}
707 
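	/*
	 * Two modes: with wait_info->num_fences == 0 only count the fences
	 * that would be returned; otherwise collect them and hand the user
	 * queue fence (va, value) pairs back to userspace.
	 */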
708 	if (!wait_info->num_fences) {
709 		if (num_points) {
710 			struct dma_fence_unwrap iter;
711 			struct dma_fence *fence;
712 			struct dma_fence *f;
713 
714 			for (i = 0; i < num_points; i++) {
715 				r = drm_syncobj_find_fence(filp, timeline_handles[i],
716 							   timeline_points[i],
717 							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
718 							   &fence);
719 				if (r)
720 					goto exec_fini;
721 
722 				dma_fence_unwrap_for_each(f, &iter, fence)
723 					num_fences++;
724 
725 				dma_fence_put(fence);
726 			}
727 		}
728 
729 		/* Count the syncobj fences */
730 		for (i = 0; i < num_syncobj; i++) {
731 			struct dma_fence *fence;
732 
733 			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
734 						   0,
735 						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
736 						   &fence);
737 			if (r)
738 				goto exec_fini;
739 
740 			num_fences++;
741 			dma_fence_put(fence);
742 		}
743 
744 		/* Count the GEM object fences */
745 		for (i = 0; i < num_read_bo_handles; i++) {
746 			struct dma_resv_iter resv_cursor;
747 			struct dma_fence *fence;
748 
749 			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
750 						DMA_RESV_USAGE_READ, fence)
751 				num_fences++;
752 		}
753 
754 		for (i = 0; i < num_write_bo_handles; i++) {
755 			struct dma_resv_iter resv_cursor;
756 			struct dma_fence *fence;
757 
758 			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
759 						DMA_RESV_USAGE_WRITE, fence)
760 				num_fences++;
761 		}
762 
763 		/*
764 		 * Passing num_fences = 0 means that userspace doesn't want to
765 		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
766 		 * userq_fence_info and return the actual number of fences on
767 		 * args->num_fences.
768 		 * wait_info->num_fences.
769 		wait_info->num_fences = num_fences;
770 	} else {
771 		/* Array of fence info */
772 		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
773 		if (!fence_info) {
774 			r = -ENOMEM;
775 			goto exec_fini;
776 		}
777 
778 		/* Array of fences */
779 		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
780 		if (!fences) {
781 			r = -ENOMEM;
782 			goto free_fence_info;
783 		}
784 
785 		/* Collect the GEM read object fences */
786 		for (i = 0; i < num_read_bo_handles; i++) {
787 			struct dma_resv_iter resv_cursor;
788 			struct dma_fence *fence;
789 
790 			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
791 						DMA_RESV_USAGE_READ, fence) {
792 				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
793 					r = -EINVAL;
794 					goto free_fences;
795 				}
796 
797 				fences[num_fences++] = fence;
798 				dma_fence_get(fence);
799 			}
800 		}
801 
802 		/* Collect the GEM write object fences */
803 		for (i = 0; i < num_write_bo_handles; i++) {
804 			struct dma_resv_iter resv_cursor;
805 			struct dma_fence *fence;
806 
807 			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
808 						DMA_RESV_USAGE_WRITE, fence) {
809 				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
810 					r = -EINVAL;
811 					goto free_fences;
812 				}
813 
814 				fences[num_fences++] = fence;
815 				dma_fence_get(fence);
816 			}
817 		}
818 
819 		if (num_points) {
820 			struct dma_fence_unwrap iter;
821 			struct dma_fence *fence;
822 			struct dma_fence *f;
823 
824 			for (i = 0; i < num_points; i++) {
825 				r = drm_syncobj_find_fence(filp, timeline_handles[i],
826 							   timeline_points[i],
827 							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
828 							   &fence);
829 				if (r)
830 					goto free_fences;
831 
832 				dma_fence_unwrap_for_each(f, &iter, fence) {
833 					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
834 						r = -EINVAL;
835 						goto free_fences;
836 					}
837 
838 					dma_fence_get(f);
839 					fences[num_fences++] = f;
840 				}
841 
842 				dma_fence_put(fence);
843 			}
844 		}
845 
846 		/* Collect the syncobj fences */
847 		for (i = 0; i < num_syncobj; i++) {
848 			struct dma_fence *fence;
849 
850 			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
851 						   0,
852 						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
853 						   &fence);
854 			if (r)
855 				goto free_fences;
856 
857 			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
858 				r = -EINVAL;
859 				goto free_fences;
860 			}
861 
862 			fences[num_fences++] = fence;
863 		}
864 
865 		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
866 		if (!waitq) {
			r = -EINVAL;
867 			goto free_fences;
		}
868 
869 		for (i = 0, cnt = 0; i < num_fences; i++) {
870 			struct amdgpu_userq_fence_driver *fence_drv;
871 			struct amdgpu_userq_fence *userq_fence;
872 			u32 index;
873 
874 			userq_fence = to_amdgpu_userq_fence(fences[i]);
875 			if (!userq_fence) {
876 				/*
877 				 * Just waiting on other driver fences should
878 				 * be good for now
879 				 */
880 				r = dma_fence_wait(fences[i], true);
881 				if (r) {
882 					dma_fence_put(fences[i]);
883 					goto free_fences;
884 				}
885 
886 				dma_fence_put(fences[i]);
887 				continue;
888 			}
889 
890 			fence_drv = userq_fence->fence_drv;
891 			/*
892 			 * We need to make sure the user queue releases its references
893 			 * to the fence drivers at some point before queue destruction.
894 			 * Otherwise we would keep accumulating those references until
895 			 * we run out of space in the xarray and crash.
896 			 */
897 			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
898 				     xa_limit_32b, GFP_KERNEL);
899 			if (r)
900 				goto free_fences;
901 
902 			amdgpu_userq_fence_driver_get(fence_drv);
903 
904 			/* Store the fence driver's seq64 GPU VA and the expected fence value */
905 			fence_info[cnt].va = fence_drv->va;
906 			fence_info[cnt].value = fences[i]->seqno;
907 
908 			dma_fence_put(fences[i]);
909 			/* Increment the actual userq fence count */
910 			cnt++;
911 		}
912 
913 		wait_info->num_fences = cnt;
914 		/* Copy userq fence info to user space */
915 		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
916 				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
917 			r = -EFAULT;
918 			goto free_fences;
919 		}
920 
921 		kfree(fences);
922 		kfree(fence_info);
923 	}
924 
925 	drm_exec_fini(&exec);
926 	for (i = 0; i < num_read_bo_handles; i++)
927 		drm_gem_object_put(gobj_read[i]);
928 	kfree(gobj_read);
929 
930 	for (i = 0; i < num_write_bo_handles; i++)
931 		drm_gem_object_put(gobj_write[i]);
932 	kfree(gobj_write);
933 
934 	kfree(timeline_points);
935 	kfree(timeline_handles);
936 	kfree(syncobj_handles);
937 	kfree(bo_handles_write);
938 	kfree(bo_handles_read);
939 
940 	return 0;
941 
942 free_fences:
943 	while (num_fences-- > 0)
944 		dma_fence_put(fences[num_fences]);
945 	kfree(fences);
946 free_fence_info:
947 	kfree(fence_info);
948 exec_fini:
949 	drm_exec_fini(&exec);
950 put_gobj_write:
951 	while (wentry-- > 0)
952 		drm_gem_object_put(gobj_write[wentry]);
953 	kfree(gobj_write);
954 put_gobj_read:
955 	while (rentry-- > 0)
956 		drm_gem_object_put(gobj_read[rentry]);
957 	kfree(gobj_read);
958 free_timeline_points:
959 	kfree(timeline_points);
960 free_timeline_handles:
961 	kfree(timeline_handles);
962 free_syncobj_handles:
963 	kfree(syncobj_handles);
964 free_bo_handles_write:
965 	kfree(bo_handles_write);
966 free_bo_handles_read:
967 	kfree(bo_handles_read);
968 
969 	return r;
970 }
971 #else
972 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
973 			    struct drm_file *filp)
974 {
975 	return -EOPNOTSUPP;
976 }
977 #endif
978