xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c (revision a61c16258a4720065972cf04fcfee1caa6ea5fc0)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright 2023 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/kref.h>
26 #include <linux/slab.h>
27 #include <linux/dma-fence-unwrap.h>
28 
29 #include <drm/drm_exec.h>
30 #include <drm/drm_syncobj.h>
31 
32 #include "amdgpu.h"
33 #include "amdgpu_userq_fence.h"
34 
35 static const struct dma_fence_ops amdgpu_userq_fence_ops;
36 static struct kmem_cache *amdgpu_userq_fence_slab;
37 
38 int amdgpu_userq_fence_slab_init(void)
39 {
40 	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
41 						    sizeof(struct amdgpu_userq_fence),
42 						    0,
43 						    SLAB_HWCACHE_ALIGN,
44 						    NULL);
45 	if (!amdgpu_userq_fence_slab)
46 		return -ENOMEM;
47 
48 	return 0;
49 }
50 
51 void amdgpu_userq_fence_slab_fini(void)
52 {
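	/*
	 * Fences are freed via call_rcu() in amdgpu_userq_fence_free(), so
	 * wait for any pending RCU callbacks before destroying the slab.
	 */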
53 	rcu_barrier();
54 	kmem_cache_destroy(amdgpu_userq_fence_slab);
55 }
56 
57 static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
58 {
59 	if (!f || f->ops != &amdgpu_userq_fence_ops)
60 		return NULL;
61 
62 	return container_of(f, struct amdgpu_userq_fence, base);
63 }
64 
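/*
 * Read the 64-bit fence value last written to this fence driver's seq64
 * slot, i.e. the RPTR side of the user queue fence protocol.
 */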
65 static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
66 {
67 	return le64_to_cpu(*fence_drv->cpu_addr);
68 }
69 
70 int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
71 				    struct amdgpu_usermode_queue *userq)
72 {
73 	struct amdgpu_userq_fence_driver *fence_drv;
74 	unsigned long flags;
75 	int r;
76 
77 	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
78 	if (!fence_drv) {
79 		DRM_ERROR("Failed to allocate memory for fence driver\n");
80 		r = -ENOMEM;
81 		goto free_fence_drv;
82 	}
83 
84 	/* Acquire seq64 memory */
85 	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
86 			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;
92 
93 	memset(fence_drv->cpu_addr, 0, sizeof(u64));
94 
95 	kref_init(&fence_drv->refcount);
96 	INIT_LIST_HEAD(&fence_drv->fences);
97 	spin_lock_init(&fence_drv->fence_list_lock);
98 
99 	fence_drv->adev = adev;
100 	fence_drv->fence_drv_xa_ptr = &userq->fence_drv_xa;
101 	fence_drv->context = dma_fence_context_alloc(1);
102 	get_task_comm(fence_drv->timeline_name, current);
103 
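	/*
	 * Publish the fence driver in the device-wide xarray, keyed by the
	 * queue's doorbell index, so other parts of the driver can look it
	 * up by doorbell.
	 */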
104 	xa_lock_irqsave(&adev->userq_xa, flags);
105 	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
106 			      fence_drv, GFP_KERNEL));
107 	xa_unlock_irqrestore(&adev->userq_xa, flags);
108 	if (r)
109 		goto free_seq64;
110 
111 	userq->fence_drv = fence_drv;
112 
113 	return 0;
114 
115 free_seq64:
116 	amdgpu_seq64_free(adev, fence_drv->va);
117 free_fence_drv:
118 	kfree(fence_drv);
119 
120 	return r;
121 }
122 
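/*
 * Signal completed fences: walk the pending list and signal every fence
 * whose sequence number is covered by the value the hardware has written
 * to seq64 memory, dropping the references each fence holds.
 */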
123 void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
124 {
125 	struct amdgpu_userq_fence *userq_fence, *tmp;
126 	struct dma_fence *fence;
127 	u64 rptr;
128 	int i;
129 
130 	if (!fence_drv)
131 		return;
132 
133 	rptr = amdgpu_userq_fence_read(fence_drv);
134 
135 	spin_lock(&fence_drv->fence_list_lock);
136 	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
137 		fence = &userq_fence->base;
138 
139 		if (rptr < fence->seqno)
140 			break;
141 
142 		dma_fence_signal(fence);
143 
144 		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
145 			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);
146 
147 		list_del(&userq_fence->link);
148 		dma_fence_put(fence);
149 	}
150 	spin_unlock(&fence_drv->fence_list_lock);
151 }
152 
153 void amdgpu_userq_fence_driver_destroy(struct kref *ref)
154 {
155 	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
156 					 struct amdgpu_userq_fence_driver,
157 					 refcount);
158 	struct amdgpu_userq_fence_driver *xa_fence_drv;
159 	struct amdgpu_device *adev = fence_drv->adev;
160 	struct amdgpu_userq_fence *fence, *tmp;
161 	struct xarray *xa = &adev->userq_xa;
162 	unsigned long index, flags;
163 	struct dma_fence *f;
164 
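	/*
	 * The fence driver is going away; force-signal any fences that are
	 * still pending (with -ECANCELED) so waiters are not left hanging.
	 */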
165 	spin_lock(&fence_drv->fence_list_lock);
166 	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
167 		f = &fence->base;
168 
169 		if (!dma_fence_is_signaled(f)) {
170 			dma_fence_set_error(f, -ECANCELED);
171 			dma_fence_signal(f);
172 		}
173 
174 		list_del(&fence->link);
175 		dma_fence_put(f);
176 	}
177 	spin_unlock(&fence_drv->fence_list_lock);
178 
179 	xa_lock_irqsave(xa, flags);
180 	xa_for_each(xa, index, xa_fence_drv)
181 		if (xa_fence_drv == fence_drv)
182 			__xa_erase(xa, index);
183 	xa_unlock_irqrestore(xa, flags);
184 
185 	/* Free seq64 memory */
186 	amdgpu_seq64_free(adev, fence_drv->va);
187 	kfree(fence_drv);
188 }
189 
190 void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
191 {
192 	kref_get(&fence_drv->refcount);
193 }
194 
195 void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
196 {
197 	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
198 }
199 
200 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
201 static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
202 {
203 	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
204 	return *userq_fence ? 0 : -ENOMEM;
205 }
206 
207 static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
208 				     struct amdgpu_userq_fence *userq_fence,
209 				     u64 seq, struct dma_fence **f)
210 {
211 	struct amdgpu_userq_fence_driver *fence_drv;
212 	struct dma_fence *fence;
213 	unsigned long flags;
214 
215 	fence_drv = userq->fence_drv;
216 	if (!fence_drv)
217 		return -EINVAL;
218 
219 	spin_lock_init(&userq_fence->lock);
220 	INIT_LIST_HEAD(&userq_fence->link);
221 	fence = &userq_fence->base;
222 	userq_fence->fence_drv = fence_drv;
223 
224 	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
225 		       fence_drv->context, seq);
226 
227 	amdgpu_userq_fence_driver_get(fence_drv);
228 	dma_fence_get(fence);
229 
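	/*
	 * Take over the fence driver references that were stashed in the
	 * per-queue xarray (see amdgpu_userq_wait_ioctl()); they are dropped
	 * again in amdgpu_userq_fence_driver_process() once this fence is
	 * signaled.
	 */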
230 	if (!xa_empty(&userq->fence_drv_xa)) {
231 		struct amdgpu_userq_fence_driver *stored_fence_drv;
232 		unsigned long index, count = 0;
233 		int i = 0;
234 
235 		xa_lock(&userq->fence_drv_xa);
236 		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
237 			count++;
238 
239 		userq_fence->fence_drv_array =
240 			kvmalloc_array(count,
241 				       sizeof(struct amdgpu_userq_fence_driver *),
242 				       GFP_ATOMIC);
243 
244 		if (userq_fence->fence_drv_array) {
245 			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
246 				userq_fence->fence_drv_array[i] = stored_fence_drv;
247 				__xa_erase(&userq->fence_drv_xa, index);
248 				i++;
249 			}
250 		}
251 
252 		userq_fence->fence_drv_array_count = i;
253 		xa_unlock(&userq->fence_drv_xa);
254 	} else {
255 		userq_fence->fence_drv_array = NULL;
256 		userq_fence->fence_drv_array_count = 0;
257 	}
258 
259 	/* Check if hardware has already processed the job */
260 	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
261 	if (!dma_fence_is_signaled_locked(fence))
262 		list_add_tail(&userq_fence->link, &fence_drv->fences);
263 	else
264 		dma_fence_put(fence);
265 
266 	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);
267 
268 	*f = fence;
269 
270 	return 0;
271 }
272 #endif
273 
274 static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
275 {
276 	return "amdgpu_userqueue_fence";
277 }
278 
279 static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
280 {
281 	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
282 
283 	return fence->fence_drv->timeline_name;
284 }
285 
286 static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
287 {
288 	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
289 	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
290 	u64 rptr, wptr;
291 
292 	rptr = amdgpu_userq_fence_read(fence_drv);
293 	wptr = fence->base.seqno;
294 
	return rptr >= wptr;
299 }
300 
301 static void amdgpu_userq_fence_free(struct rcu_head *rcu)
302 {
303 	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
304 	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
305 	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
306 
307 	/* Release the fence driver reference */
308 	amdgpu_userq_fence_driver_put(fence_drv);
309 
310 	kvfree(userq_fence->fence_drv_array);
311 	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
312 }
313 
314 static void amdgpu_userq_fence_release(struct dma_fence *f)
315 {
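	/*
	 * Defer the actual free by an RCU grace period since dma_fence
	 * users may still reference the fence under RCU.
	 */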
316 	call_rcu(&f->rcu, amdgpu_userq_fence_free);
317 }
318 
319 static const struct dma_fence_ops amdgpu_userq_fence_ops = {
320 	.use_64bit_seqno = true,
321 	.get_driver_name = amdgpu_userq_fence_get_driver_name,
322 	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
323 	.signaled = amdgpu_userq_fence_signaled,
324 	.release = amdgpu_userq_fence_release,
325 };
326 
327 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
328 /**
329  * amdgpu_userq_fence_read_wptr - Read the userq wptr value
330  *
331  * @queue: user mode queue structure pointer
332  * @wptr: write pointer value
333  *
 * Read the wptr value from the userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that is considered signaled
 * once the RPTR value written to seq64 memory is >= this WPTR.
 *
 * Returns 0 on success (the wptr value is returned through @wptr),
 * negative error code on failure.
339  */
340 static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
341 					u64 *wptr)
342 {
343 	struct amdgpu_bo_va_mapping *mapping;
344 	struct amdgpu_bo *bo;
345 	u64 addr, *ptr;
346 	int r;
347 
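	/*
	 * The wptr lives in a userspace-mapped BO: reserve the VM root BO to
	 * look up the mapping, then reserve and kmap the wptr BO to read the
	 * current value.
	 */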
348 	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
349 	if (r)
350 		return r;
351 
352 	addr = queue->userq_prop->wptr_gpu_addr;
353 	addr &= AMDGPU_GMC_HOLE_MASK;
354 
355 	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}
360 
361 	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
362 	amdgpu_bo_unreserve(queue->vm->root.bo);
363 	r = amdgpu_bo_reserve(bo, true);
364 	if (r) {
365 		DRM_ERROR("Failed to reserve userqueue wptr bo");
366 		return r;
367 	}
368 
369 	r = amdgpu_bo_kmap(bo, (void **)&ptr);
370 	if (r) {
371 		DRM_ERROR("Failed mapping the userqueue wptr bo");
372 		goto map_error;
373 	}
374 
375 	*wptr = le64_to_cpu(*ptr);
376 
377 	amdgpu_bo_kunmap(bo);
378 	amdgpu_bo_unreserve(bo);
379 	amdgpu_bo_unref(&bo);
380 
381 	return 0;
382 
383 map_error:
384 	amdgpu_bo_unreserve(bo);
385 	amdgpu_bo_unref(&bo);
386 
387 	return r;
388 }
389 
390 static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
391 {
392 	dma_fence_put(fence);
393 }
394 
395 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
396 			      struct drm_file *filp)
397 {
398 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
399 	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
400 	struct drm_amdgpu_userq_signal *args = data;
401 	struct drm_gem_object **gobj_write = NULL;
402 	struct drm_gem_object **gobj_read = NULL;
403 	struct amdgpu_usermode_queue *queue;
404 	struct amdgpu_userq_fence *userq_fence;
405 	struct drm_syncobj **syncobj = NULL;
406 	u32 *bo_handles_write, num_write_bo_handles;
407 	u32 *syncobj_handles, num_syncobj_handles;
408 	u32 *bo_handles_read, num_read_bo_handles;
409 	int r, i, entry, rentry, wentry;
410 	struct dma_fence *fence;
411 	struct drm_exec exec;
412 	u64 wptr;
413 
414 	num_syncobj_handles = args->num_syncobj_handles;
415 	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
416 				      sizeof(u32) * num_syncobj_handles);
417 	if (IS_ERR(syncobj_handles))
418 		return PTR_ERR(syncobj_handles);
419 
420 	/* Array of pointers to the looked up syncobjs */
421 	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
422 	if (!syncobj) {
423 		r = -ENOMEM;
424 		goto free_syncobj_handles;
425 	}
426 
427 	for (entry = 0; entry < num_syncobj_handles; entry++) {
428 		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
429 		if (!syncobj[entry]) {
430 			r = -ENOENT;
431 			goto free_syncobj;
432 		}
433 	}
434 
435 	num_read_bo_handles = args->num_bo_read_handles;
436 	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
437 				      sizeof(u32) * num_read_bo_handles);
438 	if (IS_ERR(bo_handles_read)) {
439 		r = PTR_ERR(bo_handles_read);
440 		goto free_syncobj;
441 	}
442 
443 	/* Array of pointers to the GEM read objects */
444 	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
445 	if (!gobj_read) {
446 		r = -ENOMEM;
447 		goto free_bo_handles_read;
448 	}
449 
450 	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
451 		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
452 		if (!gobj_read[rentry]) {
453 			r = -ENOENT;
454 			goto put_gobj_read;
455 		}
456 	}
457 
458 	num_write_bo_handles = args->num_bo_write_handles;
459 	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
460 				       sizeof(u32) * num_write_bo_handles);
461 	if (IS_ERR(bo_handles_write)) {
462 		r = PTR_ERR(bo_handles_write);
463 		goto put_gobj_read;
464 	}
465 
466 	/* Array of pointers to the GEM write objects */
467 	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
468 	if (!gobj_write) {
469 		r = -ENOMEM;
470 		goto free_bo_handles_write;
471 	}
472 
473 	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
474 		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
475 		if (!gobj_write[wentry]) {
476 			r = -ENOENT;
477 			goto put_gobj_write;
478 		}
479 	}
480 
481 	/* Retrieve the user queue */
482 	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
483 	if (!queue) {
484 		r = -ENOENT;
485 		goto put_gobj_write;
486 	}
487 
488 	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
489 	if (r)
490 		goto put_gobj_write;
491 
492 	r = amdgpu_userq_fence_alloc(&userq_fence);
493 	if (r)
494 		goto put_gobj_write;
495 
496 	/* We are here means UQ is active, make sure the eviction fence is valid */
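	/*
	 * Note: amdgpu_userqueue_ensure_ev_fence() returns with
	 * userq_mgr->userq_mutex held; it is released below once the new
	 * fence has been installed as queue->last_fence.
	 */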
497 	amdgpu_userqueue_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);
498 
499 	/* Create a new fence */
500 	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
501 	if (r) {
502 		mutex_unlock(&userq_mgr->userq_mutex);
503 		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
504 		goto put_gobj_write;
505 	}
506 
507 	dma_fence_put(queue->last_fence);
508 	queue->last_fence = dma_fence_get(fence);
509 	mutex_unlock(&userq_mgr->userq_mutex);
510 
511 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
512 		      (num_read_bo_handles + num_write_bo_handles));
513 
514 	/* Lock all BOs with retry handling */
515 	drm_exec_until_all_locked(&exec) {
516 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
517 		drm_exec_retry_on_contention(&exec);
518 		if (r) {
519 			amdgpu_userq_fence_cleanup(fence);
520 			goto exec_fini;
521 		}
522 
523 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
524 		drm_exec_retry_on_contention(&exec);
525 		if (r) {
526 			amdgpu_userq_fence_cleanup(fence);
527 			goto exec_fini;
528 		}
529 	}
530 
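	/*
	 * Attach the new fence to the reservation objects of all read and
	 * write BOs so later waits on these buffers include this user queue
	 * submission.
	 */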
531 	for (i = 0; i < num_read_bo_handles; i++) {
532 		if (!gobj_read || !gobj_read[i]->resv)
533 			continue;
534 
535 		dma_resv_add_fence(gobj_read[i]->resv, fence,
536 				   DMA_RESV_USAGE_READ);
537 	}
538 
539 	for (i = 0; i < num_write_bo_handles; i++) {
540 		if (!gobj_write || !gobj_write[i]->resv)
541 			continue;
542 
543 		dma_resv_add_fence(gobj_write[i]->resv, fence,
544 				   DMA_RESV_USAGE_WRITE);
545 	}
546 
	/* Install the new fence in each of the provided syncobjs */
548 	for (i = 0; i < num_syncobj_handles; i++)
549 		drm_syncobj_replace_fence(syncobj[i], fence);
550 
551 	/* drop the reference acquired in fence creation function */
552 	dma_fence_put(fence);
553 
554 exec_fini:
555 	drm_exec_fini(&exec);
556 put_gobj_write:
557 	while (wentry-- > 0)
558 		drm_gem_object_put(gobj_write[wentry]);
559 	kfree(gobj_write);
560 free_bo_handles_write:
561 	kfree(bo_handles_write);
562 put_gobj_read:
563 	while (rentry-- > 0)
564 		drm_gem_object_put(gobj_read[rentry]);
565 	kfree(gobj_read);
566 free_bo_handles_read:
567 	kfree(bo_handles_read);
568 free_syncobj:
569 	while (entry-- > 0)
570 		if (syncobj[entry])
571 			drm_syncobj_put(syncobj[entry]);
572 	kfree(syncobj);
573 free_syncobj_handles:
574 	kfree(syncobj_handles);
575 
576 	return r;
577 }
578 #else
579 int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
580 			      struct drm_file *filp)
581 {
582 	return -ENOTSUPP;
583 }
584 #endif
585 
586 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
587 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
588 			    struct drm_file *filp)
589 {
590 	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
591 	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
592 	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
593 	struct drm_amdgpu_userq_wait *wait_info = data;
594 	struct drm_gem_object **gobj_write;
595 	struct drm_gem_object **gobj_read;
596 	struct dma_fence **fences = NULL;
597 	u16 num_points, num_fences = 0;
598 	int r, i, rentry, wentry, cnt;
599 	struct drm_exec exec;
600 
601 	num_read_bo_handles = wait_info->num_bo_read_handles;
602 	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
603 				      sizeof(u32) * num_read_bo_handles);
604 	if (IS_ERR(bo_handles_read))
605 		return PTR_ERR(bo_handles_read);
606 
607 	num_write_bo_handles = wait_info->num_bo_write_handles;
608 	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
609 				       sizeof(u32) * num_write_bo_handles);
610 	if (IS_ERR(bo_handles_write)) {
611 		r = PTR_ERR(bo_handles_write);
612 		goto free_bo_handles_read;
613 	}
614 
615 	num_syncobj = wait_info->num_syncobj_handles;
616 	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
617 				      sizeof(u32) * num_syncobj);
618 	if (IS_ERR(syncobj_handles)) {
619 		r = PTR_ERR(syncobj_handles);
620 		goto free_bo_handles_write;
621 	}
622 
623 	num_points = wait_info->num_syncobj_timeline_handles;
624 	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
625 				       sizeof(u32) * num_points);
626 	if (IS_ERR(timeline_handles)) {
627 		r = PTR_ERR(timeline_handles);
628 		goto free_syncobj_handles;
629 	}
630 
631 	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
632 				      sizeof(u32) * num_points);
633 	if (IS_ERR(timeline_points)) {
634 		r = PTR_ERR(timeline_points);
635 		goto free_timeline_handles;
636 	}
637 
638 	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
639 	if (!gobj_read) {
640 		r = -ENOMEM;
641 		goto free_timeline_points;
642 	}
643 
644 	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
645 		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
646 		if (!gobj_read[rentry]) {
647 			r = -ENOENT;
648 			goto put_gobj_read;
649 		}
650 	}
651 
652 	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
653 	if (!gobj_write) {
654 		r = -ENOMEM;
655 		goto put_gobj_read;
656 	}
657 
658 	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
659 		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
660 		if (!gobj_write[wentry]) {
661 			r = -ENOENT;
662 			goto put_gobj_write;
663 		}
664 	}
665 
666 	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
667 		      (num_read_bo_handles + num_write_bo_handles));
668 
669 	/* Lock all BOs with retry handling */
670 	drm_exec_until_all_locked(&exec) {
671 		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
672 		drm_exec_retry_on_contention(&exec);
673 		if (r) {
674 			drm_exec_fini(&exec);
675 			goto put_gobj_write;
676 		}
677 
678 		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
679 		drm_exec_retry_on_contention(&exec);
680 		if (r) {
681 			drm_exec_fini(&exec);
682 			goto put_gobj_write;
683 		}
684 	}
685 
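	/*
	 * Two-pass interface: if userspace passes num_fences == 0 we only
	 * count the relevant fences and report that number back; otherwise
	 * we collect the fences and fill the fence_info array below.
	 */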
686 	if (!wait_info->num_fences) {
687 		if (num_points) {
688 			struct dma_fence_unwrap iter;
689 			struct dma_fence *fence;
690 			struct dma_fence *f;
691 
692 			for (i = 0; i < num_points; i++) {
693 				r = drm_syncobj_find_fence(filp, timeline_handles[i],
694 							   timeline_points[i],
695 							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
696 							   &fence);
697 				if (r)
698 					goto exec_fini;
699 
700 				dma_fence_unwrap_for_each(f, &iter, fence)
701 					num_fences++;
702 
703 				dma_fence_put(fence);
704 			}
705 		}
706 
707 		/* Count syncobj's fence */
708 		for (i = 0; i < num_syncobj; i++) {
709 			struct dma_fence *fence;
710 
711 			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
712 						   0,
713 						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
714 						   &fence);
715 			if (r)
716 				goto exec_fini;
717 
718 			num_fences++;
719 			dma_fence_put(fence);
720 		}
721 
722 		/* Count GEM objects fence */
723 		for (i = 0; i < num_read_bo_handles; i++) {
724 			struct dma_resv_iter resv_cursor;
725 			struct dma_fence *fence;
726 
727 			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
728 						DMA_RESV_USAGE_READ, fence)
729 				num_fences++;
730 		}
731 
732 		for (i = 0; i < num_write_bo_handles; i++) {
733 			struct dma_resv_iter resv_cursor;
734 			struct dma_fence *fence;
735 
736 			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
737 						DMA_RESV_USAGE_WRITE, fence)
738 				num_fences++;
739 		}
740 
		/*
		 * Passing num_fences == 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. In that case we skip filling
		 * userq_fence_info and only return the actual number of fences
		 * in wait_info->num_fences.
		 */
747 		wait_info->num_fences = num_fences;
748 	} else {
749 		/* Array of fence info */
750 		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
751 		if (!fence_info) {
752 			r = -ENOMEM;
753 			goto exec_fini;
754 		}
755 
756 		/* Array of fences */
757 		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
758 		if (!fences) {
759 			r = -ENOMEM;
760 			goto free_fence_info;
761 		}
762 
763 		/* Retrieve GEM read objects fence */
764 		for (i = 0; i < num_read_bo_handles; i++) {
765 			struct dma_resv_iter resv_cursor;
766 			struct dma_fence *fence;
767 
768 			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
769 						DMA_RESV_USAGE_READ, fence) {
770 				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
771 					r = -EINVAL;
772 					goto free_fences;
773 				}
774 
775 				fences[num_fences++] = fence;
776 				dma_fence_get(fence);
777 			}
778 		}
779 
780 		/* Retrieve GEM write objects fence */
781 		for (i = 0; i < num_write_bo_handles; i++) {
782 			struct dma_resv_iter resv_cursor;
783 			struct dma_fence *fence;
784 
785 			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
786 						DMA_RESV_USAGE_WRITE, fence) {
787 				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
788 					r = -EINVAL;
789 					goto free_fences;
790 				}
791 
792 				fences[num_fences++] = fence;
793 				dma_fence_get(fence);
794 			}
795 		}
796 
797 		if (num_points) {
798 			struct dma_fence_unwrap iter;
799 			struct dma_fence *fence;
800 			struct dma_fence *f;
801 
802 			for (i = 0; i < num_points; i++) {
803 				r = drm_syncobj_find_fence(filp, timeline_handles[i],
804 							   timeline_points[i],
805 							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
806 							   &fence);
807 				if (r)
808 					goto free_fences;
809 
810 				dma_fence_unwrap_for_each(f, &iter, fence) {
811 					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
812 						r = -EINVAL;
813 						goto free_fences;
814 					}
815 
816 					dma_fence_get(f);
817 					fences[num_fences++] = f;
818 				}
819 
820 				dma_fence_put(fence);
821 			}
822 		}
823 
824 		/* Retrieve syncobj's fence */
825 		for (i = 0; i < num_syncobj; i++) {
826 			struct dma_fence *fence;
827 
828 			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
829 						   0,
830 						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
831 						   &fence);
832 			if (r)
833 				goto free_fences;
834 
835 			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
836 				r = -EINVAL;
837 				goto free_fences;
838 			}
839 
840 			fences[num_fences++] = fence;
841 		}
842 
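		/*
		 * Fences from other drivers are simply waited on here on the
		 * CPU; for userq fences the seq64 GPU VA and the fence value
		 * are returned to userspace through fence_info instead.
		 */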
843 		for (i = 0, cnt = 0; i < num_fences; i++) {
844 			struct amdgpu_userq_fence_driver *fence_drv;
845 			struct amdgpu_userq_fence *userq_fence;
846 			u32 index;
847 
848 			userq_fence = to_amdgpu_userq_fence(fences[i]);
849 			if (!userq_fence) {
850 				/*
851 				 * Just waiting on other driver fences should
852 				 * be good for now
853 				 */
854 				r = dma_fence_wait(fences[i], true);
855 				if (r) {
856 					dma_fence_put(fences[i]);
857 					goto free_fences;
858 				}
859 
860 				dma_fence_put(fences[i]);
861 				continue;
862 			}
863 
864 			fence_drv = userq_fence->fence_drv;
			/*
			 * We need to make sure the user queue releases its references
			 * to the fence drivers at some point before queue destruction;
			 * otherwise we would keep gathering those references until we
			 * run out of space and crash.
			 */
871 			if (fence_drv->fence_drv_xa_ptr) {
872 				r = xa_alloc(fence_drv->fence_drv_xa_ptr, &index, fence_drv,
873 					     xa_limit_32b, GFP_KERNEL);
874 				if (r)
875 					goto free_fences;
876 
877 				amdgpu_userq_fence_driver_get(fence_drv);
878 			}
879 
			/* Store the fence driver's seq64 GPU VA and the fence value to wait on */
881 			fence_info[cnt].va = fence_drv->va;
882 			fence_info[cnt].value = fences[i]->seqno;
883 
884 			dma_fence_put(fences[i]);
885 			/* Increment the actual userq fence count */
886 			cnt++;
887 		}
888 
889 		wait_info->num_fences = cnt;
890 		/* Copy userq fence info to user space */
891 		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
892 				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
893 			r = -EFAULT;
894 			goto free_fences;
895 		}
896 
897 		kfree(fences);
898 		kfree(fence_info);
899 	}
900 
901 	drm_exec_fini(&exec);
902 	for (i = 0; i < num_read_bo_handles; i++)
903 		drm_gem_object_put(gobj_read[i]);
904 	kfree(gobj_read);
905 
906 	for (i = 0; i < num_write_bo_handles; i++)
907 		drm_gem_object_put(gobj_write[i]);
908 	kfree(gobj_write);
909 
910 	kfree(timeline_points);
911 	kfree(timeline_handles);
912 	kfree(syncobj_handles);
913 	kfree(bo_handles_write);
914 	kfree(bo_handles_read);
915 
916 	return 0;
917 
918 free_fences:
919 	while (num_fences-- > 0)
920 		dma_fence_put(fences[num_fences]);
921 	kfree(fences);
922 free_fence_info:
923 	kfree(fence_info);
924 exec_fini:
925 	drm_exec_fini(&exec);
926 put_gobj_write:
927 	while (wentry-- > 0)
928 		drm_gem_object_put(gobj_write[wentry]);
929 	kfree(gobj_write);
930 put_gobj_read:
931 	while (rentry-- > 0)
932 		drm_gem_object_put(gobj_read[rentry]);
933 	kfree(gobj_read);
934 free_timeline_points:
935 	kfree(timeline_points);
936 free_timeline_handles:
937 	kfree(timeline_handles);
938 free_syncobj_handles:
939 	kfree(syncobj_handles);
940 free_bo_handles_write:
941 	kfree(bo_handles_write);
942 free_bo_handles_read:
943 	kfree(bo_handles_read);
944 
945 	return r;
946 }
947 #else
948 int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
949 			    struct drm_file *filp)
950 {
951 	return -ENOTSUPP;
952 }
953 #endif
954