xref: /linux/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c (revision e332935a540eb76dd656663ca908eb0544d96757)
// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/dma-fence-unwrap.h>

#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_userq_fence.h"

static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
						    sizeof(struct amdgpu_userq_fence),
						    0,
						    SLAB_HWCACHE_ALIGN,
						    NULL);
	if (!amdgpu_userq_fence_slab)
		return -ENOMEM;

	return 0;
}

void amdgpu_userq_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_userq_fence_slab);
}

static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
	if (!f || f->ops != &amdgpu_userq_fence_ops)
		return NULL;

	return container_of(f, struct amdgpu_userq_fence, base);
}

static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
{
	return le64_to_cpu(*fence_drv->cpu_addr);
}

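/**
 * amdgpu_userq_fence_driver_alloc - Allocate a fence driver for a user queue
 *
 * @adev: amdgpu device pointer
 * @userq: user mode queue structure pointer
 *
 * Allocate and initialize the per-queue fence driver: reserve a seq64 slot
 * whose memory holds the queue's RPTR value, set up the refcount and the
 * list of pending fences, allocate a dma-fence context and register the
 * driver in adev->userq_xa under the queue's doorbell index.
 *
 * Returns 0 on success, negative errno on failure.
 */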
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
	if (!fence_drv)
		return -ENOMEM;

	/* Acquire seq64 memory */
	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;

	memset(fence_drv->cpu_addr, 0, sizeof(u64));

	kref_init(&fence_drv->refcount);
	INIT_LIST_HEAD(&fence_drv->fences);
	spin_lock_init(&fence_drv->fence_list_lock);

	fence_drv->adev = adev;
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

	return r;
}

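/*
 * Erase every fence driver stored in the xarray and drop the reference
 * that was taken when it was added.
 */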
static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long index;

	if (xa_empty(xa))
		return;

	xa_lock(xa);
	xa_for_each(xa, index, fence_drv) {
		__xa_erase(xa, index);
		amdgpu_userq_fence_driver_put(fence_drv);
	}

	xa_unlock(xa);
}

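/**
 * amdgpu_userq_fence_driver_free - Drop the fence driver references of a user queue
 *
 * @userq: user mode queue structure pointer
 *
 * Drop all fence driver references collected in the queue's fence_drv_xa,
 * destroy the xarray and release the reference the queue holds on its own
 * fence driver.
 */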
void
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
{
	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
	xa_destroy(&userq->fence_drv_xa);
	/* Drop the fence_drv reference held by user queue */
	amdgpu_userq_fence_driver_put(userq->fence_drv);
}

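/**
 * amdgpu_userq_fence_driver_process - Signal the fences completed by the queue
 *
 * @fence_drv: fence driver of the user queue
 *
 * Read the current RPTR value from the seq64 memory and signal every pending
 * fence whose sequence number is not greater than it, dropping the fence
 * driver references attached to each signaled fence.
 */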
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	struct dma_fence *fence;
	u64 rptr;
	int i;

	if (!fence_drv)
		return;

	rptr = amdgpu_userq_fence_read(fence_drv);

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
		fence = &userq_fence->base;

		if (rptr < fence->seqno)
			break;

		dma_fence_signal(fence);

		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);

		list_del(&userq_fence->link);
		dma_fence_put(fence);
	}
	spin_unlock(&fence_drv->fence_list_lock);
}

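/**
 * amdgpu_userq_fence_driver_destroy - Final release of a fence driver
 *
 * @ref: refcount embedded in the fence driver
 *
 * Called once the last reference is dropped: cancel and signal any fences
 * still pending, remove the driver from adev->userq_xa, free the seq64
 * slot and the driver itself.
 */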
void amdgpu_userq_fence_driver_destroy(struct kref *ref)
{
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					 struct amdgpu_userq_fence_driver,
					 refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	struct dma_fence *f;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
		f = &fence->base;

		if (!dma_fence_is_signaled(f)) {
			dma_fence_set_error(f, -ECANCELED);
			dma_fence_signal(f);
		}

		list_del(&fence->link);
		dma_fence_put(f);
	}
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
}

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
	return *userq_fence ? 0 : -ENOMEM;
}

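/*
 * Initialize a user queue fence whose sequence number is the WPTR value
 * passed by the caller, collect the fence driver references stashed in the
 * queue's fence_drv_xa into the fence, and queue the fence on the driver's
 * pending list unless the hardware has already processed it.
 */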
static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;

	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
		       fence_drv->context, seq);

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);

	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;

		userq_fence->fence_drv_array =
			kvmalloc_array(count,
				       sizeof(struct amdgpu_userq_fence_driver *),
				       GFP_ATOMIC);

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
		}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}

	/* Check if hardware has already processed the job */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	else
		dma_fence_put(fence);

	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	*f = fence;

	return 0;
}

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_userq_fence";
}

static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);

	return fence->fence_drv->timeline_name;
}

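/*
 * A user queue fence is signaled once the RPTR value read back from the
 * seq64 memory has reached the fence's WPTR sequence number.
 */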
static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	u64 rptr, wptr;

	rptr = amdgpu_userq_fence_read(fence_drv);
	wptr = fence->base.seqno;

	if (rptr >= wptr)
		return true;

	return false;
}

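/*
 * Fence release path: the RCU callback drops the fence driver reference and
 * frees the fence driver array and the fence object itself.
 */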
static void amdgpu_userq_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;

	/* Release the fence driver reference */
	amdgpu_userq_fence_driver_put(fence_drv);

	kvfree(userq_fence->fence_drv_array);
	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}

static void amdgpu_userq_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_userq_fence_free);
}

static const struct dma_fence_ops amdgpu_userq_fence_ops = {
	.use_64bit_seqno = true,
	.get_driver_name = amdgpu_userq_fence_get_driver_name,
	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
	.signaled = amdgpu_userq_fence_signaled,
	.release = amdgpu_userq_fence_release,
};

/**
 * amdgpu_userq_fence_read_wptr - Read the userq wptr value
 *
 * @queue: user mode queue structure pointer
 * @wptr: write pointer value
 *
 * Read the wptr value from userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that is considered
 * signaled once the RPTR value written to the seq64 memory is >= WPTR.
 *
 * Returns 0 on success and stores the wptr value in @wptr,
 * negative errno on failure.
 */
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
					u64 *wptr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	u64 addr, *ptr;
	int r;

	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
	if (r)
		return r;

	addr = queue->userq_prop->wptr_gpu_addr;
	addr &= AMDGPU_GMC_HOLE_MASK;

	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}

	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	amdgpu_bo_unreserve(queue->vm->root.bo);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		DRM_ERROR("Failed to reserve userqueue wptr bo");
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&ptr);
	if (r) {
		DRM_ERROR("Failed mapping the userqueue wptr bo");
		goto map_error;
	}

	*wptr = le64_to_cpu(*ptr);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;

map_error:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

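/**
 * amdgpu_userq_signal_ioctl - Create and attach a user queue fence
 *
 * @dev: drm device pointer
 * @data: ioctl arguments (struct drm_amdgpu_userq_signal)
 * @filp: drm file pointer
 *
 * Create a dma_fence for the user queue's current WPTR value and attach it
 * to the syncobjs and to the reservation objects of the read/write GEM
 * buffers passed by userspace, so other submissions can synchronize with
 * the user mode queue work.
 *
 * Returns 0 on success, negative errno on failure.
 */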
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj_handles));
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked-up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/* The user queue is active here, so make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Attach the created fence to the syncobjs */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* Drop the reference acquired in the fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}

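/**
 * amdgpu_userq_wait_ioctl - Collect the fences a user queue has to wait for
 *
 * @dev: drm device pointer
 * @data: ioctl arguments (struct drm_amdgpu_userq_wait)
 * @filp: drm file pointer
 *
 * Gather the fences attached to the given syncobjs, timeline syncobj points
 * and read/write GEM buffers. When wait_info->num_fences is zero only the
 * number of fences is returned; otherwise the seq64 GPU VA and fence value
 * of each user queue fence are copied to userspace so the queue can wait on
 * them, while foreign fences are waited on directly in the kernel.
 *
 * Returns 0 on success, negative errno on failure.
 */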
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
	struct drm_amdgpu_userq_wait *wait_info = data;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct amdgpu_usermode_queue *waitq;
	struct drm_gem_object **gobj_write;
	struct drm_gem_object **gobj_read;
	struct dma_fence **fences = NULL;
	u16 num_points, num_fences = 0;
	int r, i, rentry, wentry, cnt;
	struct drm_exec exec;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      size_mul(sizeof(u32), num_read_bo_handles));
	if (IS_ERR(bo_handles_read))
		return PTR_ERR(bo_handles_read);

	num_write_bo_handles = wait_info->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
				       size_mul(sizeof(u32), num_write_bo_handles));
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto free_bo_handles_read;
	}

	num_syncobj = wait_info->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj));
	if (IS_ERR(syncobj_handles)) {
		r = PTR_ERR(syncobj_handles);
		goto free_bo_handles_write;
	}

	num_points = wait_info->num_syncobj_timeline_handles;
	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
				       sizeof(u32) * num_points);
	if (IS_ERR(timeline_handles)) {
		r = PTR_ERR(timeline_handles);
		goto free_syncobj_handles;
	}

	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
				      sizeof(u32) * num_points);
	if (IS_ERR(timeline_points)) {
		r = PTR_ERR(timeline_points);
		goto free_timeline_handles;
	}

	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_timeline_points;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto put_gobj_read;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}
	}

	if (!wait_info->num_fences) {
		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto exec_fini;

				dma_fence_unwrap_for_each(f, &iter, fence)
					num_fences++;

				dma_fence_put(fence);
			}
		}

		/* Count the syncobj fences */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto exec_fini;

			num_fences++;
			dma_fence_put(fence);
		}

		/* Count the GEM object fences */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence)
				num_fences++;
		}

		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence)
				num_fences++;
		}

		/*
		 * Passing num_fences = 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
		 * userq_fence_info and return the actual number of fences in
		 * wait_info->num_fences.
		 */
		wait_info->num_fences = num_fences;
	} else {
		/* Array of fence info */
		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
		if (!fence_info) {
			r = -ENOMEM;
			goto exec_fini;
		}

		/* Array of fences */
		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			r = -ENOMEM;
			goto free_fence_info;
		}

		/* Retrieve the GEM read object fences */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		/* Retrieve the GEM write object fences */
		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
						r = -EINVAL;
						goto free_fences;
					}

					dma_fence_get(f);
					fences[num_fences++] = f;
				}

				dma_fence_put(fence);
			}
		}

		/* Retrieve the syncobj fences */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				r = -EINVAL;
				goto free_fences;
			}

			fences[num_fences++] = fence;
		}

		/*
		 * Keep only the latest fences to reduce the number of values
		 * given back to userspace.
		 */
		num_fences = dma_fence_dedup_array(fences, num_fences);

		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
		if (!waitq) {
			r = -EINVAL;
			goto free_fences;
		}

		for (i = 0, cnt = 0; i < num_fences; i++) {
			struct amdgpu_userq_fence_driver *fence_drv;
			struct amdgpu_userq_fence *userq_fence;
			u32 index;

			userq_fence = to_amdgpu_userq_fence(fences[i]);
			if (!userq_fence) {
				/*
				 * Just waiting on other driver fences should
				 * be good for now
				 */
				r = dma_fence_wait(fences[i], true);
				if (r) {
					dma_fence_put(fences[i]);
					goto free_fences;
				}

				dma_fence_put(fences[i]);
				continue;
			}

			fence_drv = userq_fence->fence_drv;
			/*
			 * We need to make sure the user queue releases its
			 * references to the fence drivers at some point before
			 * queue destruction. Otherwise, we would keep gathering
			 * those references until we run out of space and crash.
			 */
			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
				     xa_limit_32b, GFP_KERNEL);
			if (r)
				goto free_fences;

			amdgpu_userq_fence_driver_get(fence_drv);

			/* Store the fence driver's seq64 GPU VA and the fence value */
			fence_info[cnt].va = fence_drv->va;
			fence_info[cnt].value = fences[i]->seqno;

			dma_fence_put(fences[i]);
			/* Increment the actual userq fence count */
			cnt++;
		}

		wait_info->num_fences = cnt;
		/* Copy userq fence info to user space */
		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
			r = -EFAULT;
			goto free_fences;
		}

		kfree(fences);
		kfree(fence_info);
	}

	drm_exec_fini(&exec);
	for (i = 0; i < num_read_bo_handles; i++)
		drm_gem_object_put(gobj_read[i]);
	kfree(gobj_read);

	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
	kfree(gobj_write);

	kfree(timeline_points);
	kfree(timeline_handles);
	kfree(syncobj_handles);
	kfree(bo_handles_write);
	kfree(bo_handles_read);

	return 0;

free_fences:
	while (num_fences-- > 0)
		dma_fence_put(fences[num_fences]);
	kfree(fences);
free_fence_info:
	kfree(fence_info);
exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_timeline_points:
	kfree(timeline_points);
free_timeline_handles:
	kfree(timeline_handles);
free_syncobj_handles:
	kfree(syncobj_handles);
free_bo_handles_write:
	kfree(bo_handles_write);
free_bo_handles_read:
	kfree(bo_handles_read);

	return r;
}
969