// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/dma-fence-unwrap.h>

#include <drm/drm_exec.h>
#include <drm/drm_syncobj.h>

#include "amdgpu.h"
#include "amdgpu_userq_fence.h"

static const struct dma_fence_ops amdgpu_userq_fence_ops;
static struct kmem_cache *amdgpu_userq_fence_slab;

int amdgpu_userq_fence_slab_init(void)
{
	amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
						    sizeof(struct amdgpu_userq_fence),
						    0,
						    SLAB_HWCACHE_ALIGN,
						    NULL);
	if (!amdgpu_userq_fence_slab)
		return -ENOMEM;

	return 0;
}

void amdgpu_userq_fence_slab_fini(void)
{
	rcu_barrier();
	kmem_cache_destroy(amdgpu_userq_fence_slab);
}

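/*
 * Downcast a dma_fence to an amdgpu_userq_fence, or return NULL if the
 * fence does not belong to this driver (checked via the fence ops).
 */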
static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
{
	if (!f || f->ops != &amdgpu_userq_fence_ops)
		return NULL;

	return container_of(f, struct amdgpu_userq_fence, base);
}

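/*
 * Read the current 64-bit fence value (RPTR) from this fence driver's
 * seq64 CPU mapping.
 */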
static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
{
	return le64_to_cpu(*fence_drv->cpu_addr);
}

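/*
 * Allocate and initialize the per-queue fence driver: reserve a seq64 slot
 * for the fence value, set up the pending-fence list and a new dma_fence
 * context, and register the driver in adev->userq_xa keyed by the queue's
 * doorbell index.
 */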
int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
				    struct amdgpu_usermode_queue *userq)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long flags;
	int r;

	fence_drv = kzalloc(sizeof(*fence_drv), GFP_KERNEL);
	if (!fence_drv)
		return -ENOMEM;

	/* Acquire seq64 memory */
	r = amdgpu_seq64_alloc(adev, &fence_drv->va, &fence_drv->gpu_addr,
			       &fence_drv->cpu_addr);
	if (r)
		goto free_fence_drv;

	memset(fence_drv->cpu_addr, 0, sizeof(u64));

	kref_init(&fence_drv->refcount);
	INIT_LIST_HEAD(&fence_drv->fences);
	spin_lock_init(&fence_drv->fence_list_lock);

	fence_drv->adev = adev;
	fence_drv->context = dma_fence_context_alloc(1);
	get_task_comm(fence_drv->timeline_name, current);

	xa_lock_irqsave(&adev->userq_xa, flags);
	r = xa_err(__xa_store(&adev->userq_xa, userq->doorbell_index,
			      fence_drv, GFP_KERNEL));
	xa_unlock_irqrestore(&adev->userq_xa, flags);
	if (r)
		goto free_seq64;

	userq->fence_drv = fence_drv;

	return 0;

free_seq64:
	amdgpu_seq64_free(adev, fence_drv->va);
free_fence_drv:
	kfree(fence_drv);

	return r;
}

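/*
 * Drop every fence driver reference that was stashed in the given xarray
 * (the wait IOCTL stores such references in userq->fence_drv_xa) and erase
 * the entries.
 */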
static void amdgpu_userq_walk_and_drop_fence_drv(struct xarray *xa)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	unsigned long index;

	if (xa_empty(xa))
		return;

	xa_lock(xa);
	xa_for_each(xa, index, fence_drv) {
		__xa_erase(xa, index);
		amdgpu_userq_fence_driver_put(fence_drv);
	}

	xa_unlock(xa);
}

void
amdgpu_userq_fence_driver_free(struct amdgpu_usermode_queue *userq)
{
	amdgpu_userq_walk_and_drop_fence_drv(&userq->fence_drv_xa);
	xa_destroy(&userq->fence_drv_xa);
	/* Drop the fence_drv reference held by user queue */
	amdgpu_userq_fence_driver_put(userq->fence_drv);
}

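/*
 * Process completed fences for a fence driver: read the current RPTR from
 * seq64 and signal every pending fence whose seqno (WPTR) is <= RPTR,
 * dropping the fence driver references that were gathered at fence
 * creation.
 */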
void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
{
	struct amdgpu_userq_fence *userq_fence, *tmp;
	struct dma_fence *fence;
	u64 rptr;
	int i;

	if (!fence_drv)
		return;

	rptr = amdgpu_userq_fence_read(fence_drv);

	spin_lock(&fence_drv->fence_list_lock);
	list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
		fence = &userq_fence->base;

		if (rptr < fence->seqno)
			break;

		dma_fence_signal(fence);

		for (i = 0; i < userq_fence->fence_drv_array_count; i++)
			amdgpu_userq_fence_driver_put(userq_fence->fence_drv_array[i]);

		list_del(&userq_fence->link);
		dma_fence_put(fence);
	}
	spin_unlock(&fence_drv->fence_list_lock);
}

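/*
 * Final kref release for a fence driver: force-signal any remaining fences
 * with -ECANCELED, remove the driver from adev->userq_xa, release the
 * seq64 slot and free the structure.
 */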
void amdgpu_userq_fence_driver_destroy(struct kref *ref)
{
	struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
					 struct amdgpu_userq_fence_driver,
					 refcount);
	struct amdgpu_userq_fence_driver *xa_fence_drv;
	struct amdgpu_device *adev = fence_drv->adev;
	struct amdgpu_userq_fence *fence, *tmp;
	struct xarray *xa = &adev->userq_xa;
	unsigned long index, flags;
	struct dma_fence *f;

	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
		f = &fence->base;

		if (!dma_fence_is_signaled(f)) {
			dma_fence_set_error(f, -ECANCELED);
			dma_fence_signal(f);
		}

		list_del(&fence->link);
		dma_fence_put(f);
	}
	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	xa_lock_irqsave(xa, flags);
	xa_for_each(xa, index, xa_fence_drv)
		if (xa_fence_drv == fence_drv)
			__xa_erase(xa, index);
	xa_unlock_irqrestore(xa, flags);

	/* Free seq64 memory */
	amdgpu_seq64_free(adev, fence_drv->va);
	kfree(fence_drv);
}

void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_get(&fence_drv->refcount);
}

void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
{
	kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
}

static int amdgpu_userq_fence_alloc(struct amdgpu_userq_fence **userq_fence)
{
	*userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
	return *userq_fence ? 0 : -ENOMEM;
}

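/*
 * Initialize a user queue fence: it uses the queue's fence driver context
 * and the given WPTR value as its seqno. Fence driver references stashed
 * in userq->fence_drv_xa by the wait IOCTL are transferred into the
 * fence's fence_drv_array. The fence is only queued on the pending list if
 * the hardware has not already processed the job.
 */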
static int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
				     struct amdgpu_userq_fence *userq_fence,
				     u64 seq, struct dma_fence **f)
{
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *fence;
	unsigned long flags;

	fence_drv = userq->fence_drv;
	if (!fence_drv)
		return -EINVAL;

	spin_lock_init(&userq_fence->lock);
	INIT_LIST_HEAD(&userq_fence->link);
	fence = &userq_fence->base;
	userq_fence->fence_drv = fence_drv;

	dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
		       fence_drv->context, seq);

	amdgpu_userq_fence_driver_get(fence_drv);
	dma_fence_get(fence);

	if (!xa_empty(&userq->fence_drv_xa)) {
		struct amdgpu_userq_fence_driver *stored_fence_drv;
		unsigned long index, count = 0;
		int i = 0;

		xa_lock(&userq->fence_drv_xa);
		xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv)
			count++;

		userq_fence->fence_drv_array =
			kvmalloc_array(count,
				       sizeof(struct amdgpu_userq_fence_driver *),
				       GFP_ATOMIC);

		if (userq_fence->fence_drv_array) {
			xa_for_each(&userq->fence_drv_xa, index, stored_fence_drv) {
				userq_fence->fence_drv_array[i] = stored_fence_drv;
				__xa_erase(&userq->fence_drv_xa, index);
				i++;
			}
		}

		userq_fence->fence_drv_array_count = i;
		xa_unlock(&userq->fence_drv_xa);
	} else {
		userq_fence->fence_drv_array = NULL;
		userq_fence->fence_drv_array_count = 0;
	}

	/* Check if hardware has already processed the job */
	spin_lock_irqsave(&fence_drv->fence_list_lock, flags);
	if (!dma_fence_is_signaled_locked(fence))
		list_add_tail(&userq_fence->link, &fence_drv->fences);
	else
		dma_fence_put(fence);

	spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags);

	*f = fence;

	return 0;
}

static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
{
	return "amdgpu_userq_fence";
}

static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);

	return fence->fence_drv->timeline_name;
}

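/*
 * A user queue fence is signaled once the RPTR value in seq64 has reached
 * its seqno (the WPTR captured at creation). For example, a fence created
 * with WPTR 100 reports signaled as soon as the queue writes 100 or more
 * to the seq64 slot.
 */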
static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
{
	struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
	struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
	u64 rptr, wptr;

	rptr = amdgpu_userq_fence_read(fence_drv);
	wptr = fence->base.seqno;

	if (rptr >= wptr)
		return true;

	return false;
}

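/*
 * Fences are freed from an RCU callback so that concurrent RCU-protected
 * readers of the dma_fence can finish before the memory goes back to the
 * slab; the fence driver reference taken at creation is dropped here.
 */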
static void amdgpu_userq_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
	struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
	struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;

	/* Release the fence driver reference */
	amdgpu_userq_fence_driver_put(fence_drv);

	kvfree(userq_fence->fence_drv_array);
	kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
}

static void amdgpu_userq_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_userq_fence_free);
}

static const struct dma_fence_ops amdgpu_userq_fence_ops = {
	.use_64bit_seqno = true,
	.get_driver_name = amdgpu_userq_fence_get_driver_name,
	.get_timeline_name = amdgpu_userq_fence_get_timeline_name,
	.signaled = amdgpu_userq_fence_signaled,
	.release = amdgpu_userq_fence_release,
};

/**
 * amdgpu_userq_fence_read_wptr - Read the userq wptr value
 *
 * @queue: user mode queue structure pointer
 * @wptr: write pointer value
 *
 * Read the wptr value from userq's MQD. The userq signal IOCTL
 * creates a dma_fence for the shared buffers that expects the
 * RPTR value written to seq64 memory >= WPTR.
 *
 * Returns 0 on success and stores the wptr value in @wptr,
 * negative error code on failure.
 */
static int amdgpu_userq_fence_read_wptr(struct amdgpu_usermode_queue *queue,
					u64 *wptr)
{
	struct amdgpu_bo_va_mapping *mapping;
	struct amdgpu_bo *bo;
	u64 addr, *ptr;
	int r;

	r = amdgpu_bo_reserve(queue->vm->root.bo, false);
	if (r)
		return r;

	addr = queue->userq_prop->wptr_gpu_addr;
	addr &= AMDGPU_GMC_HOLE_MASK;

	mapping = amdgpu_vm_bo_lookup_mapping(queue->vm, addr >> PAGE_SHIFT);
	if (!mapping) {
		amdgpu_bo_unreserve(queue->vm->root.bo);
		DRM_ERROR("Failed to lookup amdgpu_bo_va_mapping\n");
		return -EINVAL;
	}

	bo = amdgpu_bo_ref(mapping->bo_va->base.bo);
	amdgpu_bo_unreserve(queue->vm->root.bo);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		DRM_ERROR("Failed to reserve userqueue wptr bo\n");
		return r;
	}

	r = amdgpu_bo_kmap(bo, (void **)&ptr);
	if (r) {
		DRM_ERROR("Failed mapping the userqueue wptr bo\n");
		goto map_error;
	}

	*wptr = le64_to_cpu(*ptr);

	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;

map_error:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return r;
}

static void amdgpu_userq_fence_cleanup(struct dma_fence *fence)
{
	dma_fence_put(fence);
}

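/*
 * amdgpu_userq_signal_ioctl - attach a user queue fence to BOs and syncobjs
 *
 * Looks up the syncobj and GEM handles passed by userspace, reads the
 * queue's current WPTR, creates a fence with that value as seqno, adds it
 * to the reservation objects of the read/write BOs and installs it in the
 * given syncobjs.
 *
 * Illustrative userspace sketch (hedged: the ioctl wrapper, the
 * DRM_IOCTL_AMDGPU_USERQ_SIGNAL macro and the handle arrays are assumed;
 * only the field names below come from struct drm_amdgpu_userq_signal as
 * used in this function):
 *
 *	struct drm_amdgpu_userq_signal args = {
 *		.queue_id		= queue_id,
 *		.syncobj_handles	= (uintptr_t)syncobjs,
 *		.num_syncobj_handles	= num_syncobjs,
 *		.bo_read_handles	= (uintptr_t)read_bos,
 *		.num_bo_read_handles	= num_read_bos,
 *		.bo_write_handles	= (uintptr_t)write_bos,
 *		.num_bo_write_handles	= num_write_bos,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ_SIGNAL, &args);
 */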
int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *filp)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct drm_amdgpu_userq_signal *args = data;
	struct drm_gem_object **gobj_write = NULL;
	struct drm_gem_object **gobj_read = NULL;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_fence *userq_fence;
	struct drm_syncobj **syncobj = NULL;
	u32 *bo_handles_write, num_write_bo_handles;
	u32 *syncobj_handles, num_syncobj_handles;
	u32 *bo_handles_read, num_read_bo_handles;
	int r, i, entry, rentry, wentry;
	struct dma_fence *fence;
	struct drm_exec exec;
	u64 wptr;

	num_syncobj_handles = args->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(args->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj_handles));
	if (IS_ERR(syncobj_handles))
		return PTR_ERR(syncobj_handles);

	/* Array of pointers to the looked up syncobjs */
	syncobj = kmalloc_array(num_syncobj_handles, sizeof(*syncobj), GFP_KERNEL);
	if (!syncobj) {
		r = -ENOMEM;
		goto free_syncobj_handles;
	}

	for (entry = 0; entry < num_syncobj_handles; entry++) {
		syncobj[entry] = drm_syncobj_find(filp, syncobj_handles[entry]);
		if (!syncobj[entry]) {
			r = -ENOENT;
			goto free_syncobj;
		}
	}

	num_read_bo_handles = args->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(args->bo_read_handles),
				      sizeof(u32) * num_read_bo_handles);
	if (IS_ERR(bo_handles_read)) {
		r = PTR_ERR(bo_handles_read);
		goto free_syncobj;
	}

	/* Array of pointers to the GEM read objects */
	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_bo_handles_read;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	num_write_bo_handles = args->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(args->bo_write_handles),
				       sizeof(u32) * num_write_bo_handles);
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto put_gobj_read;
	}

	/* Array of pointers to the GEM write objects */
	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto free_bo_handles_write;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	/* Retrieve the user queue */
	queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
	if (!queue) {
		r = -ENOENT;
		goto put_gobj_write;
	}

	r = amdgpu_userq_fence_read_wptr(queue, &wptr);
	if (r)
		goto put_gobj_write;

	r = amdgpu_userq_fence_alloc(&userq_fence);
	if (r)
		goto put_gobj_write;

	/* The user queue is active at this point, make sure the eviction fence is valid */
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	/* Create a new fence */
	r = amdgpu_userq_fence_create(queue, userq_fence, wptr, &fence);
	if (r) {
		mutex_unlock(&userq_mgr->userq_mutex);
		kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
		goto put_gobj_write;
	}

	dma_fence_put(queue->last_fence);
	queue->last_fence = dma_fence_get(fence);
	mutex_unlock(&userq_mgr->userq_mutex);

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			amdgpu_userq_fence_cleanup(fence);
			goto exec_fini;
		}
	}

	for (i = 0; i < num_read_bo_handles; i++) {
		if (!gobj_read || !gobj_read[i]->resv)
			continue;

		dma_resv_add_fence(gobj_read[i]->resv, fence,
				   DMA_RESV_USAGE_READ);
	}

	for (i = 0; i < num_write_bo_handles; i++) {
		if (!gobj_write || !gobj_write[i]->resv)
			continue;

		dma_resv_add_fence(gobj_write[i]->resv, fence,
				   DMA_RESV_USAGE_WRITE);
	}

	/* Install the created fence in the syncobjs */
	for (i = 0; i < num_syncobj_handles; i++)
		drm_syncobj_replace_fence(syncobj[i], fence);

	/* drop the reference acquired in fence creation function */
	dma_fence_put(fence);

exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
free_bo_handles_write:
	kfree(bo_handles_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_bo_handles_read:
	kfree(bo_handles_read);
free_syncobj:
	while (entry-- > 0)
		if (syncobj[entry])
			drm_syncobj_put(syncobj[entry]);
	kfree(syncobj);
free_syncobj_handles:
	kfree(syncobj_handles);

	return r;
}

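/*
 * amdgpu_userq_wait_ioctl - collect the fences a user queue has to wait for
 *
 * Gathers the fences attached to the given syncobjs, timeline points and
 * read/write BOs. When wait_info->num_fences is zero, only the number of
 * fences is returned so userspace can size its buffer and call again;
 * otherwise the (seq64 GPU address, fence value) pairs for user queue
 * fences are copied to wait_info->out_fences, while fences from other
 * drivers are waited on inline. The fence driver references are stashed in
 * the waiting queue's fence_drv_xa until that queue releases them.
 *
 * Illustrative two-call usage from userspace (hedged: the ioctl wrapper and
 * the DRM_IOCTL_AMDGPU_USERQ_WAIT macro are assumed; the field names come
 * from struct drm_amdgpu_userq_wait as used below):
 *
 *	wait.num_fences = 0;
 *	drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait);	// query count
 *	info = calloc(wait.num_fences, sizeof(*info));
 *	wait.out_fences = (uintptr_t)info;
 *	drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ_WAIT, &wait);	// fill fence info
 */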
int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	u32 *syncobj_handles, *timeline_points, *timeline_handles, *bo_handles_read, *bo_handles_write;
	u32 num_syncobj, num_read_bo_handles, num_write_bo_handles;
	struct drm_amdgpu_userq_fence_info *fence_info = NULL;
	struct drm_amdgpu_userq_wait *wait_info = data;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
	struct amdgpu_usermode_queue *waitq;
	struct drm_gem_object **gobj_write;
	struct drm_gem_object **gobj_read;
	struct dma_fence **fences = NULL;
	u16 num_points, num_fences = 0;
	int r, i, rentry, wentry, cnt;
	struct drm_exec exec;

	num_read_bo_handles = wait_info->num_bo_read_handles;
	bo_handles_read = memdup_user(u64_to_user_ptr(wait_info->bo_read_handles),
				      size_mul(sizeof(u32), num_read_bo_handles));
	if (IS_ERR(bo_handles_read))
		return PTR_ERR(bo_handles_read);

	num_write_bo_handles = wait_info->num_bo_write_handles;
	bo_handles_write = memdup_user(u64_to_user_ptr(wait_info->bo_write_handles),
				       size_mul(sizeof(u32), num_write_bo_handles));
	if (IS_ERR(bo_handles_write)) {
		r = PTR_ERR(bo_handles_write);
		goto free_bo_handles_read;
	}

	num_syncobj = wait_info->num_syncobj_handles;
	syncobj_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_handles),
				      size_mul(sizeof(u32), num_syncobj));
	if (IS_ERR(syncobj_handles)) {
		r = PTR_ERR(syncobj_handles);
		goto free_bo_handles_write;
	}

	num_points = wait_info->num_syncobj_timeline_handles;
	timeline_handles = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_handles),
				       sizeof(u32) * num_points);
	if (IS_ERR(timeline_handles)) {
		r = PTR_ERR(timeline_handles);
		goto free_syncobj_handles;
	}

	timeline_points = memdup_user(u64_to_user_ptr(wait_info->syncobj_timeline_points),
				      sizeof(u32) * num_points);
	if (IS_ERR(timeline_points)) {
		r = PTR_ERR(timeline_points);
		goto free_timeline_handles;
	}

	gobj_read = kmalloc_array(num_read_bo_handles, sizeof(*gobj_read), GFP_KERNEL);
	if (!gobj_read) {
		r = -ENOMEM;
		goto free_timeline_points;
	}

	for (rentry = 0; rentry < num_read_bo_handles; rentry++) {
		gobj_read[rentry] = drm_gem_object_lookup(filp, bo_handles_read[rentry]);
		if (!gobj_read[rentry]) {
			r = -ENOENT;
			goto put_gobj_read;
		}
	}

	gobj_write = kmalloc_array(num_write_bo_handles, sizeof(*gobj_write), GFP_KERNEL);
	if (!gobj_write) {
		r = -ENOMEM;
		goto put_gobj_read;
	}

	for (wentry = 0; wentry < num_write_bo_handles; wentry++) {
		gobj_write[wentry] = drm_gem_object_lookup(filp, bo_handles_write[wentry]);
		if (!gobj_write[wentry]) {
			r = -ENOENT;
			goto put_gobj_write;
		}
	}

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT,
		      (num_read_bo_handles + num_write_bo_handles));

	/* Lock all BOs with retry handling */
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_prepare_array(&exec, gobj_read, num_read_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}

		r = drm_exec_prepare_array(&exec, gobj_write, num_write_bo_handles, 1);
		drm_exec_retry_on_contention(&exec);
		if (r) {
			drm_exec_fini(&exec);
			goto put_gobj_write;
		}
	}

	if (!wait_info->num_fences) {
		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto exec_fini;

				dma_fence_unwrap_for_each(f, &iter, fence)
					num_fences++;

				dma_fence_put(fence);
			}
		}

		/* Count syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto exec_fini;

			num_fences++;
			dma_fence_put(fence);
		}

		/* Count GEM objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence)
				num_fences++;
		}

		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence)
				num_fences++;
		}

		/*
		 * Passing num_fences = 0 means that userspace doesn't want to
		 * retrieve userq_fence_info. If num_fences = 0 we skip filling
		 * userq_fence_info and return the actual number of fences in
		 * wait_info->num_fences.
		 */
		wait_info->num_fences = num_fences;
	} else {
		/* Array of fence info */
		fence_info = kmalloc_array(wait_info->num_fences, sizeof(*fence_info), GFP_KERNEL);
		if (!fence_info) {
			r = -ENOMEM;
			goto exec_fini;
		}

		/* Array of fences */
		fences = kmalloc_array(wait_info->num_fences, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			r = -ENOMEM;
			goto free_fence_info;
		}

		/* Retrieve GEM read objects fence */
		for (i = 0; i < num_read_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_read[i]->resv,
						DMA_RESV_USAGE_READ, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		/* Retrieve GEM write objects fence */
		for (i = 0; i < num_write_bo_handles; i++) {
			struct dma_resv_iter resv_cursor;
			struct dma_fence *fence;

			dma_resv_for_each_fence(&resv_cursor, gobj_write[i]->resv,
						DMA_RESV_USAGE_WRITE, fence) {
				if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
					r = -EINVAL;
					goto free_fences;
				}

				fences[num_fences++] = fence;
				dma_fence_get(fence);
			}
		}

		if (num_points) {
			struct dma_fence_unwrap iter;
			struct dma_fence *fence;
			struct dma_fence *f;

			for (i = 0; i < num_points; i++) {
				r = drm_syncobj_find_fence(filp, timeline_handles[i],
							   timeline_points[i],
							   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
							   &fence);
				if (r)
					goto free_fences;

				dma_fence_unwrap_for_each(f, &iter, fence) {
					if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
						r = -EINVAL;
						goto free_fences;
					}

					dma_fence_get(f);
					fences[num_fences++] = f;
				}

				dma_fence_put(fence);
			}
		}

		/* Retrieve syncobj's fence */
		for (i = 0; i < num_syncobj; i++) {
			struct dma_fence *fence;

			r = drm_syncobj_find_fence(filp, syncobj_handles[i],
						   0,
						   DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
						   &fence);
			if (r)
				goto free_fences;

			if (WARN_ON_ONCE(num_fences >= wait_info->num_fences)) {
				r = -EINVAL;
				goto free_fences;
			}

			fences[num_fences++] = fence;
		}

		/*
		 * Keep only the latest fences to reduce the number of values
		 * given back to userspace.
		 */
		num_fences = dma_fence_dedup_array(fences, num_fences);

		waitq = idr_find(&userq_mgr->userq_idr, wait_info->waitq_id);
		if (!waitq) {
			r = -EINVAL;
			goto free_fences;
		}

		for (i = 0, cnt = 0; i < num_fences; i++) {
			struct amdgpu_userq_fence_driver *fence_drv;
			struct amdgpu_userq_fence *userq_fence;
			u32 index;

			userq_fence = to_amdgpu_userq_fence(fences[i]);
			if (!userq_fence) {
				/*
				 * Just waiting on other driver fences should
				 * be good for now
				 */
				r = dma_fence_wait(fences[i], true);
				if (r) {
					dma_fence_put(fences[i]);
					goto free_fences;
				}

				dma_fence_put(fences[i]);
				continue;
			}

			fence_drv = userq_fence->fence_drv;
			/*
			 * The user queue must release its references to the
			 * fence drivers at some point before queue destruction.
			 * Otherwise, we would gather those references until we
			 * don't have any more space left and crash.
			 */
			r = xa_alloc(&waitq->fence_drv_xa, &index, fence_drv,
				     xa_limit_32b, GFP_KERNEL);
			if (r)
				goto free_fences;

			amdgpu_userq_fence_driver_get(fence_drv);

			/* Store the fence driver's seq64 GPU address and the fence value */
			fence_info[cnt].va = fence_drv->va;
			fence_info[cnt].value = fences[i]->seqno;

			dma_fence_put(fences[i]);
			/* Increment the actual userq fence count */
			cnt++;
		}

		wait_info->num_fences = cnt;
		/* Copy userq fence info to user space */
		if (copy_to_user(u64_to_user_ptr(wait_info->out_fences),
				 fence_info, wait_info->num_fences * sizeof(*fence_info))) {
			r = -EFAULT;
			goto free_fences;
		}

		kfree(fences);
		kfree(fence_info);
	}

	drm_exec_fini(&exec);
	for (i = 0; i < num_read_bo_handles; i++)
		drm_gem_object_put(gobj_read[i]);
	kfree(gobj_read);

	for (i = 0; i < num_write_bo_handles; i++)
		drm_gem_object_put(gobj_write[i]);
	kfree(gobj_write);

	kfree(timeline_points);
	kfree(timeline_handles);
	kfree(syncobj_handles);
	kfree(bo_handles_write);
	kfree(bo_handles_read);

	return 0;

free_fences:
	while (num_fences-- > 0)
		dma_fence_put(fences[num_fences]);
	kfree(fences);
free_fence_info:
	kfree(fence_info);
exec_fini:
	drm_exec_fini(&exec);
put_gobj_write:
	while (wentry-- > 0)
		drm_gem_object_put(gobj_write[wentry]);
	kfree(gobj_write);
put_gobj_read:
	while (rentry-- > 0)
		drm_gem_object_put(gobj_read[rentry]);
	kfree(gobj_read);
free_timeline_points:
	kfree(timeline_points);
free_timeline_handles:
	kfree(timeline_handles);
free_syncobj_handles:
	kfree(syncobj_handles);
free_bo_handles_write:
	kfree(bo_handles_write);
free_bo_handles_read:
	kfree(bo_handles_read);

	return r;
}