// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <drm/drm_auth.h>
#include <drm/drm_exec.h>
#include <linux/pm_runtime.h>

#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_userq.h"
#include "amdgpu_userq_fence.h"

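/* Build a bitmask of the HW IP types that have user queue support on this device */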
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
{
	int i;
	u32 userq_ip_mask = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		if (adev->userq_funcs[i])
			userq_ip_mask |= (1 << i);
	}

	return userq_ip_mask;
}

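/*
 * Check that a user supplied GPU VA range (queue, rptr or wptr) is fully
 * covered by an existing mapping in the VM.
 */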
int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
				   u64 expected_size)
{
	struct amdgpu_bo_va_mapping *va_map;
	u64 user_addr;
	u64 size;
	int r = 0;

	user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
	size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;

	r = amdgpu_bo_reserve(vm->root.bo, false);
	if (r)
		return r;

	va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
	if (!va_map) {
		r = -EINVAL;
		goto out_err;
	}
	/* Only validate whether the userq range is resident in the VM mapping */
	if (user_addr >= va_map->start &&
	    va_map->last - user_addr + 1 >= size) {
		amdgpu_bo_unreserve(vm->root.bo);
		return 0;
	}

	r = -EINVAL;
out_err:
	amdgpu_bo_unreserve(vm->root.bo);
	return r;
}

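/* Preempt a mapped queue; the queue is marked hung if the preempt request fails */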
static int
amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
			    struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
		r = userq_funcs->preempt(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
		}
	}

	return r;
}

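/* Restore a preempted queue back to the mapped state; mark it hung on failure */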
static int
amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
			    struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
		r = userq_funcs->restore(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_MAPPED;
		}
	}

	return r;
}

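/* Unmap a mapped or preempted queue from the HW; mark it hung on failure */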
static int
amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
			  struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
	    (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
		r = userq_funcs->unmap(uq_mgr, queue);
		if (r)
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		else
			queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
	}
	return r;
}

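/* Map an unmapped queue to the HW; mark it hung on failure */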
static int
amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
			struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
		r = userq_funcs->map(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_MAPPED;
		}
	}
	return r;
}

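/* Wait (with a 100 ms timeout) for the last fence submitted on this queue */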
static void
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
				 struct amdgpu_usermode_queue *queue)
{
	struct dma_fence *f = queue->last_fence;
	int ret;

	if (f && !dma_fence_is_signaled(f)) {
		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
		if (ret <= 0)
			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
				     f->context, f->seqno);
	}
}

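/* Free a queue: destroy its MQD, release the fence driver and drop it from the IDR */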
static void
amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
		     struct amdgpu_usermode_queue *queue,
		     int queue_id)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];

	uq_funcs->mqd_destroy(uq_mgr, queue);
	amdgpu_userq_fence_driver_free(queue);
	idr_remove(&uq_mgr->userq_idr, queue_id);
	kfree(queue);
}

static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
	return idr_find(&uq_mgr->userq_idr, qid);
}

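/*
 * Make sure a valid (unsignaled) eviction fence exists for this process by
 * flushing/scheduling the resume work; returns with the userq mutex held.
 */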
void
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
			     struct amdgpu_eviction_fence_mgr *evf_mgr)
{
	struct amdgpu_eviction_fence *ev_fence;

retry:
	/* Flush any pending resume work to create ev_fence */
	flush_delayed_work(&uq_mgr->resume_work);

	mutex_lock(&uq_mgr->userq_mutex);
	spin_lock(&evf_mgr->ev_fence_lock);
	ev_fence = evf_mgr->ev_fence;
	spin_unlock(&evf_mgr->ev_fence_lock);
	if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
		mutex_unlock(&uq_mgr->userq_mutex);
		/*
		 * Looks like there was no pending resume work,
		 * add one now to create a valid eviction fence
		 */
		schedule_delayed_work(&uq_mgr->resume_work, 0);
		goto retry;
	}
}

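/*
 * Allocate a kernel BO for user queue metadata, set up its GART mapping,
 * CPU-map it and zero its contents.
 */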
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
			       struct amdgpu_userq_obj *userq_obj,
			       int size)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	bp.type = ttm_bo_type_kernel;
	bp.size = size;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
		return r;
	}

	r = amdgpu_bo_reserve(userq_obj->obj, true);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
		goto free_obj;
	}

	r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
		goto unresv;
	}

	r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
		goto unresv;
	}

	userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
	amdgpu_bo_unreserve(userq_obj->obj);
	memset(userq_obj->cpu_ptr, 0, size);
	return 0;

unresv:
	amdgpu_bo_unreserve(userq_obj->obj);

free_obj:
	amdgpu_bo_unref(&userq_obj->obj);
	return r;
}

void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
				 struct amdgpu_userq_obj *userq_obj)
{
	amdgpu_bo_kunmap(userq_obj->obj);
	amdgpu_bo_unref(&userq_obj->obj);
}

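/*
 * Look up and pin the doorbell BO from its GEM handle and convert the
 * queue's doorbell offset into an absolute doorbell index on the BAR.
 */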
uint64_t
amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
				struct amdgpu_db_info *db_info,
				struct drm_file *filp)
{
	uint64_t index;
	struct drm_gem_object *gobj;
	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
	int r, db_size;

	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
	if (gobj == NULL) {
		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
		return -EINVAL;
	}

	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	r = amdgpu_bo_reserve(db_obj->obj, true);
	if (r) {
		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell object\n");
		goto unref_bo;
	}

	/* Pin the BO before generating the index, unpin in queue destroy */
	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
	if (r) {
		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
		goto unresv_bo;
	}

	switch (db_info->queue_type) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
	case AMDGPU_HW_IP_DMA:
		db_size = sizeof(u64);
		break;

	case AMDGPU_HW_IP_VCN_ENC:
		db_size = sizeof(u32);
		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
		break;

	case AMDGPU_HW_IP_VPE:
		db_size = sizeof(u32);
		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
		break;

	default:
		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not supported\n",
			     db_info->queue_type);
		r = -EINVAL;
		goto unpin_bo;
	}

	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
					     db_info->doorbell_offset, db_size);
	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
		       "[Usermode queues] doorbell index=%lld\n", index);
	amdgpu_bo_unreserve(db_obj->obj);
	return index;

unpin_bo:
	amdgpu_bo_unpin(db_obj->obj);
unresv_bo:
	amdgpu_bo_unreserve(db_obj->obj);
unref_bo:
	amdgpu_bo_unref(&db_obj->obj);
	return r;
}

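/* Tear down a user queue: unpin its doorbell BO, unmap it from the HW and free it */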
static int
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_usermode_queue *queue;
	int r = 0;

	cancel_delayed_work_sync(&uq_mgr->resume_work);
	mutex_lock(&uq_mgr->userq_mutex);

	queue = amdgpu_userq_find(uq_mgr, queue_id);
	if (!queue) {
		drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
		mutex_unlock(&uq_mgr->userq_mutex);
		return -EINVAL;
	}
	amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
	r = amdgpu_bo_reserve(queue->db_obj.obj, true);
	if (!r) {
		amdgpu_bo_unpin(queue->db_obj.obj);
		amdgpu_bo_unreserve(queue->db_obj.obj);
	}
	amdgpu_bo_unref(&queue->db_obj.obj);

#if defined(CONFIG_DEBUG_FS)
	debugfs_remove_recursive(queue->debugfs_queue);
#endif
	r = amdgpu_userq_unmap_helper(uq_mgr, queue);
	/* TODO: a userq HW unmap error requires a GPU reset */
	if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) {
		drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
		queue->state = AMDGPU_USERQ_STATE_HUNG;
	}
	amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
	mutex_unlock(&uq_mgr->userq_mutex);

	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

	return r;
}

static int amdgpu_userq_priority_permit(struct drm_file *filp,
					int priority)
{
	if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

#if defined(CONFIG_DEBUG_FS)
static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
{
	struct amdgpu_usermode_queue *queue = m->private;
	struct amdgpu_bo *bo;
	int r;

	if (!queue || !queue->mqd.obj)
		return -EINVAL;

	bo = amdgpu_bo_ref(queue->mqd.obj);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		amdgpu_bo_unref(&bo);
		return -EINVAL;
	}

	seq_printf(m, "queue_type: %d\n", queue->queue_type);
	seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));

	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;
}

static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
{
	return single_open(file, amdgpu_mqd_info_read, inode->i_private);
}

static const struct file_operations amdgpu_mqd_info_fops = {
	.owner = THIS_MODULE,
	.open = amdgpu_mqd_info_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif

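/*
 * Create a new user queue: validate the user supplied VAs, resolve the
 * doorbell index, create the MQD and map the queue to the HW.
 */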
static int
amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *uq_funcs;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_db_info db_info;
	char *queue_name;
	bool skip_map_queue;
	uint64_t index;
	int qid, r = 0;
	int priority =
		(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
		AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;

	r = amdgpu_userq_priority_permit(filp, priority);
	if (r)
		return r;

	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
	if (r < 0) {
		drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
		return r;
	}

	/*
	 * There could be a situation that we are creating a new queue while
	 * the other queues under this UQ_mgr are suspended. So if there is any
	 * resume work pending, wait for it to get done.
	 *
	 * This will also make sure we have a valid eviction fence ready to be used.
	 */
	mutex_lock(&adev->userq_mutex);
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	uq_funcs = adev->userq_funcs[args->in.ip_type];
	if (!uq_funcs) {
		drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
			     args->in.ip_type);
		r = -EINVAL;
		goto unlock;
	}

	queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
	if (!queue) {
		drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
		r = -ENOMEM;
		goto unlock;
	}

	/* Validate the userq virtual addresses. */
	if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
		r = -EINVAL;
		kfree(queue);
		goto unlock;
	}
	queue->doorbell_handle = args->in.doorbell_handle;
	queue->queue_type = args->in.ip_type;
	queue->vm = &fpriv->vm;
	queue->priority = priority;

	db_info.queue_type = queue->queue_type;
	db_info.doorbell_handle = queue->doorbell_handle;
	db_info.db_obj = &queue->db_obj;
	db_info.doorbell_offset = args->in.doorbell_offset;

	/* Convert relative doorbell offset into absolute doorbell index */
	index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
	if (index == (uint64_t)-EINVAL) {
		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
		kfree(queue);
		r = -EINVAL;
		goto unlock;
	}

	queue->doorbell_index = index;
	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
	r = amdgpu_userq_fence_driver_alloc(adev, queue);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
		goto unlock;
	}

	r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to create Queue\n");
		amdgpu_userq_fence_driver_free(queue);
		kfree(queue);
		goto unlock;
	}

	qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
	if (qid < 0) {
		drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
		amdgpu_userq_fence_driver_free(queue);
		uq_funcs->mqd_destroy(uq_mgr, queue);
		kfree(queue);
		r = -ENOMEM;
		goto unlock;
	}

	/* don't map the queue if scheduling is halted */
	if (adev->userq_halt_for_enforce_isolation &&
	    ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
	     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
		skip_map_queue = true;
	else
		skip_map_queue = false;
	if (!skip_map_queue) {
		r = amdgpu_userq_map_helper(uq_mgr, queue);
		if (r) {
			drm_file_err(uq_mgr->file, "Failed to map Queue\n");
			idr_remove(&uq_mgr->userq_idr, qid);
			amdgpu_userq_fence_driver_free(queue);
			uq_funcs->mqd_destroy(uq_mgr, queue);
			kfree(queue);
			goto unlock;
		}
	}

	queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid);
	if (!queue_name) {
		r = -ENOMEM;
		goto unlock;
	}

#if defined(CONFIG_DEBUG_FS)
	/* Queue dentry per client to hold MQD information */
	queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
	debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
#endif
	kfree(queue_name);

	args->out.queue_id = qid;

unlock:
	mutex_unlock(&uq_mgr->userq_mutex);
	mutex_unlock(&adev->userq_mutex);

	return r;
}

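/* Sanity check the ioctl arguments before a queue is created or freed */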
static int amdgpu_userq_input_args_validate(struct drm_device *dev,
					    union drm_amdgpu_userq *args,
					    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	switch (args->in.op) {
	case AMDGPU_USERQ_OP_CREATE:
		if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
				       AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
			return -EINVAL;
		/* Only GFX, SDMA and compute user queues are supported for now */
		if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
		    args->in.ip_type != AMDGPU_HW_IP_DMA &&
		    args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
			drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
				     args->in.ip_type);
			return -EINVAL;
		}

		if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
		    (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
		    (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
		    !amdgpu_is_tmz(adev)) {
			drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
			return -EINVAL;
		}

		if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
		    args->in.queue_va == 0 ||
		    args->in.queue_size == 0) {
			drm_file_err(filp, "invalid userq queue va or size\n");
			return -EINVAL;
		}
		if (!args->in.wptr_va || !args->in.rptr_va) {
			drm_file_err(filp, "invalid userq queue rptr or wptr va\n");
			return -EINVAL;
		}
		break;
	case AMDGPU_USERQ_OP_FREE:
		if (args->in.ip_type ||
		    args->in.doorbell_handle ||
		    args->in.doorbell_offset ||
		    args->in.flags ||
		    args->in.queue_va ||
		    args->in.queue_size ||
		    args->in.rptr_va ||
		    args->in.wptr_va ||
		    args->in.mqd ||
		    args->in.mqd_size)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *filp)
{
	union drm_amdgpu_userq *args = data;
	int r;

	if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
		return -EINVAL;

	switch (args->in.op) {
	case AMDGPU_USERQ_OP_CREATE:
		r = amdgpu_userq_create(filp, args);
		if (r)
			drm_file_err(filp, "Failed to create usermode queue\n");
		break;

	case AMDGPU_USERQ_OP_FREE:
		r = amdgpu_userq_destroy(filp, args->in.queue_id);
		if (r)
			drm_file_err(filp, "Failed to destroy usermode queue\n");
		break;

	default:
		drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
		return -EINVAL;
	}

	return r;
}

static int
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;
	int ret = 0, r;

	/* Resume all the queues for this process */
	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		r = amdgpu_userq_restore_helper(uq_mgr, queue);
		if (r)
			ret = r;
	}

	if (ret)
		drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
	return ret;
}

static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
{
	struct ttm_operation_ctx ctx = { false, false };

	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/* Handle all BOs on the invalidated list, validate them and update the PTs */
static int
amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
			 struct amdgpu_vm *vm)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int ret;

	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
					 struct amdgpu_bo_va,
					 base.vm_status);
		spin_unlock(&vm->status_lock);

		bo = bo_va->base.bo;
		ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
		if (unlikely(ret))
			return ret;

		amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		if (ret)
			return ret;

		/* This moves the bo_va to the done list */
		ret = amdgpu_vm_bo_update(adev, bo_va, false);
		if (ret)
			return ret;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	return 0;
}

/* Make sure the whole VM is ready to be used */
static int
amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va *bo_va;
	struct drm_exec exec;
	int ret;

	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		ret = amdgpu_vm_lock_pd(vm, &exec, 1);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;

		ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;

		/* This validates PDs, PTs and per VM BOs */
		ret = amdgpu_vm_validate(adev, vm, NULL,
					 amdgpu_userq_validate_vm,
					 NULL);
		if (unlikely(ret))
			goto unlock_all;

		/* This locks and validates the remaining evicted BOs */
		ret = amdgpu_userq_bo_validate(adev, &exec, vm);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;
	}

	ret = amdgpu_vm_handle_moved(adev, vm, NULL);
	if (ret)
		goto unlock_all;

	ret = amdgpu_vm_update_pdes(adev, vm, false);
	if (ret)
		goto unlock_all;

	/*
	 * We need to wait for all VM updates to finish before restarting the
	 * queues. Using the done list like that is now ok since everything is
	 * locked in place.
	 */
	list_for_each_entry(bo_va, &vm->done, base.vm_status)
		dma_fence_wait(bo_va->last_pt_update, false);
	dma_fence_wait(vm->last_update, false);

	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
	if (ret)
		drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");

unlock_all:
	drm_exec_fini(&exec);
	return ret;
}

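/*
 * Deferred work to re-validate the VM and restore all user queues of a
 * process after an eviction.
 */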
static void amdgpu_userq_restore_worker(struct work_struct *work)
{
	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	int ret;

	flush_delayed_work(&fpriv->evf_mgr.suspend_work);

	mutex_lock(&uq_mgr->userq_mutex);

	ret = amdgpu_userq_vm_validate(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
		goto unlock;
	}

	ret = amdgpu_userq_restore_all(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
		goto unlock;
	}

unlock:
	mutex_unlock(&uq_mgr->userq_mutex);
}

static int
amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;
	int ret = 0, r;

	/* Try to unmap all the queues in this process ctx */
	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		r = amdgpu_userq_preempt_helper(uq_mgr, queue);
		if (r)
			ret = r;
	}

	if (ret)
		drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n");
	return ret;
}

static int
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id, ret;

	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		struct dma_fence *f = queue->last_fence;

		if (!f || dma_fence_is_signaled(f))
			continue;
		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
		if (ret <= 0) {
			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
				     f->context, f->seqno);
			return -ETIMEDOUT;
		}
	}

	return 0;
}

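/*
 * Evict the user queues of a process: wait for pending fences, preempt all
 * queues and signal the current eviction fence. A resume is scheduled unless
 * the file descriptor is being closed.
 */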
void
amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
		   struct amdgpu_eviction_fence *ev_fence)
{
	int ret;
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;

	/* Wait for any pending userqueue fence work to finish */
	ret = amdgpu_userq_wait_for_signal(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
		return;
	}

	ret = amdgpu_userq_evict_all(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
		return;
	}

	/* Signal current eviction fence */
	amdgpu_eviction_fence_signal(evf_mgr, ev_fence);

	if (evf_mgr->fd_closing) {
		cancel_delayed_work_sync(&uq_mgr->resume_work);
		return;
	}

	/* Schedule a resume work */
	schedule_delayed_work(&uq_mgr->resume_work, 0);
}

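/* Set up the per-process user queue manager and register it with the device */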
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
			  struct amdgpu_device *adev)
{
	mutex_init(&userq_mgr->userq_mutex);
	idr_init_base(&userq_mgr->userq_idr, 1);
	userq_mgr->adev = adev;
	userq_mgr->file = file_priv;

	mutex_lock(&adev->userq_mutex);
	list_add(&userq_mgr->list, &adev->userq_mgr_list);
	mutex_unlock(&adev->userq_mutex);

	INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
	return 0;
}

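/* Destroy all remaining queues of a process and unregister its queue manager */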
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
	struct amdgpu_device *adev = userq_mgr->adev;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	uint32_t queue_id;

	cancel_delayed_work_sync(&userq_mgr->resume_work);

	mutex_lock(&adev->userq_mutex);
	mutex_lock(&userq_mgr->userq_mutex);
	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
		amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
		amdgpu_userq_unmap_helper(userq_mgr, queue);
		amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
	}

	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		if (uqm == userq_mgr) {
			list_del(&uqm->list);
			break;
		}
	}
	idr_destroy(&userq_mgr->userq_idr);
	mutex_unlock(&userq_mgr->userq_mutex);
	mutex_unlock(&adev->userq_mutex);
	mutex_destroy(&userq_mgr->userq_mutex);
}

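/* Preempt (S0ix) or unmap all user queues on the device for suspend */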
int amdgpu_userq_suspend(struct amdgpu_device *adev)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	if (!ip_mask)
		return 0;

	mutex_lock(&adev->userq_mutex);
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		cancel_delayed_work_sync(&uqm->resume_work);
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (adev->in_s0ix)
				r = amdgpu_userq_preempt_helper(uqm, queue);
			else
				r = amdgpu_userq_unmap_helper(uqm, queue);
			if (r)
				ret = r;
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

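/* Restore (S0ix) or map all user queues on the device on resume */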
int amdgpu_userq_resume(struct amdgpu_device *adev)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	if (!ip_mask)
		return 0;

	mutex_lock(&adev->userq_mutex);
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (adev->in_s0ix)
				r = amdgpu_userq_restore_helper(uqm, queue);
			else
				r = amdgpu_userq_map_helper(uqm, queue);
			if (r)
				ret = r;
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

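/*
 * Preempt all GFX/compute user queues on the given partition while enforce
 * isolation halts user queue scheduling.
 */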
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
						  u32 idx)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	/* only need to stop gfx/compute */
	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
		return 0;

	mutex_lock(&adev->userq_mutex);
	if (adev->userq_halt_for_enforce_isolation)
		dev_warn(adev->dev, "userq scheduling already stopped!\n");
	adev->userq_halt_for_enforce_isolation = true;
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		cancel_delayed_work_sync(&uqm->resume_work);
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
			    (queue->xcp_id == idx)) {
				r = amdgpu_userq_preempt_helper(uqm, queue);
				if (r)
					ret = r;
			}
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
						   u32 idx)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	/* only need to start gfx/compute */
	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
		return 0;

	mutex_lock(&adev->userq_mutex);
	if (!adev->userq_halt_for_enforce_isolation)
		dev_warn(adev->dev, "userq scheduling already started!\n");
	adev->userq_halt_for_enforce_isolation = false;
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
			    (queue->xcp_id == idx)) {
				r = amdgpu_userq_restore_helper(uqm, queue);
				if (r)
					ret = r;
			}
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}