// SPDX-License-Identifier: MIT
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <drm/drm_auth.h>
#include <drm/drm_exec.h>
#include <linux/pm_runtime.h>

#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_userq.h"
#include "amdgpu_userq_fence.h"

u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
{
	int i;
	u32 userq_ip_mask = 0;

	for (i = 0; i < AMDGPU_HW_IP_NUM; i++) {
		if (adev->userq_funcs[i])
			userq_ip_mask |= (1 << i);
	}

	return userq_ip_mask;
}

int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr,
				   u64 expected_size)
{
	struct amdgpu_bo_va_mapping *va_map;
	u64 user_addr;
	u64 size;
	int r = 0;

	user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
	size = expected_size >> AMDGPU_GPU_PAGE_SHIFT;

	r = amdgpu_bo_reserve(vm->root.bo, false);
	if (r)
		return r;

	va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr);
	if (!va_map) {
		r = -EINVAL;
		goto out_err;
	}
	/* Only validate whether the userq is resident within the VM mapping range */
	if (user_addr >= va_map->start &&
	    va_map->last - user_addr + 1 >= size) {
		amdgpu_bo_unreserve(vm->root.bo);
		return 0;
	}
	/* The queue does not fit within the mapped range */
	r = -EINVAL;

out_err:
	amdgpu_bo_unreserve(vm->root.bo);
	return r;
}

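/*
 * Queue state machine used by the map/unmap/preempt/restore helpers below:
 *
 *   UNMAPPED --map--> MAPPED --preempt--> PREEMPTED --restore--> MAPPED
 *   MAPPED or PREEMPTED --unmap--> UNMAPPED
 *
 * A failed transition leaves the queue in AMDGPU_USERQ_STATE_HUNG.
 */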
static int
amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr,
			    struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_MAPPED) {
		r = userq_funcs->preempt(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_PREEMPTED;
		}
	}

	return r;
}

static int
amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr,
			    struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) {
		r = userq_funcs->restore(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_MAPPED;
		}
	}

	return r;
}

static int
amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr,
			  struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) ||
	    (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) {
		r = userq_funcs->unmap(uq_mgr, queue);
		if (r)
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		else
			queue->state = AMDGPU_USERQ_STATE_UNMAPPED;
	}
	return r;
}

static int
amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr,
			struct amdgpu_usermode_queue *queue)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *userq_funcs =
		adev->userq_funcs[queue->queue_type];
	int r = 0;

	if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) {
		r = userq_funcs->map(uq_mgr, queue);
		if (r) {
			queue->state = AMDGPU_USERQ_STATE_HUNG;
		} else {
			queue->state = AMDGPU_USERQ_STATE_MAPPED;
		}
	}
	return r;
}

static void
amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr,
				 struct amdgpu_usermode_queue *queue)
{
	struct dma_fence *f = queue->last_fence;
	int ret;

	if (f && !dma_fence_is_signaled(f)) {
		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
		if (ret <= 0)
			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
				     f->context, f->seqno);
	}
}

static void
amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr,
		     struct amdgpu_usermode_queue *queue,
		     int queue_id)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type];

	uq_funcs->mqd_destroy(uq_mgr, queue);
	amdgpu_userq_fence_driver_free(queue);
	idr_remove(&uq_mgr->userq_idr, queue_id);
	kfree(queue);
}

static struct amdgpu_usermode_queue *
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
{
	return idr_find(&uq_mgr->userq_idr, qid);
}

void
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
			     struct amdgpu_eviction_fence_mgr *evf_mgr)
{
	struct amdgpu_eviction_fence *ev_fence;

retry:
	/* Flush any pending resume work to create ev_fence */
	flush_delayed_work(&uq_mgr->resume_work);

	mutex_lock(&uq_mgr->userq_mutex);
	spin_lock(&evf_mgr->ev_fence_lock);
	ev_fence = evf_mgr->ev_fence;
	spin_unlock(&evf_mgr->ev_fence_lock);
	if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) {
		mutex_unlock(&uq_mgr->userq_mutex);
		/*
		 * Looks like there was no pending resume work,
		 * add one now to create a valid eviction fence
		 */
		schedule_delayed_work(&uq_mgr->resume_work, 0);
		goto retry;
	}
}

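/**
 * amdgpu_userq_create_object - allocate a kernel-owned buffer for a user queue
 * @uq_mgr: user queue manager of the calling process
 * @userq_obj: user queue object to fill in
 * @size: size of the allocation in bytes
 *
 * Allocates a CPU-accessible GTT BO, binds it into the GART, kmaps it and
 * zeroes its content. The GPU and CPU addresses are stored in @userq_obj.
 *
 * Returns 0 on success, or a negative error code on failure.
 */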
int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
			       struct amdgpu_userq_obj *userq_obj,
			       int size)
{
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_bo_param bp;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	bp.type = ttm_bo_type_kernel;
	bp.size = size;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create(adev, &bp, &userq_obj->obj);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r);
		return r;
	}

	r = amdgpu_bo_reserve(userq_obj->obj, true);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r);
		goto free_obj;
	}

	r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r);
		goto unresv;
	}

	r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r);
		goto unresv;
	}

	userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj);
	amdgpu_bo_unreserve(userq_obj->obj);
	memset(userq_obj->cpu_ptr, 0, size);
	return 0;

unresv:
	amdgpu_bo_unreserve(userq_obj->obj);

free_obj:
	amdgpu_bo_unref(&userq_obj->obj);
	return r;
}

void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
				 struct amdgpu_userq_obj *userq_obj)
{
	amdgpu_bo_kunmap(userq_obj->obj);
	amdgpu_bo_unref(&userq_obj->obj);
}

uint64_t
amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
				struct amdgpu_db_info *db_info,
				struct drm_file *filp)
{
	uint64_t index;
	struct drm_gem_object *gobj;
	struct amdgpu_userq_obj *db_obj = db_info->db_obj;
	int r, db_size;

	gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle);
	if (gobj == NULL) {
		drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n");
		return -EINVAL;
	}

	db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	r = amdgpu_bo_reserve(db_obj->obj, true);
	if (r) {
		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to reserve doorbell object\n");
		goto unref_bo;
	}

	/* Pin the BO before generating the index, unpin in queue destroy */
	r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL);
	if (r) {
		drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n");
		goto unresv_bo;
	}

	switch (db_info->queue_type) {
	case AMDGPU_HW_IP_GFX:
	case AMDGPU_HW_IP_COMPUTE:
	case AMDGPU_HW_IP_DMA:
		db_size = sizeof(u64);
		break;

	case AMDGPU_HW_IP_VCN_ENC:
		db_size = sizeof(u32);
		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1;
		break;

	case AMDGPU_HW_IP_VPE:
		db_size = sizeof(u32);
		db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1;
		break;

	default:
		drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not supported\n",
			     db_info->queue_type);
		r = -EINVAL;
		goto unpin_bo;
	}

	index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj,
					     db_info->doorbell_offset, db_size);
	drm_dbg_driver(adev_to_drm(uq_mgr->adev),
		       "[Usermode queues] doorbell index=%lld\n", index);
	amdgpu_bo_unreserve(db_obj->obj);
	return index;

unpin_bo:
	amdgpu_bo_unpin(db_obj->obj);
unresv_bo:
	amdgpu_bo_unreserve(db_obj->obj);
unref_bo:
	amdgpu_bo_unref(&db_obj->obj);
	return r;
}

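/*
 * Destroy a user queue: wait for its last fence, unpin and release the
 * doorbell BO, remove the debugfs entry, unmap the queue from the HW and
 * finally free the MQD, the fence driver and the queue itself.
 */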
static int
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_usermode_queue *queue;
	int r = 0;

	cancel_delayed_work_sync(&uq_mgr->resume_work);
	mutex_lock(&uq_mgr->userq_mutex);

	queue = amdgpu_userq_find(uq_mgr, queue_id);
	if (!queue) {
		drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
		mutex_unlock(&uq_mgr->userq_mutex);
		return -EINVAL;
	}
	amdgpu_userq_wait_for_last_fence(uq_mgr, queue);
	r = amdgpu_bo_reserve(queue->db_obj.obj, true);
	if (!r) {
		amdgpu_bo_unpin(queue->db_obj.obj);
		amdgpu_bo_unreserve(queue->db_obj.obj);
	}
	amdgpu_bo_unref(&queue->db_obj.obj);

#if defined(CONFIG_DEBUG_FS)
	debugfs_remove_recursive(queue->debugfs_queue);
#endif
	r = amdgpu_userq_unmap_helper(uq_mgr, queue);
	/* TODO: a reset is required to recover from a userq HW unmap error */
	if (unlikely(r)) {
		drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapped userq\n");
		queue->state = AMDGPU_USERQ_STATE_HUNG;
	}
	amdgpu_userq_cleanup(uq_mgr, queue, queue_id);
	mutex_unlock(&uq_mgr->userq_mutex);

	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);

	return r;
}

static int amdgpu_userq_priority_permit(struct drm_file *filp,
					int priority)
{
	if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

#if defined(CONFIG_DEBUG_FS)
static int amdgpu_mqd_info_read(struct seq_file *m, void *unused)
{
	struct amdgpu_usermode_queue *queue = m->private;
	struct amdgpu_bo *bo;
	int r;

	if (!queue || !queue->mqd.obj)
		return -EINVAL;

	bo = amdgpu_bo_ref(queue->mqd.obj);
	r = amdgpu_bo_reserve(bo, true);
	if (r) {
		amdgpu_bo_unref(&bo);
		return -EINVAL;
	}

	seq_printf(m, "queue_type: %d\n", queue->queue_type);
	seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj));

	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	return 0;
}

static int amdgpu_mqd_info_open(struct inode *inode, struct file *file)
{
	return single_open(file, amdgpu_mqd_info_read, inode->i_private);
}

static const struct file_operations amdgpu_mqd_info_fops = {
	.owner = THIS_MODULE,
	.open = amdgpu_mqd_info_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif

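/*
 * Create a user mode queue for the calling process: validate the queue,
 * rptr and wptr virtual addresses, resolve the doorbell index, set up the
 * userq fence driver and the IP-specific MQD, publish the queue in the IDR
 * and finally map it to the HW (unless scheduling is currently halted for
 * enforced isolation).
 */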
static int
amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;
	struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
	struct amdgpu_device *adev = uq_mgr->adev;
	const struct amdgpu_userq_funcs *uq_funcs;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_db_info db_info;
	char *queue_name;
	bool skip_map_queue;
	uint64_t index;
	int qid, r = 0;
	int priority =
		(args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >>
		AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT;

	r = amdgpu_userq_priority_permit(filp, priority);
	if (r)
		return r;

	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
	if (r < 0) {
		drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n");
		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
		return r;
	}

	/*
	 * We could be creating a new queue while other queues under this
	 * uq_mgr are suspended, so if there is any resume work pending, wait
	 * for it to get done.
	 *
	 * This also makes sure we have a valid eviction fence ready to be used.
	 */
	mutex_lock(&adev->userq_mutex);
	amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr);

	uq_funcs = adev->userq_funcs[args->in.ip_type];
	if (!uq_funcs) {
		drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n",
			     args->in.ip_type);
		r = -EINVAL;
		goto unlock;
	}

	queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL);
	if (!queue) {
		drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n");
		r = -ENOMEM;
		goto unlock;
	}

	/* Validate the userq virtual addresses. */
	if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) ||
	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) ||
	    amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) {
		r = -EINVAL;
		kfree(queue);
		goto unlock;
	}
	queue->doorbell_handle = args->in.doorbell_handle;
	queue->queue_type = args->in.ip_type;
	queue->vm = &fpriv->vm;
	queue->priority = priority;

	db_info.queue_type = queue->queue_type;
	db_info.doorbell_handle = queue->doorbell_handle;
	db_info.db_obj = &queue->db_obj;
	db_info.doorbell_offset = args->in.doorbell_offset;

	/* Convert relative doorbell offset into absolute doorbell index */
	index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp);
	if (index == (uint64_t)-EINVAL) {
		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
		kfree(queue);
		r = -EINVAL;
		goto unlock;
	}

	queue->doorbell_index = index;
	xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC);
	r = amdgpu_userq_fence_driver_alloc(adev, queue);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n");
		goto unlock;
	}

	r = uq_funcs->mqd_create(uq_mgr, &args->in, queue);
	if (r) {
		drm_file_err(uq_mgr->file, "Failed to create Queue\n");
		amdgpu_userq_fence_driver_free(queue);
		kfree(queue);
		goto unlock;
	}

	qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL);
	if (qid < 0) {
		drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n");
		amdgpu_userq_fence_driver_free(queue);
		uq_funcs->mqd_destroy(uq_mgr, queue);
		kfree(queue);
		r = -ENOMEM;
		goto unlock;
	}

	/* don't map the queue if scheduling is halted */
	if (adev->userq_halt_for_enforce_isolation &&
	    ((queue->queue_type == AMDGPU_HW_IP_GFX) ||
	     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)))
		skip_map_queue = true;
	else
		skip_map_queue = false;
	if (!skip_map_queue) {
		r = amdgpu_userq_map_helper(uq_mgr, queue);
		if (r) {
			drm_file_err(uq_mgr->file, "Failed to map Queue\n");
			idr_remove(&uq_mgr->userq_idr, qid);
			amdgpu_userq_fence_driver_free(queue);
			uq_funcs->mqd_destroy(uq_mgr, queue);
			kfree(queue);
			goto unlock;
		}
	}

	queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid);
	if (!queue_name) {
		r = -ENOMEM;
		goto unlock;
	}

#if defined(CONFIG_DEBUG_FS)
	/* Queue dentry per client to hold MQD information */
	queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client);
	debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops);
#endif
	kfree(queue_name);

	args->out.queue_id = qid;

unlock:
	mutex_unlock(&uq_mgr->userq_mutex);
	mutex_unlock(&adev->userq_mutex);

	return r;
}

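/*
 * Sanity check the ioctl arguments before taking any locks: reject unknown
 * flags and unsupported IP types for CREATE, and require all unused fields
 * to be zero for FREE.
 */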
static int amdgpu_userq_input_args_validate(struct drm_device *dev,
					    union drm_amdgpu_userq *args,
					    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	switch (args->in.op) {
	case AMDGPU_USERQ_OP_CREATE:
		if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK |
				       AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE))
			return -EINVAL;
		/* Usermode queues are only supported for GFX, compute and SDMA IPs as of now */
		if (args->in.ip_type != AMDGPU_HW_IP_GFX &&
		    args->in.ip_type != AMDGPU_HW_IP_DMA &&
		    args->in.ip_type != AMDGPU_HW_IP_COMPUTE) {
			drm_file_err(filp, "Usermode queue doesn't support IP type %u\n",
				     args->in.ip_type);
			return -EINVAL;
		}

		if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) &&
		    (args->in.ip_type != AMDGPU_HW_IP_GFX) &&
		    (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) &&
		    !amdgpu_is_tmz(adev)) {
			drm_file_err(filp, "Secure only supported on GFX/Compute queues\n");
			return -EINVAL;
		}

		if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET ||
		    args->in.queue_va == 0 ||
		    args->in.queue_size == 0) {
			drm_file_err(filp, "invalid userq queue va or size\n");
			return -EINVAL;
		}
		if (!args->in.wptr_va || !args->in.rptr_va) {
			drm_file_err(filp, "invalid userq queue rptr or wptr va\n");
			return -EINVAL;
		}
		break;
	case AMDGPU_USERQ_OP_FREE:
		if (args->in.ip_type ||
		    args->in.doorbell_handle ||
		    args->in.doorbell_offset ||
		    args->in.flags ||
		    args->in.queue_va ||
		    args->in.queue_size ||
		    args->in.rptr_va ||
		    args->in.wptr_va ||
		    args->in.mqd ||
		    args->in.mqd_size)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *filp)
{
	union drm_amdgpu_userq *args = data;
	int r;

	if (amdgpu_userq_input_args_validate(dev, args, filp) < 0)
		return -EINVAL;

	switch (args->in.op) {
	case AMDGPU_USERQ_OP_CREATE:
		r = amdgpu_userq_create(filp, args);
		if (r)
			drm_file_err(filp, "Failed to create usermode queue\n");
		break;

	case AMDGPU_USERQ_OP_FREE:
		r = amdgpu_userq_destroy(filp, args->in.queue_id);
		if (r)
			drm_file_err(filp, "Failed to destroy usermode queue\n");
		break;

	default:
		drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
		return -EINVAL;
	}

	return r;
}

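/*
 * Eviction/restore path: amdgpu_userq_evict() preempts all queues of a
 * process and signals its eviction fence; the delayed resume work then
 * revalidates the VM and maps the queues again using the helpers below.
 */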
static int
amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;
	int ret = 0, r;

	/* Resume all the queues for this process */
	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		r = amdgpu_userq_restore_helper(uq_mgr, queue);
		if (r)
			ret = r;
	}

	if (ret)
		drm_file_err(uq_mgr->file, "Failed to map all the queues\n");
	return ret;
}

static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo)
{
	struct ttm_operation_ctx ctx = { false, false };

	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

/* Handle all BOs on the invalidated list, validate them and update the PTs */
static int
amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
			 struct amdgpu_vm *vm)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int ret;

	spin_lock(&vm->invalidated_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
					 struct amdgpu_bo_va,
					 base.vm_status);
		spin_unlock(&vm->invalidated_lock);

		bo = bo_va->base.bo;
		ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
		if (unlikely(ret))
			return ret;

		amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		if (ret)
			return ret;

		/* This moves the bo_va to the done list */
		ret = amdgpu_vm_bo_update(adev, bo_va, false);
		if (ret)
			return ret;

		spin_lock(&vm->invalidated_lock);
	}
	spin_unlock(&vm->invalidated_lock);

	return 0;
}

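/*
 * Note that the validation below relies on the drm_exec retry pattern: the
 * body of drm_exec_until_all_locked() may be rolled back and re-run by
 * drm_exec_retry_on_contention() until all reservations have been taken
 * without contention.
 */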
/* Make sure the whole VM is ready to be used */
static int
amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	struct amdgpu_device *adev = uq_mgr->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va *bo_va;
	struct drm_exec exec;
	int ret;

	drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		ret = amdgpu_vm_lock_pd(vm, &exec, 1);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;

		ret = amdgpu_vm_lock_done_list(vm, &exec, 1);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;

		/* This validates PDs, PTs and per VM BOs */
		ret = amdgpu_vm_validate(adev, vm, NULL,
					 amdgpu_userq_validate_vm,
					 NULL);
		if (unlikely(ret))
			goto unlock_all;

		/* This locks and validates the remaining evicted BOs */
		ret = amdgpu_userq_bo_validate(adev, &exec, vm);
		drm_exec_retry_on_contention(&exec);
		if (unlikely(ret))
			goto unlock_all;
	}

	ret = amdgpu_vm_handle_moved(adev, vm, NULL);
	if (ret)
		goto unlock_all;

	ret = amdgpu_vm_update_pdes(adev, vm, false);
	if (ret)
		goto unlock_all;

	/*
	 * We need to wait for all VM updates to finish before restarting the
	 * queues. Using the done list this way is okay since everything is
	 * locked in place.
	 */
	list_for_each_entry(bo_va, &vm->done, base.vm_status)
		dma_fence_wait(bo_va->last_pt_update, false);
	dma_fence_wait(vm->last_update, false);

	ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec);
	if (ret)
		drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n");

unlock_all:
	drm_exec_fini(&exec);
	return ret;
}

static void amdgpu_userq_restore_worker(struct work_struct *work)
{
	struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work);
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	int ret;

	flush_delayed_work(&fpriv->evf_mgr.suspend_work);

	mutex_lock(&uq_mgr->userq_mutex);

	ret = amdgpu_userq_vm_validate(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n");
		goto unlock;
	}

	ret = amdgpu_userq_restore_all(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to restore all queues\n");
		goto unlock;
	}

unlock:
	mutex_unlock(&uq_mgr->userq_mutex);
}

static int
amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id;
	int ret = 0, r;

	/* Try to preempt all the queues in this process ctx */
	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		r = amdgpu_userq_preempt_helper(uq_mgr, queue);
		if (r)
			ret = r;
	}

	if (ret)
		drm_file_err(uq_mgr->file, "Couldn't preempt all the queues\n");
	return ret;
}

static int
amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
{
	struct amdgpu_usermode_queue *queue;
	int queue_id, ret;

	idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) {
		struct dma_fence *f = queue->last_fence;

		if (!f || dma_fence_is_signaled(f))
			continue;
		ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
		if (ret <= 0) {
			drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
				     f->context, f->seqno);
			return -ETIMEDOUT;
		}
	}

	return 0;
}

void
amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
		   struct amdgpu_eviction_fence *ev_fence)
{
	int ret;
	struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
	struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr;

	/* Wait for any pending userqueue fence work to finish */
	ret = amdgpu_userq_wait_for_signal(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n");
		return;
	}

	ret = amdgpu_userq_evict_all(uq_mgr);
	if (ret) {
		drm_file_err(uq_mgr->file, "Failed to evict userqueue\n");
		return;
	}

	/* Signal current eviction fence */
	amdgpu_eviction_fence_signal(evf_mgr, ev_fence);

	if (evf_mgr->fd_closing) {
		cancel_delayed_work_sync(&uq_mgr->resume_work);
		return;
	}

	/* Schedule a resume work */
	schedule_delayed_work(&uq_mgr->resume_work, 0);
}

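/**
 * amdgpu_userq_mgr_init - initialize the per-file user queue manager
 * @userq_mgr: manager to initialize
 * @file_priv: DRM file this manager belongs to
 * @adev: amdgpu device
 *
 * Sets up the queue IDR and the resume work, and registers the manager on
 * the device-wide list so it is covered by suspend/resume and
 * enforce-isolation handling.
 */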
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
			  struct amdgpu_device *adev)
{
	mutex_init(&userq_mgr->userq_mutex);
	idr_init_base(&userq_mgr->userq_idr, 1);
	userq_mgr->adev = adev;
	userq_mgr->file = file_priv;

	mutex_lock(&adev->userq_mutex);
	list_add(&userq_mgr->list, &adev->userq_mgr_list);
	mutex_unlock(&adev->userq_mutex);

	INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker);
	return 0;
}

void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
{
	struct amdgpu_device *adev = userq_mgr->adev;
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	uint32_t queue_id;

	cancel_delayed_work_sync(&userq_mgr->resume_work);

	mutex_lock(&adev->userq_mutex);
	mutex_lock(&userq_mgr->userq_mutex);
	idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) {
		amdgpu_userq_wait_for_last_fence(userq_mgr, queue);
		amdgpu_userq_unmap_helper(userq_mgr, queue);
		amdgpu_userq_cleanup(userq_mgr, queue, queue_id);
	}

	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		if (uqm == userq_mgr) {
			list_del(&uqm->list);
			break;
		}
	}
	idr_destroy(&userq_mgr->userq_idr);
	mutex_unlock(&userq_mgr->userq_mutex);
	mutex_unlock(&adev->userq_mutex);
	mutex_destroy(&userq_mgr->userq_mutex);
}

int amdgpu_userq_suspend(struct amdgpu_device *adev)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	if (!ip_mask)
		return 0;

	mutex_lock(&adev->userq_mutex);
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		cancel_delayed_work_sync(&uqm->resume_work);
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (adev->in_s0ix)
				r = amdgpu_userq_preempt_helper(uqm, queue);
			else
				r = amdgpu_userq_unmap_helper(uqm, queue);
			if (r)
				ret = r;
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

int amdgpu_userq_resume(struct amdgpu_device *adev)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	if (!ip_mask)
		return 0;

	mutex_lock(&adev->userq_mutex);
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (adev->in_s0ix)
				r = amdgpu_userq_restore_helper(uqm, queue);
			else
				r = amdgpu_userq_map_helper(uqm, queue);
			if (r)
				ret = r;
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

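/*
 * Enforce-isolation support: while isolation is active for a partition,
 * user queue scheduling on GFX/compute is halted. The stop/start pair below
 * preempts and restores the affected queues and toggles
 * adev->userq_halt_for_enforce_isolation, which also prevents new GFX or
 * compute queues from being mapped at creation time.
 */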
int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
						  u32 idx)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	/* only need to stop gfx/compute */
	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
		return 0;

	mutex_lock(&adev->userq_mutex);
	if (adev->userq_halt_for_enforce_isolation)
		dev_warn(adev->dev, "userq scheduling already stopped!\n");
	adev->userq_halt_for_enforce_isolation = true;
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		cancel_delayed_work_sync(&uqm->resume_work);
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
			    (queue->xcp_id == idx)) {
				r = amdgpu_userq_preempt_helper(uqm, queue);
				if (r)
					ret = r;
			}
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}

int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
						   u32 idx)
{
	u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev);
	struct amdgpu_usermode_queue *queue;
	struct amdgpu_userq_mgr *uqm, *tmp;
	int queue_id;
	int ret = 0, r;

	/* only need to start gfx/compute */
	if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE))))
		return 0;

	mutex_lock(&adev->userq_mutex);
	if (!adev->userq_halt_for_enforce_isolation)
		dev_warn(adev->dev, "userq scheduling already started!\n");
	adev->userq_halt_for_enforce_isolation = false;
	list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) {
		mutex_lock(&uqm->userq_mutex);
		idr_for_each_entry(&uqm->userq_idr, queue, queue_id) {
			if (((queue->queue_type == AMDGPU_HW_IP_GFX) ||
			     (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) &&
			    (queue->xcp_id == idx)) {
				r = amdgpu_userq_restore_helper(uqm, queue);
				if (r)
					ret = r;
			}
		}
		mutex_unlock(&uqm->userq_mutex);
	}
	mutex_unlock(&adev->userq_mutex);
	return ret;
}