1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2023 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <drm/drm_auth.h> 26 #include <drm/drm_exec.h> 27 #include <linux/pm_runtime.h> 28 29 #include "amdgpu.h" 30 #include "amdgpu_vm.h" 31 #include "amdgpu_userq.h" 32 #include "amdgpu_userq_fence.h" 33 34 u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev) 35 { 36 int i; 37 u32 userq_ip_mask = 0; 38 39 for (i = 0; i < AMDGPU_HW_IP_NUM; i++) { 40 if (adev->userq_funcs[i]) 41 userq_ip_mask |= (1 << i); 42 } 43 44 return userq_ip_mask; 45 } 46 47 int amdgpu_userq_input_va_validate(struct amdgpu_vm *vm, u64 addr, 48 u64 expected_size) 49 { 50 struct amdgpu_bo_va_mapping *va_map; 51 u64 user_addr; 52 u64 size; 53 int r = 0; 54 55 user_addr = (addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT; 56 size = expected_size >> AMDGPU_GPU_PAGE_SHIFT; 57 58 r = amdgpu_bo_reserve(vm->root.bo, false); 59 if (r) 60 return r; 61 62 va_map = amdgpu_vm_bo_lookup_mapping(vm, user_addr); 63 if (!va_map) { 64 r = -EINVAL; 65 goto out_err; 66 } 67 /* Only validate the userq whether resident in the VM mapping range */ 68 if (user_addr >= va_map->start && 69 va_map->last - user_addr + 1 >= size) { 70 amdgpu_bo_unreserve(vm->root.bo); 71 return 0; 72 } 73 74 r = -EINVAL; 75 out_err: 76 amdgpu_bo_unreserve(vm->root.bo); 77 return r; 78 } 79 80 static int 81 amdgpu_userq_preempt_helper(struct amdgpu_userq_mgr *uq_mgr, 82 struct amdgpu_usermode_queue *queue) 83 { 84 struct amdgpu_device *adev = uq_mgr->adev; 85 const struct amdgpu_userq_funcs *userq_funcs = 86 adev->userq_funcs[queue->queue_type]; 87 int r = 0; 88 89 if (queue->state == AMDGPU_USERQ_STATE_MAPPED) { 90 r = userq_funcs->preempt(uq_mgr, queue); 91 if (r) { 92 queue->state = AMDGPU_USERQ_STATE_HUNG; 93 } else { 94 queue->state = AMDGPU_USERQ_STATE_PREEMPTED; 95 } 96 } 97 98 return r; 99 } 100 101 static int 102 amdgpu_userq_restore_helper(struct amdgpu_userq_mgr *uq_mgr, 103 struct amdgpu_usermode_queue *queue) 104 { 105 struct amdgpu_device *adev = uq_mgr->adev; 106 const struct amdgpu_userq_funcs *userq_funcs = 107 adev->userq_funcs[queue->queue_type]; 108 int r = 0; 109 110 if (queue->state == AMDGPU_USERQ_STATE_PREEMPTED) { 111 r = userq_funcs->restore(uq_mgr, queue); 112 if (r) { 113 queue->state = AMDGPU_USERQ_STATE_HUNG; 114 } else { 115 queue->state = AMDGPU_USERQ_STATE_MAPPED; 116 } 117 } 118 119 return r; 120 } 121 122 static int 123 amdgpu_userq_unmap_helper(struct amdgpu_userq_mgr *uq_mgr, 124 struct amdgpu_usermode_queue *queue) 125 { 126 struct amdgpu_device *adev = uq_mgr->adev; 127 const struct amdgpu_userq_funcs *userq_funcs = 128 adev->userq_funcs[queue->queue_type]; 129 int r = 0; 130 131 if ((queue->state == AMDGPU_USERQ_STATE_MAPPED) || 132 (queue->state == AMDGPU_USERQ_STATE_PREEMPTED)) { 133 r = userq_funcs->unmap(uq_mgr, queue); 134 if (r) 135 queue->state = AMDGPU_USERQ_STATE_HUNG; 136 else 137 queue->state = AMDGPU_USERQ_STATE_UNMAPPED; 138 } 139 return r; 140 } 141 142 static int 143 amdgpu_userq_map_helper(struct amdgpu_userq_mgr *uq_mgr, 144 struct amdgpu_usermode_queue *queue) 145 { 146 struct amdgpu_device *adev = uq_mgr->adev; 147 const struct amdgpu_userq_funcs *userq_funcs = 148 adev->userq_funcs[queue->queue_type]; 149 int r = 0; 150 151 if (queue->state == AMDGPU_USERQ_STATE_UNMAPPED) { 152 r = userq_funcs->map(uq_mgr, queue); 153 if (r) { 154 queue->state = AMDGPU_USERQ_STATE_HUNG; 155 } else { 156 queue->state = AMDGPU_USERQ_STATE_MAPPED; 157 } 158 } 159 return r; 160 } 161 162 static void 163 amdgpu_userq_wait_for_last_fence(struct amdgpu_userq_mgr *uq_mgr, 164 struct amdgpu_usermode_queue *queue) 165 { 166 struct dma_fence *f = queue->last_fence; 167 int ret; 168 169 if (f && !dma_fence_is_signaled(f)) { 170 ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); 171 if (ret <= 0) 172 drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", 173 f->context, f->seqno); 174 } 175 } 176 177 static void 178 amdgpu_userq_cleanup(struct amdgpu_userq_mgr *uq_mgr, 179 struct amdgpu_usermode_queue *queue, 180 int queue_id) 181 { 182 struct amdgpu_device *adev = uq_mgr->adev; 183 const struct amdgpu_userq_funcs *uq_funcs = adev->userq_funcs[queue->queue_type]; 184 185 uq_funcs->mqd_destroy(uq_mgr, queue); 186 amdgpu_userq_fence_driver_free(queue); 187 idr_remove(&uq_mgr->userq_idr, queue_id); 188 kfree(queue); 189 } 190 191 static struct amdgpu_usermode_queue * 192 amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid) 193 { 194 return idr_find(&uq_mgr->userq_idr, qid); 195 } 196 197 void 198 amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr, 199 struct amdgpu_eviction_fence_mgr *evf_mgr) 200 { 201 struct amdgpu_eviction_fence *ev_fence; 202 203 retry: 204 /* Flush any pending resume work to create ev_fence */ 205 flush_delayed_work(&uq_mgr->resume_work); 206 207 mutex_lock(&uq_mgr->userq_mutex); 208 spin_lock(&evf_mgr->ev_fence_lock); 209 ev_fence = evf_mgr->ev_fence; 210 spin_unlock(&evf_mgr->ev_fence_lock); 211 if (!ev_fence || dma_fence_is_signaled(&ev_fence->base)) { 212 mutex_unlock(&uq_mgr->userq_mutex); 213 /* 214 * Looks like there was no pending resume work, 215 * add one now to create a valid eviction fence 216 */ 217 schedule_delayed_work(&uq_mgr->resume_work, 0); 218 goto retry; 219 } 220 } 221 222 int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr, 223 struct amdgpu_userq_obj *userq_obj, 224 int size) 225 { 226 struct amdgpu_device *adev = uq_mgr->adev; 227 struct amdgpu_bo_param bp; 228 int r; 229 230 memset(&bp, 0, sizeof(bp)); 231 bp.byte_align = PAGE_SIZE; 232 bp.domain = AMDGPU_GEM_DOMAIN_GTT; 233 bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | 234 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; 235 bp.type = ttm_bo_type_kernel; 236 bp.size = size; 237 bp.resv = NULL; 238 bp.bo_ptr_size = sizeof(struct amdgpu_bo); 239 240 r = amdgpu_bo_create(adev, &bp, &userq_obj->obj); 241 if (r) { 242 drm_file_err(uq_mgr->file, "Failed to allocate BO for userqueue (%d)", r); 243 return r; 244 } 245 246 r = amdgpu_bo_reserve(userq_obj->obj, true); 247 if (r) { 248 drm_file_err(uq_mgr->file, "Failed to reserve BO to map (%d)", r); 249 goto free_obj; 250 } 251 252 r = amdgpu_ttm_alloc_gart(&(userq_obj->obj)->tbo); 253 if (r) { 254 drm_file_err(uq_mgr->file, "Failed to alloc GART for userqueue object (%d)", r); 255 goto unresv; 256 } 257 258 r = amdgpu_bo_kmap(userq_obj->obj, &userq_obj->cpu_ptr); 259 if (r) { 260 drm_file_err(uq_mgr->file, "Failed to map BO for userqueue (%d)", r); 261 goto unresv; 262 } 263 264 userq_obj->gpu_addr = amdgpu_bo_gpu_offset(userq_obj->obj); 265 amdgpu_bo_unreserve(userq_obj->obj); 266 memset(userq_obj->cpu_ptr, 0, size); 267 return 0; 268 269 unresv: 270 amdgpu_bo_unreserve(userq_obj->obj); 271 272 free_obj: 273 amdgpu_bo_unref(&userq_obj->obj); 274 return r; 275 } 276 277 void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr, 278 struct amdgpu_userq_obj *userq_obj) 279 { 280 amdgpu_bo_kunmap(userq_obj->obj); 281 amdgpu_bo_unref(&userq_obj->obj); 282 } 283 284 uint64_t 285 amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr, 286 struct amdgpu_db_info *db_info, 287 struct drm_file *filp) 288 { 289 uint64_t index; 290 struct drm_gem_object *gobj; 291 struct amdgpu_userq_obj *db_obj = db_info->db_obj; 292 int r, db_size; 293 294 gobj = drm_gem_object_lookup(filp, db_info->doorbell_handle); 295 if (gobj == NULL) { 296 drm_file_err(uq_mgr->file, "Can't find GEM object for doorbell\n"); 297 return -EINVAL; 298 } 299 300 db_obj->obj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 301 drm_gem_object_put(gobj); 302 303 r = amdgpu_bo_reserve(db_obj->obj, true); 304 if (r) { 305 drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); 306 goto unref_bo; 307 } 308 309 /* Pin the BO before generating the index, unpin in queue destroy */ 310 r = amdgpu_bo_pin(db_obj->obj, AMDGPU_GEM_DOMAIN_DOORBELL); 311 if (r) { 312 drm_file_err(uq_mgr->file, "[Usermode queues] Failed to pin doorbell object\n"); 313 goto unresv_bo; 314 } 315 316 switch (db_info->queue_type) { 317 case AMDGPU_HW_IP_GFX: 318 case AMDGPU_HW_IP_COMPUTE: 319 case AMDGPU_HW_IP_DMA: 320 db_size = sizeof(u64); 321 break; 322 323 case AMDGPU_HW_IP_VCN_ENC: 324 db_size = sizeof(u32); 325 db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1; 326 break; 327 328 case AMDGPU_HW_IP_VPE: 329 db_size = sizeof(u32); 330 db_info->doorbell_offset += AMDGPU_NAVI10_DOORBELL64_VPE << 1; 331 break; 332 333 default: 334 drm_file_err(uq_mgr->file, "[Usermode queues] IP %d not support\n", 335 db_info->queue_type); 336 r = -EINVAL; 337 goto unpin_bo; 338 } 339 340 index = amdgpu_doorbell_index_on_bar(uq_mgr->adev, db_obj->obj, 341 db_info->doorbell_offset, db_size); 342 drm_dbg_driver(adev_to_drm(uq_mgr->adev), 343 "[Usermode queues] doorbell index=%lld\n", index); 344 amdgpu_bo_unreserve(db_obj->obj); 345 return index; 346 347 unpin_bo: 348 amdgpu_bo_unpin(db_obj->obj); 349 unresv_bo: 350 amdgpu_bo_unreserve(db_obj->obj); 351 unref_bo: 352 amdgpu_bo_unref(&db_obj->obj); 353 return r; 354 } 355 356 static int 357 amdgpu_userq_destroy(struct drm_file *filp, int queue_id) 358 { 359 struct amdgpu_fpriv *fpriv = filp->driver_priv; 360 struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; 361 struct amdgpu_device *adev = uq_mgr->adev; 362 struct amdgpu_usermode_queue *queue; 363 int r = 0; 364 365 cancel_delayed_work_sync(&uq_mgr->resume_work); 366 mutex_lock(&uq_mgr->userq_mutex); 367 368 queue = amdgpu_userq_find(uq_mgr, queue_id); 369 if (!queue) { 370 drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n"); 371 mutex_unlock(&uq_mgr->userq_mutex); 372 return -EINVAL; 373 } 374 amdgpu_userq_wait_for_last_fence(uq_mgr, queue); 375 r = amdgpu_bo_reserve(queue->db_obj.obj, true); 376 if (!r) { 377 amdgpu_bo_unpin(queue->db_obj.obj); 378 amdgpu_bo_unreserve(queue->db_obj.obj); 379 } 380 amdgpu_bo_unref(&queue->db_obj.obj); 381 382 #if defined(CONFIG_DEBUG_FS) 383 debugfs_remove_recursive(queue->debugfs_queue); 384 #endif 385 r = amdgpu_userq_unmap_helper(uq_mgr, queue); 386 /*TODO: It requires a reset for userq hw unmap error*/ 387 if (unlikely(r != AMDGPU_USERQ_STATE_UNMAPPED)) { 388 drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n"); 389 queue->state = AMDGPU_USERQ_STATE_HUNG; 390 } 391 amdgpu_userq_cleanup(uq_mgr, queue, queue_id); 392 mutex_unlock(&uq_mgr->userq_mutex); 393 394 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 395 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 396 397 return r; 398 } 399 400 static int amdgpu_userq_priority_permit(struct drm_file *filp, 401 int priority) 402 { 403 if (priority < AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH) 404 return 0; 405 406 if (capable(CAP_SYS_NICE)) 407 return 0; 408 409 if (drm_is_current_master(filp)) 410 return 0; 411 412 return -EACCES; 413 } 414 415 #if defined(CONFIG_DEBUG_FS) 416 static int amdgpu_mqd_info_read(struct seq_file *m, void *unused) 417 { 418 struct amdgpu_usermode_queue *queue = m->private; 419 struct amdgpu_bo *bo; 420 int r; 421 422 if (!queue || !queue->mqd.obj) 423 return -EINVAL; 424 425 bo = amdgpu_bo_ref(queue->mqd.obj); 426 r = amdgpu_bo_reserve(bo, true); 427 if (r) { 428 amdgpu_bo_unref(&bo); 429 return -EINVAL; 430 } 431 432 seq_printf(m, "queue_type: %d\n", queue->queue_type); 433 seq_printf(m, "mqd_gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(queue->mqd.obj)); 434 435 amdgpu_bo_unreserve(bo); 436 amdgpu_bo_unref(&bo); 437 438 return 0; 439 } 440 441 static int amdgpu_mqd_info_open(struct inode *inode, struct file *file) 442 { 443 return single_open(file, amdgpu_mqd_info_read, inode->i_private); 444 } 445 446 static const struct file_operations amdgpu_mqd_info_fops = { 447 .owner = THIS_MODULE, 448 .open = amdgpu_mqd_info_open, 449 .read = seq_read, 450 .llseek = seq_lseek, 451 .release = single_release, 452 }; 453 #endif 454 455 static int 456 amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) 457 { 458 struct amdgpu_fpriv *fpriv = filp->driver_priv; 459 struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr; 460 struct amdgpu_device *adev = uq_mgr->adev; 461 const struct amdgpu_userq_funcs *uq_funcs; 462 struct amdgpu_usermode_queue *queue; 463 struct amdgpu_db_info db_info; 464 char *queue_name; 465 bool skip_map_queue; 466 uint64_t index; 467 int qid, r = 0; 468 int priority = 469 (args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK) >> 470 AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT; 471 472 r = amdgpu_userq_priority_permit(filp, priority); 473 if (r) 474 return r; 475 476 r = pm_runtime_get_sync(adev_to_drm(adev)->dev); 477 if (r < 0) { 478 drm_file_err(uq_mgr->file, "pm_runtime_get_sync() failed for userqueue create\n"); 479 pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 480 return r; 481 } 482 483 /* 484 * There could be a situation that we are creating a new queue while 485 * the other queues under this UQ_mgr are suspended. So if there is any 486 * resume work pending, wait for it to get done. 487 * 488 * This will also make sure we have a valid eviction fence ready to be used. 489 */ 490 mutex_lock(&adev->userq_mutex); 491 amdgpu_userq_ensure_ev_fence(&fpriv->userq_mgr, &fpriv->evf_mgr); 492 493 uq_funcs = adev->userq_funcs[args->in.ip_type]; 494 if (!uq_funcs) { 495 drm_file_err(uq_mgr->file, "Usermode queue is not supported for this IP (%u)\n", 496 args->in.ip_type); 497 r = -EINVAL; 498 goto unlock; 499 } 500 501 queue = kzalloc(sizeof(struct amdgpu_usermode_queue), GFP_KERNEL); 502 if (!queue) { 503 drm_file_err(uq_mgr->file, "Failed to allocate memory for queue\n"); 504 r = -ENOMEM; 505 goto unlock; 506 } 507 508 /* Validate the userq virtual address.*/ 509 if (amdgpu_userq_input_va_validate(&fpriv->vm, args->in.queue_va, args->in.queue_size) || 510 amdgpu_userq_input_va_validate(&fpriv->vm, args->in.rptr_va, AMDGPU_GPU_PAGE_SIZE) || 511 amdgpu_userq_input_va_validate(&fpriv->vm, args->in.wptr_va, AMDGPU_GPU_PAGE_SIZE)) { 512 r = -EINVAL; 513 kfree(queue); 514 goto unlock; 515 } 516 queue->doorbell_handle = args->in.doorbell_handle; 517 queue->queue_type = args->in.ip_type; 518 queue->vm = &fpriv->vm; 519 queue->priority = priority; 520 521 db_info.queue_type = queue->queue_type; 522 db_info.doorbell_handle = queue->doorbell_handle; 523 db_info.db_obj = &queue->db_obj; 524 db_info.doorbell_offset = args->in.doorbell_offset; 525 526 /* Convert relative doorbell offset into absolute doorbell index */ 527 index = amdgpu_userq_get_doorbell_index(uq_mgr, &db_info, filp); 528 if (index == (uint64_t)-EINVAL) { 529 drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n"); 530 kfree(queue); 531 r = -EINVAL; 532 goto unlock; 533 } 534 535 queue->doorbell_index = index; 536 xa_init_flags(&queue->fence_drv_xa, XA_FLAGS_ALLOC); 537 r = amdgpu_userq_fence_driver_alloc(adev, queue); 538 if (r) { 539 drm_file_err(uq_mgr->file, "Failed to alloc fence driver\n"); 540 goto unlock; 541 } 542 543 r = uq_funcs->mqd_create(uq_mgr, &args->in, queue); 544 if (r) { 545 drm_file_err(uq_mgr->file, "Failed to create Queue\n"); 546 amdgpu_userq_fence_driver_free(queue); 547 kfree(queue); 548 goto unlock; 549 } 550 551 552 qid = idr_alloc(&uq_mgr->userq_idr, queue, 1, AMDGPU_MAX_USERQ_COUNT, GFP_KERNEL); 553 if (qid < 0) { 554 drm_file_err(uq_mgr->file, "Failed to allocate a queue id\n"); 555 amdgpu_userq_fence_driver_free(queue); 556 uq_funcs->mqd_destroy(uq_mgr, queue); 557 kfree(queue); 558 r = -ENOMEM; 559 goto unlock; 560 } 561 562 /* don't map the queue if scheduling is halted */ 563 if (adev->userq_halt_for_enforce_isolation && 564 ((queue->queue_type == AMDGPU_HW_IP_GFX) || 565 (queue->queue_type == AMDGPU_HW_IP_COMPUTE))) 566 skip_map_queue = true; 567 else 568 skip_map_queue = false; 569 if (!skip_map_queue) { 570 r = amdgpu_userq_map_helper(uq_mgr, queue); 571 if (r) { 572 drm_file_err(uq_mgr->file, "Failed to map Queue\n"); 573 idr_remove(&uq_mgr->userq_idr, qid); 574 amdgpu_userq_fence_driver_free(queue); 575 uq_funcs->mqd_destroy(uq_mgr, queue); 576 kfree(queue); 577 goto unlock; 578 } 579 } 580 581 queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid); 582 if (!queue_name) { 583 r = -ENOMEM; 584 goto unlock; 585 } 586 587 #if defined(CONFIG_DEBUG_FS) 588 /* Queue dentry per client to hold MQD information */ 589 queue->debugfs_queue = debugfs_create_dir(queue_name, filp->debugfs_client); 590 debugfs_create_file("mqd_info", 0444, queue->debugfs_queue, queue, &amdgpu_mqd_info_fops); 591 #endif 592 kfree(queue_name); 593 594 args->out.queue_id = qid; 595 596 unlock: 597 mutex_unlock(&uq_mgr->userq_mutex); 598 mutex_unlock(&adev->userq_mutex); 599 600 return r; 601 } 602 603 static int amdgpu_userq_input_args_validate(struct drm_device *dev, 604 union drm_amdgpu_userq *args, 605 struct drm_file *filp) 606 { 607 struct amdgpu_device *adev = drm_to_adev(dev); 608 609 switch (args->in.op) { 610 case AMDGPU_USERQ_OP_CREATE: 611 if (args->in.flags & ~(AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK | 612 AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE)) 613 return -EINVAL; 614 /* Usermode queues are only supported for GFX IP as of now */ 615 if (args->in.ip_type != AMDGPU_HW_IP_GFX && 616 args->in.ip_type != AMDGPU_HW_IP_DMA && 617 args->in.ip_type != AMDGPU_HW_IP_COMPUTE) { 618 drm_file_err(filp, "Usermode queue doesn't support IP type %u\n", 619 args->in.ip_type); 620 return -EINVAL; 621 } 622 623 if ((args->in.flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE) && 624 (args->in.ip_type != AMDGPU_HW_IP_GFX) && 625 (args->in.ip_type != AMDGPU_HW_IP_COMPUTE) && 626 !amdgpu_is_tmz(adev)) { 627 drm_file_err(filp, "Secure only supported on GFX/Compute queues\n"); 628 return -EINVAL; 629 } 630 631 if (args->in.queue_va == AMDGPU_BO_INVALID_OFFSET || 632 args->in.queue_va == 0 || 633 args->in.queue_size == 0) { 634 drm_file_err(filp, "invalidate userq queue va or size\n"); 635 return -EINVAL; 636 } 637 if (!args->in.wptr_va || !args->in.rptr_va) { 638 drm_file_err(filp, "invalidate userq queue rptr or wptr\n"); 639 return -EINVAL; 640 } 641 break; 642 case AMDGPU_USERQ_OP_FREE: 643 if (args->in.ip_type || 644 args->in.doorbell_handle || 645 args->in.doorbell_offset || 646 args->in.flags || 647 args->in.queue_va || 648 args->in.queue_size || 649 args->in.rptr_va || 650 args->in.wptr_va || 651 args->in.mqd || 652 args->in.mqd_size) 653 return -EINVAL; 654 break; 655 default: 656 return -EINVAL; 657 } 658 659 return 0; 660 } 661 662 int amdgpu_userq_ioctl(struct drm_device *dev, void *data, 663 struct drm_file *filp) 664 { 665 union drm_amdgpu_userq *args = data; 666 int r; 667 668 if (amdgpu_userq_input_args_validate(dev, args, filp) < 0) 669 return -EINVAL; 670 671 switch (args->in.op) { 672 case AMDGPU_USERQ_OP_CREATE: 673 r = amdgpu_userq_create(filp, args); 674 if (r) 675 drm_file_err(filp, "Failed to create usermode queue\n"); 676 break; 677 678 case AMDGPU_USERQ_OP_FREE: 679 r = amdgpu_userq_destroy(filp, args->in.queue_id); 680 if (r) 681 drm_file_err(filp, "Failed to destroy usermode queue\n"); 682 break; 683 684 default: 685 drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op); 686 return -EINVAL; 687 } 688 689 return r; 690 } 691 692 static int 693 amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr) 694 { 695 struct amdgpu_usermode_queue *queue; 696 int queue_id; 697 int ret = 0, r; 698 699 /* Resume all the queues for this process */ 700 idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { 701 r = amdgpu_userq_restore_helper(uq_mgr, queue); 702 if (r) 703 ret = r; 704 } 705 706 if (ret) 707 drm_file_err(uq_mgr->file, "Failed to map all the queues\n"); 708 return ret; 709 } 710 711 static int amdgpu_userq_validate_vm(void *param, struct amdgpu_bo *bo) 712 { 713 struct ttm_operation_ctx ctx = { false, false }; 714 715 amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); 716 return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 717 } 718 719 /* Handle all BOs on the invalidated list, validate them and update the PTs */ 720 static int 721 amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec, 722 struct amdgpu_vm *vm) 723 { 724 struct ttm_operation_ctx ctx = { false, false }; 725 struct amdgpu_bo_va *bo_va; 726 struct amdgpu_bo *bo; 727 int ret; 728 729 spin_lock(&vm->invalidated_lock); 730 while (!list_empty(&vm->invalidated)) { 731 bo_va = list_first_entry(&vm->invalidated, 732 struct amdgpu_bo_va, 733 base.vm_status); 734 spin_unlock(&vm->invalidated_lock); 735 736 bo = bo_va->base.bo; 737 ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2); 738 if (unlikely(ret)) 739 return ret; 740 741 amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); 742 ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); 743 if (ret) 744 return ret; 745 746 /* This moves the bo_va to the done list */ 747 ret = amdgpu_vm_bo_update(adev, bo_va, false); 748 if (ret) 749 return ret; 750 751 spin_lock(&vm->invalidated_lock); 752 } 753 spin_unlock(&vm->invalidated_lock); 754 755 return 0; 756 } 757 758 /* Make sure the whole VM is ready to be used */ 759 static int 760 amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr) 761 { 762 struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 763 struct amdgpu_device *adev = uq_mgr->adev; 764 struct amdgpu_vm *vm = &fpriv->vm; 765 struct amdgpu_bo_va *bo_va; 766 struct drm_exec exec; 767 int ret; 768 769 drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); 770 drm_exec_until_all_locked(&exec) { 771 ret = amdgpu_vm_lock_pd(vm, &exec, 1); 772 drm_exec_retry_on_contention(&exec); 773 if (unlikely(ret)) 774 goto unlock_all; 775 776 ret = amdgpu_vm_lock_done_list(vm, &exec, 1); 777 drm_exec_retry_on_contention(&exec); 778 if (unlikely(ret)) 779 goto unlock_all; 780 781 /* This validates PDs, PTs and per VM BOs */ 782 ret = amdgpu_vm_validate(adev, vm, NULL, 783 amdgpu_userq_validate_vm, 784 NULL); 785 if (unlikely(ret)) 786 goto unlock_all; 787 788 /* This locks and validates the remaining evicted BOs */ 789 ret = amdgpu_userq_bo_validate(adev, &exec, vm); 790 drm_exec_retry_on_contention(&exec); 791 if (unlikely(ret)) 792 goto unlock_all; 793 } 794 795 ret = amdgpu_vm_handle_moved(adev, vm, NULL); 796 if (ret) 797 goto unlock_all; 798 799 ret = amdgpu_vm_update_pdes(adev, vm, false); 800 if (ret) 801 goto unlock_all; 802 803 /* 804 * We need to wait for all VM updates to finish before restarting the 805 * queues. Using the done list like that is now ok since everything is 806 * locked in place. 807 */ 808 list_for_each_entry(bo_va, &vm->done, base.vm_status) 809 dma_fence_wait(bo_va->last_pt_update, false); 810 dma_fence_wait(vm->last_update, false); 811 812 ret = amdgpu_eviction_fence_replace_fence(&fpriv->evf_mgr, &exec); 813 if (ret) 814 drm_file_err(uq_mgr->file, "Failed to replace eviction fence\n"); 815 816 unlock_all: 817 drm_exec_fini(&exec); 818 return ret; 819 } 820 821 static void amdgpu_userq_restore_worker(struct work_struct *work) 822 { 823 struct amdgpu_userq_mgr *uq_mgr = work_to_uq_mgr(work, resume_work.work); 824 struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 825 int ret; 826 827 flush_delayed_work(&fpriv->evf_mgr.suspend_work); 828 829 mutex_lock(&uq_mgr->userq_mutex); 830 831 ret = amdgpu_userq_vm_validate(uq_mgr); 832 if (ret) { 833 drm_file_err(uq_mgr->file, "Failed to validate BOs to restore\n"); 834 goto unlock; 835 } 836 837 ret = amdgpu_userq_restore_all(uq_mgr); 838 if (ret) { 839 drm_file_err(uq_mgr->file, "Failed to restore all queues\n"); 840 goto unlock; 841 } 842 843 unlock: 844 mutex_unlock(&uq_mgr->userq_mutex); 845 } 846 847 static int 848 amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr) 849 { 850 struct amdgpu_usermode_queue *queue; 851 int queue_id; 852 int ret = 0, r; 853 854 /* Try to unmap all the queues in this process ctx */ 855 idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { 856 r = amdgpu_userq_preempt_helper(uq_mgr, queue); 857 if (r) 858 ret = r; 859 } 860 861 if (ret) 862 drm_file_err(uq_mgr->file, "Couldn't unmap all the queues\n"); 863 return ret; 864 } 865 866 static int 867 amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr) 868 { 869 struct amdgpu_usermode_queue *queue; 870 int queue_id, ret; 871 872 idr_for_each_entry(&uq_mgr->userq_idr, queue, queue_id) { 873 struct dma_fence *f = queue->last_fence; 874 875 if (!f || dma_fence_is_signaled(f)) 876 continue; 877 ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100)); 878 if (ret <= 0) { 879 drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n", 880 f->context, f->seqno); 881 return -ETIMEDOUT; 882 } 883 } 884 885 return 0; 886 } 887 888 void 889 amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr, 890 struct amdgpu_eviction_fence *ev_fence) 891 { 892 int ret; 893 struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); 894 struct amdgpu_eviction_fence_mgr *evf_mgr = &fpriv->evf_mgr; 895 896 /* Wait for any pending userqueue fence work to finish */ 897 ret = amdgpu_userq_wait_for_signal(uq_mgr); 898 if (ret) { 899 drm_file_err(uq_mgr->file, "Not evicting userqueue, timeout waiting for work\n"); 900 return; 901 } 902 903 ret = amdgpu_userq_evict_all(uq_mgr); 904 if (ret) { 905 drm_file_err(uq_mgr->file, "Failed to evict userqueue\n"); 906 return; 907 } 908 909 /* Signal current eviction fence */ 910 amdgpu_eviction_fence_signal(evf_mgr, ev_fence); 911 912 if (evf_mgr->fd_closing) { 913 cancel_delayed_work_sync(&uq_mgr->resume_work); 914 return; 915 } 916 917 /* Schedule a resume work */ 918 schedule_delayed_work(&uq_mgr->resume_work, 0); 919 } 920 921 int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv, 922 struct amdgpu_device *adev) 923 { 924 mutex_init(&userq_mgr->userq_mutex); 925 idr_init_base(&userq_mgr->userq_idr, 1); 926 userq_mgr->adev = adev; 927 userq_mgr->file = file_priv; 928 929 mutex_lock(&adev->userq_mutex); 930 list_add(&userq_mgr->list, &adev->userq_mgr_list); 931 mutex_unlock(&adev->userq_mutex); 932 933 INIT_DELAYED_WORK(&userq_mgr->resume_work, amdgpu_userq_restore_worker); 934 return 0; 935 } 936 937 void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr) 938 { 939 struct amdgpu_device *adev = userq_mgr->adev; 940 struct amdgpu_usermode_queue *queue; 941 struct amdgpu_userq_mgr *uqm, *tmp; 942 uint32_t queue_id; 943 944 cancel_delayed_work_sync(&userq_mgr->resume_work); 945 946 mutex_lock(&adev->userq_mutex); 947 mutex_lock(&userq_mgr->userq_mutex); 948 idr_for_each_entry(&userq_mgr->userq_idr, queue, queue_id) { 949 amdgpu_userq_wait_for_last_fence(userq_mgr, queue); 950 amdgpu_userq_unmap_helper(userq_mgr, queue); 951 amdgpu_userq_cleanup(userq_mgr, queue, queue_id); 952 } 953 954 list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 955 if (uqm == userq_mgr) { 956 list_del(&uqm->list); 957 break; 958 } 959 } 960 idr_destroy(&userq_mgr->userq_idr); 961 mutex_unlock(&userq_mgr->userq_mutex); 962 mutex_unlock(&adev->userq_mutex); 963 mutex_destroy(&userq_mgr->userq_mutex); 964 } 965 966 int amdgpu_userq_suspend(struct amdgpu_device *adev) 967 { 968 u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); 969 struct amdgpu_usermode_queue *queue; 970 struct amdgpu_userq_mgr *uqm, *tmp; 971 int queue_id; 972 int ret = 0, r; 973 974 if (!ip_mask) 975 return 0; 976 977 mutex_lock(&adev->userq_mutex); 978 list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 979 cancel_delayed_work_sync(&uqm->resume_work); 980 mutex_lock(&uqm->userq_mutex); 981 idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { 982 if (adev->in_s0ix) 983 r = amdgpu_userq_preempt_helper(uqm, queue); 984 else 985 r = amdgpu_userq_unmap_helper(uqm, queue); 986 if (r) 987 ret = r; 988 } 989 mutex_unlock(&uqm->userq_mutex); 990 } 991 mutex_unlock(&adev->userq_mutex); 992 return ret; 993 } 994 995 int amdgpu_userq_resume(struct amdgpu_device *adev) 996 { 997 u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); 998 struct amdgpu_usermode_queue *queue; 999 struct amdgpu_userq_mgr *uqm, *tmp; 1000 int queue_id; 1001 int ret = 0, r; 1002 1003 if (!ip_mask) 1004 return 0; 1005 1006 mutex_lock(&adev->userq_mutex); 1007 list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 1008 mutex_lock(&uqm->userq_mutex); 1009 idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { 1010 if (adev->in_s0ix) 1011 r = amdgpu_userq_restore_helper(uqm, queue); 1012 else 1013 r = amdgpu_userq_map_helper(uqm, queue); 1014 if (r) 1015 ret = r; 1016 } 1017 mutex_unlock(&uqm->userq_mutex); 1018 } 1019 mutex_unlock(&adev->userq_mutex); 1020 return ret; 1021 } 1022 1023 int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev, 1024 u32 idx) 1025 { 1026 u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); 1027 struct amdgpu_usermode_queue *queue; 1028 struct amdgpu_userq_mgr *uqm, *tmp; 1029 int queue_id; 1030 int ret = 0, r; 1031 1032 /* only need to stop gfx/compute */ 1033 if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) 1034 return 0; 1035 1036 mutex_lock(&adev->userq_mutex); 1037 if (adev->userq_halt_for_enforce_isolation) 1038 dev_warn(adev->dev, "userq scheduling already stopped!\n"); 1039 adev->userq_halt_for_enforce_isolation = true; 1040 list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 1041 cancel_delayed_work_sync(&uqm->resume_work); 1042 mutex_lock(&uqm->userq_mutex); 1043 idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { 1044 if (((queue->queue_type == AMDGPU_HW_IP_GFX) || 1045 (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && 1046 (queue->xcp_id == idx)) { 1047 r = amdgpu_userq_preempt_helper(uqm, queue); 1048 if (r) 1049 ret = r; 1050 } 1051 } 1052 mutex_unlock(&uqm->userq_mutex); 1053 } 1054 mutex_unlock(&adev->userq_mutex); 1055 return ret; 1056 } 1057 1058 int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev, 1059 u32 idx) 1060 { 1061 u32 ip_mask = amdgpu_userq_get_supported_ip_mask(adev); 1062 struct amdgpu_usermode_queue *queue; 1063 struct amdgpu_userq_mgr *uqm, *tmp; 1064 int queue_id; 1065 int ret = 0, r; 1066 1067 /* only need to stop gfx/compute */ 1068 if (!(ip_mask & ((1 << AMDGPU_HW_IP_GFX) | (1 << AMDGPU_HW_IP_COMPUTE)))) 1069 return 0; 1070 1071 mutex_lock(&adev->userq_mutex); 1072 if (!adev->userq_halt_for_enforce_isolation) 1073 dev_warn(adev->dev, "userq scheduling already started!\n"); 1074 adev->userq_halt_for_enforce_isolation = false; 1075 list_for_each_entry_safe(uqm, tmp, &adev->userq_mgr_list, list) { 1076 mutex_lock(&uqm->userq_mutex); 1077 idr_for_each_entry(&uqm->userq_idr, queue, queue_id) { 1078 if (((queue->queue_type == AMDGPU_HW_IP_GFX) || 1079 (queue->queue_type == AMDGPU_HW_IP_COMPUTE)) && 1080 (queue->xcp_id == idx)) { 1081 r = amdgpu_userq_restore_helper(uqm, queue); 1082 if (r) 1083 ret = r; 1084 } 1085 } 1086 mutex_unlock(&uqm->userq_mutex); 1087 } 1088 mutex_unlock(&adev->userq_mutex); 1089 return ret; 1090 } 1091