1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_exec_queue.h" 7 8 #include <linux/nospec.h> 9 10 #include <drm/drm_device.h> 11 #include <drm/drm_drv.h> 12 #include <drm/drm_file.h> 13 #include <drm/drm_syncobj.h> 14 #include <uapi/drm/xe_drm.h> 15 16 #include "xe_bo.h" 17 #include "xe_dep_scheduler.h" 18 #include "xe_device.h" 19 #include "xe_gt.h" 20 #include "xe_gt_sriov_vf.h" 21 #include "xe_hw_engine_class_sysfs.h" 22 #include "xe_hw_engine_group.h" 23 #include "xe_hw_fence.h" 24 #include "xe_irq.h" 25 #include "xe_lrc.h" 26 #include "xe_macros.h" 27 #include "xe_migrate.h" 28 #include "xe_pm.h" 29 #include "xe_ring_ops_types.h" 30 #include "xe_trace.h" 31 #include "xe_vm.h" 32 #include "xe_pxp.h" 33 34 /** 35 * DOC: Execution Queue 36 * 37 * An Execution queue is an interface for the HW context of execution. 38 * The user creates an execution queue, submits the GPU jobs through those 39 * queues and in the end destroys them. 40 * 41 * Execution queues can also be created by XeKMD itself for driver internal 42 * operations like object migration etc. 43 * 44 * An execution queue is associated with a specified HW engine or a group of 45 * engines (belonging to the same tile and engine class) and any GPU job 46 * submitted on the queue will be run on one of these engines. 47 * 48 * An execution queue is tied to an address space (VM). It holds a reference 49 * of the associated VM and the underlying Logical Ring Context/s (LRC/s) 50 * until the queue is destroyed. 51 * 52 * The execution queue sits on top of the submission backend. It opaquely 53 * handles the GuC and Execlist backends whichever the platform uses, and 54 * the ring operations the different engine classes support. 
 */

/*
 * Scheduling properties that can be tuned per engine class; used as indices
 * when looking up min/max limits in xe_exec_queue_get_prop_minmax().
 */
enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions);

/*
 * Release multi-queue group state: a secondary queue only drops the reference
 * it holds on its primary, while a primary releases the per-group LRC
 * references, the group xarray/lock and the CGP BO before freeing the group.
 */
static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;
	unsigned long idx;

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		/*
		 * Put pairs with get from xe_exec_queue_lookup() call
		 * in xe_exec_queue_group_validate().
		 */
		xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
		return;
	}

	if (!group)
		return;

	/* Primary queue cleanup */
	xa_for_each(&group->xa, idx, lrc)
		xe_lrc_put(lrc);

	xa_destroy(&group->xa);
	mutex_destroy(&group->list_lock);
	xe_bo_unpin_map_no_vm(group->cgp_bo);
	kfree(group);
}

/*
 * Free @q and every resource taken in __xe_exec_queue_alloc(); safe to call
 * on a partially constructed queue since all teardown steps are conditional.
 */
static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
		if (q->tlb_inval[i].dep_scheduler)
			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	if (xe_exec_queue_is_multi_queue(q))
		xe_exec_queue_group_cleanup(q);

	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kvfree(q->replay_state);
	kfree(q);
}

/*
 * Create one TLB-invalidation dependency scheduler per GT on @q's tile (the
 * media GT may be absent, hence the continue). Returns 0 or a negative errno
 * from xe_dep_scheduler_create(); partially created schedulers are left for
 * __xe_exec_queue_free() to unwind.
 */
static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
		struct xe_dep_scheduler *dep_scheduler;
		struct xe_gt *gt;
		struct workqueue_struct *wq;

		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
			gt = tile->primary_gt;
		else
			gt = tile->media_gt;

		if (!gt)
			continue;

		wq = gt->tlb_inval.job_wq;

#define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */
		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
							MAX_TLB_INVAL_JOBS);
		if (IS_ERR(dep_scheduler))
			return PTR_ERR(dep_scheduler);

		q->tlb_inval[i].dep_scheduler = dep_scheduler;
	}
#undef MAX_TLB_INVAL_JOBS

	return 0;
}

/*
 * Allocate and partially construct an exec queue: refcount, engine/GT wiring,
 * default scheduling properties, optional dep schedulers for migrate/VM
 * queues, VM reference and user-extension processing. LRC creation happens
 * later in __xe_exec_queue_init(). Returns the queue or an ERR_PTR; on any
 * failure the queue is torn down via __xe_exec_queue_free().
 */
static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);
	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;

	/* Defaults come from the per-class sysfs-tunable properties */
	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
				hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
		err = alloc_dep_schedulers(xe, q);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

/*
 * Second construction phase: register @q with the submission backend
 * (q->ops->init) and then create one LRC per width. LRC creation must follow
 * backend init — see the comment below regarding VF migration recovery.
 */
static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
	int i, err;
	u32 flags = 0;

	/*
	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
	 * other workload can use the EUs at the same time). On MTL this is done
	 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
	 * is a dedicated bit for it.
	 */
	if (xe_exec_queue_uses_pxp(q) &&
	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
			flags |= XE_LRC_CREATE_PXP;
		else
			flags |= XE_LRC_CREATE_RUNALONE;
	}

	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
		flags |= XE_LRC_CREATE_USER_CTX;

	err = q->ops->init(q);
	if (err)
		return err;

	/*
	 * This must occur after q->ops->init to avoid race conditions during VF
	 * post-migration recovery, as the fixups for the LRC GGTT addresses
	 * depend on the queue being present in the backend tracking structure.
	 *
	 * In addition to above, we must wait on inflight GGTT changes to avoid
	 * writing out stale values here. Such wait provides a solid solution
	 * (without a race) only if the function can detect migration instantly
	 * from the moment vCPU resumes execution.
	 */
	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
				    xe_lrc_ring_size(), q->msix_vec, flags);
		if (IS_ERR(lrc)) {
			err = PTR_ERR(lrc);
			goto err_lrc;
		}

		/* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */
		WRITE_ONCE(q->lrc[i], lrc);
	}

	return 0;

err_lrc:
	/* Unwind only the LRCs created so far (i is the failed index) */
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

/* Backend teardown followed by release of every per-width LRC reference */
static void __xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	q->ops->fini(q);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);
}

/*
 * Allocate and fully initialize an exec queue on @hwe (or its logical-mask
 * siblings). Returns the queue or an ERR_PTR; intermediate failures unwind
 * through __xe_exec_queue_fini()/__xe_exec_queue_free() as appropriate.
 */
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q, flags);
	if (err)
		goto err_post_alloc;

	/*
	 * We can only add the queue to the PXP list after the init is complete,
	 * because the PXP termination can call exec_queue_kill and that will
	 * go bad if the queue is only half-initialized. This means that we
	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
	 * and we need to do it here instead.
	 */
	if (xe_exec_queue_uses_pxp(q)) {
		err = xe_pxp_exec_queue_add(xe->pxp, q);
		if (err)
			goto err_post_init;
	}

	return q;

err_post_init:
	__xe_exec_queue_fini(q);
err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);

/*
 * Create a width-1 queue that may run on any non-reserved engine of @class on
 * @gt: the logical mask collects every matching instance, the first match is
 * used as the nominal engine. Returns -ENODEV if the class has no engines.
 */
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
 * for access to physical memory required for page table programming. On a
 * faulting devices the reserved copy engine instance must be used to avoid
 * deadlocking (user binds cannot get stuck behind faults as kernel binds which
 * resolve faults depend on user binds). On non-faulting devices any copy engine
 * can be used.
374 * 375 * Returns exec queue on success, ERR_PTR on failure 376 */ 377 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, 378 struct xe_tile *tile, 379 u32 flags, u64 extensions) 380 { 381 struct xe_gt *gt = tile->primary_gt; 382 struct xe_exec_queue *q; 383 struct xe_vm *migrate_vm; 384 385 migrate_vm = xe_migrate_get_vm(tile->migrate); 386 if (xe->info.has_usm) { 387 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, 388 XE_ENGINE_CLASS_COPY, 389 gt->usm.reserved_bcs_instance, 390 false); 391 392 if (!hwe) { 393 xe_vm_put(migrate_vm); 394 return ERR_PTR(-EINVAL); 395 } 396 397 q = xe_exec_queue_create(xe, migrate_vm, 398 BIT(hwe->logical_instance), 1, hwe, 399 flags, extensions); 400 } else { 401 q = xe_exec_queue_create_class(xe, gt, migrate_vm, 402 XE_ENGINE_CLASS_COPY, flags, 403 extensions); 404 } 405 xe_vm_put(migrate_vm); 406 407 if (!IS_ERR(q)) { 408 int err = drm_syncobj_create(&q->ufence_syncobj, 409 DRM_SYNCOBJ_CREATE_SIGNALED, 410 NULL); 411 if (err) { 412 xe_exec_queue_put(q); 413 return ERR_PTR(err); 414 } 415 } 416 417 return q; 418 } 419 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); 420 421 void xe_exec_queue_destroy(struct kref *ref) 422 { 423 struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); 424 struct xe_exec_queue *eq, *next; 425 int i; 426 427 xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); 428 429 if (q->ufence_syncobj) 430 drm_syncobj_put(q->ufence_syncobj); 431 432 if (q->ufence_syncobj) 433 drm_syncobj_put(q->ufence_syncobj); 434 435 if (xe_exec_queue_uses_pxp(q)) 436 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); 437 438 xe_exec_queue_last_fence_put_unlocked(q); 439 for_each_tlb_inval(i) 440 xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); 441 442 if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { 443 list_for_each_entry_safe(eq, next, &q->multi_gt_list, 444 multi_gt_link) 445 xe_exec_queue_put(eq); 446 } 447 448 q->ops->destroy(q); 449 } 450 451 void 
xe_exec_queue_fini(struct xe_exec_queue *q)
{
	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wakeup any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	__xe_exec_queue_fini(q);
	__xe_exec_queue_free(q);
}

/* Name the queue "<class prefix><instance>" for trace/debug output */
void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

/*
 * Look up an exec queue by user-visible id under the xef lock; takes a
 * reference on success which the caller must balance with xe_exec_queue_put().
 */
struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	/* Only CAP_SYS_NICE callers may set priorities above normal */
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

/*
 * Whether scheduling timeout limits apply to this caller: always when
 * CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT is enabled, otherwise only for
 * callers lacking CAP_SYS_NICE.
 */
static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

/*
 * Fetch the [min, max] range for a scheduling property from the per-class
 * sysfs-configurable limits; privileged callers get the wider hardware
 * limits when the schedtimeout-limit config is enabled.
 */
static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	/* CAP_SYS_NICE callers are allowed the full hardware range */
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

static int
exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support HWDRM sessions right now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	if (!xe_pxp_is_enabled(xe->pxp))
		return -ENODEV;

	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

/*
 * Copy user-provided LRC hang-replay state into a kernel buffer; it is freed
 * with kvfree() in __xe_exec_queue_free().
 * NOTE(review): q->replay_state is assigned without freeing any previous
 * value — presumably this extension is supplied at most once per queue;
 * verify, otherwise a repeated extension would leak the first buffer.
 */
static int exec_queue_set_hang_replay_state(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 value)
{
	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
	u64 __user *address = u64_to_user_ptr(value);
	void *ptr;

	ptr = vmemdup_user(address, size);
	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
		return PTR_ERR(ptr);

	q->replay_state = ptr;

	return 0;
}

/*
 * Set up multi-queue group state on primary queue @q: the group struct, a
 * zeroed CGP BO, the LRC xarray (slot 0 reserved via XA_FLAGS_ALLOC1) and
 * the list lock.
 */
static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	struct xe_exec_queue_group *group;
	struct xe_bo *bo;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_PINNED_LATE_RESTORE |
				       XE_BO_FLAG_FORCE_USER_VRAM |
				       XE_BO_FLAG_GGTT_INVALIDATE |
				       XE_BO_FLAG_GGTT, false);
	if (IS_ERR(bo)) {
		drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
			PTR_ERR(bo));
		kfree(group);
		return PTR_ERR(bo);
	}

	xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);

	group->primary = q;
	group->cgp_bo = bo;
	INIT_LIST_HEAD(&group->list);
	xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
	mutex_init(&group->list_lock);
	q->multi_queue.group = group;

	/* group->list_lock is used in submission backend */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&group->list_lock);
		fs_reclaim_release(GFP_KERNEL);
	}

	return 0;
}

static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
{
	return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
}

/*
 * Validate that @primary_id names a compatible primary queue (same VM and
 * logical mask) and attach @q to its group as a secondary. The reference
 * taken on the primary is held until xe_exec_queue_group_cleanup().
 */
static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
					u32 primary_id)
{
	struct xe_exec_queue_group *group;
	struct xe_exec_queue *primary;
	int ret;

	/*
	 * Get from below xe_exec_queue_lookup() pairs with put
	 * in xe_exec_queue_group_cleanup().
	 */
	primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
	if (XE_IOCTL_DBG(xe, !primary))
		return -ENOENT;

	if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
	    XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
	    XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
		ret = -EINVAL;
		goto put_primary;
	}

	group = primary->multi_queue.group;
	q->multi_queue.valid = true;
	q->multi_queue.group = group;

	return 0;
put_primary:
	xe_exec_queue_put(primary);
	return ret;
}

#define XE_MAX_GROUP_SIZE 64
/* Register secondary @q's LRC in its group xarray; slot 0 is the primary's */
static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 pos;
	int err;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	/* Primary queue holds a reference to LRCs of all secondary queues */
	err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
		       XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, err)) {
		xe_lrc_put(q->lrc[0]);

		/* It is invalid if queue group limit is exceeded */
		if (err == -EBUSY)
			err = -EINVAL;

		return err;
	}

	q->multi_queue.pos = pos;

	return 0;
}

/* Reverse of xe_exec_queue_group_add(): unregister the LRC and drop its ref */
static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	lrc = xa_erase(&group->xa, q->multi_queue.pos);
	xe_assert(xe, lrc);
	xe_lrc_put(lrc);
}

/*
 * Handle the MULTI_GROUP property: either mark @q as a new group primary
 * (DRM_XE_MULTI_GROUP_CREATE) or treat @value as the id of an existing
 * primary and join @q to that group as a secondary.
 */
static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value)
{
	if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
		return -ENODEV;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
		return -EOPNOTSUPP;

	if (XE_IOCTL_DBG(xe, !q->vm->xef))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
		return -EINVAL;

	if (value & DRM_XE_MULTI_GROUP_CREATE) {
		/* No other bits may accompany the CREATE flag */
		if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE))
			return -EINVAL;

		q->multi_queue.valid = true;
		q->multi_queue.is_primary = true;
		q->multi_queue.pos = 0;
		return 0;
	}

	/* While adding secondary queues, the upper 32 bits must be 0 */
	if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
		return -EINVAL;

	return xe_exec_queue_group_validate(xe, q, value);
}

static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
					       u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	/* For queue creation time (!q->xef) setting, just store the priority value */
	if (!q->xef) {
		q->multi_queue.priority = value;
		return 0;
	}

	if (!xe_exec_queue_is_multi_queue(q))
		return -EINVAL;

	return q->ops->set_multi_queue_priority(q, value);
}

typedef int
(*xe_exec_queue_set_property_fn)(struct xe_device *xe,
				 struct xe_exec_queue *q,
				 u64 value);

/* Dispatch table indexed by DRM_XE_EXEC_QUEUE_SET_PROPERTY_* values */
static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
				exec_queue_set_multi_queue_priority,
};

int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_set_property *args = data;
	struct xe_exec_queue *q;
	int ret;
	u32 idx;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* MULTI_QUEUE_PRIORITY is the only property settable after creation */
	if (XE_IOCTL_DBG(xe, args->property !=
			 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	/* Clamp the table index against speculative out-of-bounds access */
	idx = array_index_nospec(args->property,
				 ARRAY_SIZE(exec_queue_set_property_funcs));
	ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
	if (XE_IOCTL_DBG(xe, ret))
		goto err_post_lookup;

	xe_exec_queue_put(q);
	return 0;

err_post_lookup:
	xe_exec_queue_put(q);
	return ret;
}

/* Incremental validity check run after each extension is parsed */
static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
{
	u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
		BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);

	/*
	 * Only MULTI_QUEUE_PRIORITY property is valid for secondary queues of a
	 * multi-queue group.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    properties & ~secondary_queue_valid_props)
		return -EINVAL;

	return 0;
}

/* Cross-property check run once the whole extension chain is parsed */
static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
{
	/* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
	if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
	    !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
		return -EINVAL;

	return 0;
}

/*
 * Parse one drm_xe_ext_set_property node: validate the property id, record it
 * in *properties for the cross-property checks, and invoke its setter.
 */
static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	*properties |= BIT_ULL(idx);
	err = exec_queue_user_ext_check(q, *properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension, u64 *properties);

/* Extension handlers indexed by drm_xe_user_extension.name */
static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

/* Cap on the user-extension chain length to bound the recursion below */
#define MAX_USER_EXTENSIONS	16
/*
 * Recursively walk the user-extension linked list starting at
 * @extensions, dispatching each node to its handler. @ext_number counts
 * nodes processed so far and terminates overlong (or cyclic) chains
 * with -E2BIG.
 */
static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
					u64 extensions, int ext_number, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	/* Handlers re-read the node at @extensions with their own layout */
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return __exec_queue_user_extensions(xe, q, ext.next_extension,
						    ++ext_number, properties);

	return 0;
}

/*
 * Parse and apply all creation-time user extensions for @q, then run the
 * final property cross-checks. For the primary queue of a multi-queue
 * group this also initializes the group state.
 */
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions)
{
	u64 properties = 0;
	int err;

	err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	err = exec_queue_user_ext_check_final(q, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (xe_exec_queue_is_multi_queue_primary(q)) {
		err = xe_exec_queue_group_init(xe, q);
		if (XE_IOCTL_DBG(xe, err))
			return err;
	}

	return 0;
}

/*
 * Validate the user-supplied width x num_placements engine matrix and
 * compute the logical engine mask for the queue.
 *
 * All referenced engines must exist, be unreserved, and share the same
 * GT and engine class. The returned mask is built from the first
 * parallel slot (i == 0); each subsequent slot's placements must be the
 * previous slot's placements shifted left by one (logically contiguous).
 *
 * Returns 0 when the matrix is invalid (0 is never a valid mask).
 */
static u32 calc_validate_logical_mask(struct xe_device *xe,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	/* Parallel/placed submission requires GuC submission */
	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			/* class/gt_id are seeded by n == 0, then must match */
			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

/*
 * Create an exec queue for user space. VM_BIND-class queues get one bind
 * queue per tile chained on multi_gt_list; all other classes get a
 * single queue placed per the validated logical mask. On success the
 * queue id is returned through args->exec_queue_id.
 */
int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 flags = 0;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	/* u16 * u16 cannot overflow the u32 len */
	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) * len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
		return -EINVAL;

	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		/* Bind queues are single-instance, single-placement only */
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		/* One bind queue per tile; tile 0's queue is the parent */
		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;

			flags |= EXEC_QUEUE_FLAG_VM;
			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, flags,
							args->extensions);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		logical_mask = calc_validate_logical_mask(xe, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, flags,
					 args->extensions);
		/*
		 * The local vm reference is dropped here; the queue holds its
		 * own reference (released in __xe_exec_queue_free), which
		 * presumably keeps vm valid for the uses below — note for
		 * review.
		 */
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			err = xe_exec_queue_group_add(xe, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto delete_queue_group;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				/*
				 * NOTE(review): this jumps past
				 * delete_queue_group, so a multi-queue
				 * secondary added via
				 * xe_exec_queue_group_add() above is not
				 * explicitly removed on this path — confirm
				 * xe_exec_queue_group_cleanup() during final
				 * put covers it.
				 */
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
delete_queue_group:
	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_exec_queue_group_delete(xe, q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

/*
 * Query a read-only property of an exec queue. Only
 * DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN (reset/ban status from the
 * submission backend) is currently supported.
 */
int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_lrc() - Get the LRC from exec queue.
 * @q: The exec_queue.
 *
 * Retrieves the primary LRC for the exec queue. Note that this function
 * returns only the first LRC instance, even when multiple parallel LRCs
 * are configured.
 *
 * Return: Pointer to LRC on success, error on failure
 */
struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
{
	return q->lrc[0];
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	/* Kernel bind queues (FLAG_VM) never count as long-running */
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grabs the migrate vm->resv so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		/* Parallel queues are idle only when every LRC has caught up */
		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	/* Idle == last emitted seqno has been reached by the HW */
	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
 * from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save run ticks
 * calculated by using the delta from last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u64 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by kernel doesn't have a corresponding xe_file
	 * and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;
	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them in different time could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill - permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	/* First kill any child bind queues chained on the multi-GT list */
	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);
}

/*
 * Destroy a user exec queue: erase the user-visible id (so no new
 * lookups can succeed), kill all execution, then drop the lookup
 * reference. pending_removal lets waiters observe in-flight teardown.
 */
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

/*
 * Assert that the lock protecting q->last_fence is held: the migrate job
 * lock for migration queues, vm->lock for bind queues, and the VM dma-resv
 * plus the hw engine group mode semaphore for everything else.
 */
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
		xe_migrate_job_lock_assert(q);
	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	/* Opportunistically drop an already-signaled last fence */
	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							  struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases reference count for fence, when
 * closing engine xe_exec_queue_last_fence_put should be called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 */
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
 * invalidation fence unlocked
 * @q: The exec queue
 * @type: Either primary or media GT
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
						     unsigned int type)
{
	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	/* dma_fence_put(NULL) is a no-op, so no NULL check needed */
	dma_fence_put(q->tlb_inval[type].last_fence);
	q->tlb_inval[type].last_fence = NULL;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
							 struct xe_vm *vm,
							 unsigned int type)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	/* TLB invalidation fences only exist on bind/migrate queues */
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));

	if (q->tlb_inval[type].last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
		     &q->tlb_inval[type].last_fence->flags))
		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);

	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @fence: The fence
 * @type: Either primary or media GT
 *
 * Set the last fence for the tlb invalidation type on the queue. Increases
 * reference count for fence, when closing queue
 * xe_exec_queue_tlb_inval_last_fence_put should be called.
 */
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    struct dma_fence *fence,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
	q->tlb_inval[type].last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
 * within all LRCs of a queue.
 * @q: the &xe_exec_queue struct instance containing target LRCs
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure
 */
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
	int i;
	int err = 0;

	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		lrc = READ_ONCE(q->lrc[i]);
		if (!lrc)
			continue;

		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
		xe_lrc_update_hwctx_regs_with_address(lrc);
		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
		if (err)
			break;
	}

	return err;
}