// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_pxp.h"

/**
 * DOC: Execution Queue
 *
 * An execution queue is an interface to the HW context of execution.
 * The user creates an execution queue, submits GPU jobs through that
 * queue and destroys it when done.
 *
 * Execution queues can also be created by XeKMD itself for driver-internal
 * operations such as object migration.
 *
 * An execution queue is associated with a specific HW engine, or a group of
 * engines (belonging to the same tile and engine class), and any GPU job
 * submitted on the queue will run on one of these engines.
 *
 * An execution queue is tied to an address space (VM). It holds a reference
 * to the associated VM and to the underlying Logical Ring Context(s) (LRCs)
 * until the queue is destroyed.
 *
 * The execution queue sits on top of the submission backend. It opaquely
 * handles the GuC and Execlist backends, whichever the platform uses, and
 * the ring operations the different engine classes support.
 */
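
/*
 * Illustrative userspace sketch (not compiled here; assumes the uAPI in
 * uapi/drm/xe_drm.h): creating an execution queue on a compute engine and
 * destroying it again. drm_fd, vm_id and the engine/GT numbers are
 * placeholders.
 *
 *	struct drm_xe_engine_class_instance eci = {
 *		.engine_class = DRM_XE_ENGINE_CLASS_COMPUTE,
 *		.engine_instance = 0,
 *		.gt_id = 0,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.width = 1,
 *		.num_placements = 1,
 *		.vm_id = vm_id,
 *		.instances = (uintptr_t)&eci,
 *	};
 *
 *	ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *
 *	... submit jobs with DRM_IOCTL_XE_EXEC using create.exec_queue_id ...
 *
 *	struct drm_xe_exec_queue_destroy destroy = {
 *		.exec_queue_id = create.exec_queue_id,
 *	};
 *	ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);
 */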

/**
 * DOC: Multi Queue Group
 *
 * Multi queue group is another mode of execution supported by the compute
 * and blitter copy command streamers (CCS and BCS, respectively). It is
 * an enhancement of the existing hardware architecture and leverages the
 * same submission model. It enables efficient, parallel execution of
 * multiple queues within a single shared context. The multi queue group
 * functionality is only supported with the GuC submission backend.
 * All the queues of a group must use the same address space (VM).
 *
 * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP execution queue property
 * supports creating a multi queue group and adding queues to a queue group.
 *
 * A DRM_XE_EXEC_QUEUE_CREATE ioctl call with the above property and the value
 * field set to DRM_XE_MULTI_GROUP_CREATE creates a new multi queue group with
 * the queue being created as the primary queue (aka q0) of the group. To add
 * secondary queues to the group, they need to be created with the above
 * property with the id of the primary queue as the value. The properties of
 * the primary queue (like priority, time slice) apply to the whole group,
 * so these properties can't be set for the secondary queues of a group.
 *
 * The hardware does not support removing a queue from a multi-queue group.
 * However, queues can be dynamically added to the group. A group can have
 * up to 64 queues. To support this, XeKMD holds references to the LRCs of the
 * queues even after the queues are destroyed by the user, until the whole
 * group is destroyed. The secondary queues hold a reference to the primary
 * queue, thus preventing the group from being destroyed when the user destroys
 * the primary queue. Once the primary queue is destroyed, secondary queues
 * can't be added to the queue group, but they can continue to submit jobs
 * if the DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag was set during multi queue
 * group creation.
 *
 * The queues of a multi queue group can set their priority within the group
 * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property.
 * This multi queue priority can also be set dynamically through the
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY ioctl. This is the only other property
 * supported by the secondary queues of a multi queue group, other than
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP.
 *
 * When GuC reports an error on any of the queues of a multi queue group,
 * the queue cleanup mechanism is invoked for all the queues of the group
 * as the hardware cannot make progress on the multi queue context.
 *
 * Refer to :ref:`multi-queue-group-guc-interface` for the multi queue group
 * GuC interface.
 */
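
/*
 * Illustrative userspace sketch (not compiled here): creating a multi queue
 * group and then adding a secondary queue to it, using the
 * DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension with the properties
 * described above. Field names follow uapi/drm/xe_drm.h; vm_id and eci are
 * set up as in the previous example and elided.
 *
 *	struct drm_xe_ext_set_property ext = {
 *		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
 *		.value = DRM_XE_MULTI_GROUP_CREATE,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.extensions = (uintptr_t)&ext,
 *		.width = 1,
 *		.num_placements = 1,
 *		.vm_id = vm_id,
 *		.instances = (uintptr_t)&eci,
 *	};
 *
 *	ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *
 *	The queue just created is the primary queue (q0) of a new group. A
 *	secondary queue is added by passing the primary's id as the value:
 *
 *	ext.value = create.exec_queue_id;
 *	create.extensions = (uintptr_t)&ext;
 *	ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 */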

enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions);

static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;
	unsigned long idx;

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		/*
		 * Put pairs with get from xe_exec_queue_lookup() call
		 * in xe_exec_queue_group_validate().
		 */
		xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
		return;
	}

	if (!group)
		return;

	/* Primary queue cleanup */
	xa_for_each(&group->xa, idx, lrc)
		xe_lrc_put(lrc);

	xa_destroy(&group->xa);
	mutex_destroy(&group->list_lock);
	xe_bo_unpin_map_no_vm(group->cgp_bo);
	kfree(group);
}

static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
		if (q->tlb_inval[i].dep_scheduler)
			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	if (xe_exec_queue_is_multi_queue(q))
		xe_exec_queue_group_cleanup(q);

	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kvfree(q->replay_state);
	kfree(q);
}

static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
		struct xe_dep_scheduler *dep_scheduler;
		struct xe_gt *gt;
		struct workqueue_struct *wq;

		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
			gt = tile->primary_gt;
		else
			gt = tile->media_gt;

		if (!gt)
			continue;

		wq = gt->tlb_inval.job_wq;

#define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
							MAX_TLB_INVAL_JOBS);
		if (IS_ERR(dep_scheduler))
			return PTR_ERR(dep_scheduler);

		q->tlb_inval[i].dep_scheduler = dep_scheduler;
	}
#undef MAX_TLB_INVAL_JOBS

	return 0;
}

static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);
	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
				hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
		err = alloc_dep_schedulers(xe, q);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
	int i, err;
	u32 flags = 0;

	/*
	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
	 * other workload can use the EUs at the same time). On MTL this is done
	 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
	 * is a dedicated bit for it.
	 */
	if (xe_exec_queue_uses_pxp(q) &&
	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
			flags |= XE_LRC_CREATE_PXP;
		else
			flags |= XE_LRC_CREATE_RUNALONE;
	}

	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
		flags |= XE_LRC_CREATE_USER_CTX;

	err = q->ops->init(q);
	if (err)
		return err;

	/*
	 * This must occur after q->ops->init to avoid race conditions during VF
	 * post-migration recovery, as the fixups for the LRC GGTT addresses
	 * depend on the queue being present in the backend tracking structure.
	 *
	 * In addition to the above, we must wait on inflight GGTT changes to
	 * avoid writing out stale values here. Such a wait provides a solid
	 * solution (without a race) only if the function can detect migration
	 * instantly from the moment the vCPU resumes execution.
	 */
	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
				    xe_lrc_ring_size(), q->msix_vec, flags);
		if (IS_ERR(lrc)) {
			err = PTR_ERR(lrc);
			goto err_lrc;
		}

		/* Pairs with READ_ONCE in xe_exec_queue_contexts_hwsp_rebase */
		WRITE_ONCE(q->lrc[i], lrc);
	}

	return 0;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

static void __xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	q->ops->fini(q);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);
}

struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q, flags);
	if (err)
		goto err_post_alloc;

	/*
	 * We can only add the queue to the PXP list after the init is complete,
	 * because the PXP termination can call exec_queue_kill and that will
	 * go bad if the queue is only half-initialized. This means that we
	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
	 * and we need to do it here instead.
	 */
	if (xe_exec_queue_uses_pxp(q)) {
		err = xe_pxp_exec_queue_add(xe->pxp, q);
		if (err)
			goto err_post_init;
	}

	return q;

err_post_init:
	__xe_exec_queue_fini(q);
err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);

struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
 * for access to physical memory required for page table programming. On
 * faulting devices the reserved copy engine instance must be used to avoid
 * deadlocking (user binds cannot get stuck behind faults as kernel binds which
 * resolve faults depend on user binds). On non-faulting devices any copy engine
 * can be used.
 *
 * Returns exec queue on success, ERR_PTR on failure
 */
struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
						struct xe_tile *tile,
						u32 flags, u64 extensions)
{
	struct xe_gt *gt = tile->primary_gt;
	struct xe_exec_queue *q;
	struct xe_vm *migrate_vm;

	migrate_vm = xe_migrate_get_vm(tile->migrate);
	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
							   XE_ENGINE_CLASS_COPY,
							   gt->usm.reserved_bcs_instance,
							   false);

		if (!hwe) {
			xe_vm_put(migrate_vm);
			return ERR_PTR(-EINVAL);
		}

		q = xe_exec_queue_create(xe, migrate_vm,
					 BIT(hwe->logical_instance), 1, hwe,
					 flags, extensions);
	} else {
		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
					       XE_ENGINE_CLASS_COPY, flags,
					       extensions);
	}
	xe_vm_put(migrate_vm);

	if (!IS_ERR(q)) {
		int err = drm_syncobj_create(&q->ufence_syncobj,
					     DRM_SYNCOBJ_CREATE_SIGNALED,
					     NULL);
		if (err) {
			xe_exec_queue_put(q);
			return ERR_PTR(err);
		}
	}

	return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

static void xe_exec_queue_group_kill(struct kref *ref)
{
	struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group,
							 kill_refcount);

	xe_exec_queue_kill(group->primary);
}

static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group)
{
	kref_get(&group->kill_refcount);
}

void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group)
{
	if (!group)
		return;

	kref_put(&group->kill_refcount, xe_exec_queue_group_kill);
}

void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;
	int i;

	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);

	if (q->ufence_syncobj)
		drm_syncobj_put(q->ufence_syncobj);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	xe_exec_queue_last_fence_put_unlocked(q);
	for_each_tlb_inval(i)
		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);

	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	q->ops->destroy(q);
}

void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wake up any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	__xe_exec_queue_fini(q);
	__xe_exec_queue_free(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

static int
exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support HWDRM sessions right now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	if (!xe_pxp_is_enabled(xe->pxp))
		return -ENODEV;

	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

static int exec_queue_set_hang_replay_state(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 value)
{
	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
	u64 __user *address = u64_to_user_ptr(value);
	void *ptr;

	ptr = vmemdup_user(address, size);
	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
		return PTR_ERR(ptr);

	q->replay_state = ptr;

	return 0;
}

static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	struct xe_exec_queue_group *group;
	struct xe_bo *bo;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_PINNED_LATE_RESTORE |
				       XE_BO_FLAG_FORCE_USER_VRAM |
				       XE_BO_FLAG_GGTT_INVALIDATE |
				       XE_BO_FLAG_GGTT, false);
	if (IS_ERR(bo)) {
		drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
			PTR_ERR(bo));
		kfree(group);
		return PTR_ERR(bo);
	}

	xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);

	group->primary = q;
	group->cgp_bo = bo;
	INIT_LIST_HEAD(&group->list);
	kref_init(&group->kill_refcount);
	xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
	mutex_init(&group->list_lock);
	q->multi_queue.group = group;

	/* group->list_lock is used in submission backend */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&group->list_lock);
		fs_reclaim_release(GFP_KERNEL);
	}

	return 0;
}

static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
{
	return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
}

static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
					u32 primary_id)
{
	struct xe_exec_queue_group *group;
	struct xe_exec_queue *primary;
	int ret;

	/*
	 * Get from below xe_exec_queue_lookup() pairs with put
	 * in xe_exec_queue_group_cleanup().
	 */
	primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
	if (XE_IOCTL_DBG(xe, !primary))
		return -ENOENT;

	if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
	    XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
	    XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
		ret = -EINVAL;
		goto put_primary;
	}

	group = primary->multi_queue.group;
	q->multi_queue.valid = true;
	q->multi_queue.group = group;

	return 0;

put_primary:
	xe_exec_queue_put(primary);
	return ret;
}

#define XE_MAX_GROUP_SIZE	64
static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 pos;
	int err;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	/* Primary queue holds a reference to the LRCs of all secondary queues */
	err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
		       XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, err)) {
		xe_lrc_put(q->lrc[0]);

		/* Exceeding the queue group size limit is an invalid request */
		if (err == -EBUSY)
			err = -EINVAL;

		return err;
	}

	q->multi_queue.pos = pos;

	if (group->primary->multi_queue.keep_active) {
		xe_exec_queue_group_kill_get(group);
		q->multi_queue.keep_active = true;
	}

	return 0;
}

static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	lrc = xa_erase(&group->xa, q->multi_queue.pos);
	xe_assert(xe, lrc);
	xe_lrc_put(lrc);

	if (q->multi_queue.keep_active) {
		xe_exec_queue_group_kill_put(group);
		q->multi_queue.keep_active = false;
	}
}

static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value)
{
	if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
		return -ENODEV;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
		return -EOPNOTSUPP;

	if (XE_IOCTL_DBG(xe, !q->vm->xef))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
		return -EINVAL;

	if (value & DRM_XE_MULTI_GROUP_CREATE) {
		if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE |
					       DRM_XE_MULTI_GROUP_KEEP_ACTIVE)))
			return -EINVAL;

		/*
		 * KEEP_ACTIVE is not supported in preempt fence mode as in that
		 * mode the VM_DESTROY ioctl expects all exec queues of the VM to
		 * have already been killed.
		 */
		if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) &&
				 xe_vm_in_preempt_fence_mode(q->vm)))
			return -EINVAL;

		q->multi_queue.valid = true;
		q->multi_queue.is_primary = true;
		q->multi_queue.pos = 0;
		if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE)
			q->multi_queue.keep_active = true;

		return 0;
	}

	/* While adding secondary queues, the upper 32 bits must be 0 */
	if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
		return -EINVAL;

	return xe_exec_queue_group_validate(xe, q, value);
}

static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
					       u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	/* For queue creation time (!q->xef) setting, just store the priority value */
	if (!q->xef) {
		q->multi_queue.priority = value;
		return 0;
	}

	if (!xe_exec_queue_is_multi_queue(q))
		return -EINVAL;

	return q->ops->set_multi_queue_priority(q, value);
}

typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
		exec_queue_set_multi_queue_priority,
};

int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_set_property *args = data;
	struct xe_exec_queue *q;
	int ret;
	u32 idx;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->property !=
			 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	idx = array_index_nospec(args->property,
				 ARRAY_SIZE(exec_queue_set_property_funcs));
	ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
	if (XE_IOCTL_DBG(xe, ret))
		goto err_post_lookup;

	xe_exec_queue_put(q);
	return 0;

err_post_lookup:
	xe_exec_queue_put(q);
	return ret;
}

static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
{
	u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
		BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);

	/*
	 * Only the MULTI_GROUP and MULTI_QUEUE_PRIORITY properties are valid
	 * for secondary queues of a multi-queue group.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    properties & ~secondary_queue_valid_props)
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
{
	/* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
	if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
	    !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	*properties |= BIT_ULL(idx);
	err = exec_queue_user_ext_check(q, *properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension, u64 *properties);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	16
static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
					u64 extensions, int ext_number, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return __exec_queue_user_extensions(xe, q, ext.next_extension,
						    ++ext_number, properties);

	return 0;
}

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions)
{
	u64 properties = 0;
	int err;

	err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	err = exec_queue_user_ext_check_final(q, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (xe_exec_queue_is_multi_queue_primary(q)) {
		err = xe_exec_queue_group_init(xe, q);
		if (XE_IOCTL_DBG(xe, err))
			return err;
	}

	return 0;
}

static u32 calc_validate_logical_mask(struct xe_device *xe,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 flags = 0;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) * len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
		return -EINVAL;

	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;

			flags |= EXEC_QUEUE_FLAG_VM;
			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, flags,
							args->extensions);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		logical_mask = calc_validate_logical_mask(xe, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, flags,
					 args->extensions);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			err = xe_exec_queue_group_add(xe, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto delete_queue_group;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
delete_queue_group:
	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_exec_queue_group_delete(xe, q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_lrc() - Get the LRC from exec queue.
 * @q: The exec_queue.
 *
 * Retrieves the primary LRC for the exec queue. Note that this function
 * returns only the first LRC instance, even when multiple parallel LRCs
 * are configured.
 *
 * Return: Pointer to LRC on success, error on failure
 */
struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
{
	return q->lrc[0];
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
 * from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save run ticks
 * calculated by using the delta from last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u64 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by the kernel don't have a corresponding
	 * xe_file and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;

	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them at different times could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill - permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);

	if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) {
		xe_exec_queue_group_kill_put(q->multi_queue.group);
		q->multi_queue.keep_active = false;
	}
}

int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	if (xe_exec_queue_is_multi_queue_primary(q))
		xe_exec_queue_group_kill_put(q->multi_queue.group);
	else
		xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
		xe_migrate_job_lock_assert(q);
	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							   struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases reference count for fence, when
 * closing engine xe_exec_queue_last_fence_put should be called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}
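
/*
 * Typical last-fence usage pattern (illustrative sketch, not a fixed API
 * contract): a caller ordering new work behind whatever previously ran on a
 * bind/VM queue would do roughly the following, under the locks checked by
 * xe_exec_queue_last_fence_lockdep_assert():
 *
 *	struct dma_fence *prev = xe_exec_queue_last_fence_get(q, vm);
 *
 *	Use "prev" as a dependency of the new job, then once the new job's
 *	fence exists:
 *
 *	xe_exec_queue_last_fence_set(q, vm, new_fence);
 *	dma_fence_put(prev);
 */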

/**
 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 */
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
 * invalidation fence unlocked
 * @q: The exec queue
 * @type: Either primary or media GT
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
						     unsigned int type)
{
	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	dma_fence_put(q->tlb_inval[type].last_fence);
	q->tlb_inval[type].last_fence = NULL;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
							  struct xe_vm *vm,
							  unsigned int type)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));

	if (q->tlb_inval[type].last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
		     &q->tlb_inval[type].last_fence->flags))
		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);

	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @fence: The fence
 * @type: Either primary or media GT
 *
 * Set the last fence for the tlb invalidation type on the queue. Increases
 * reference count for fence, when closing queue
 * xe_exec_queue_tlb_inval_last_fence_put should be called.
 */
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    struct dma_fence *fence,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
	q->tlb_inval[type].last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
 * within all LRCs of a queue.
 * @q: the &xe_exec_queue struct instance containing target LRCs
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure
 */
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
	int i;
	int err = 0;

	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		lrc = READ_ONCE(q->lrc[i]);
		if (!lrc)
			continue;

		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
		xe_lrc_update_hwctx_regs_with_address(lrc);
		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
		if (err)
			break;
	}

	return err;
}