// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_pxp.h"
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"

/**
 * DOC: Execution Queue
 *
 * An execution queue is an interface to the HW context of execution. The
 * user creates an execution queue, submits GPU jobs through it, and
 * finally destroys it.
 *
 * Execution queues can also be created by XeKMD itself for driver-internal
 * operations such as object migration.
 *
 * An execution queue is associated with a specific HW engine, or a group of
 * engines (belonging to the same tile and engine class), and any GPU job
 * submitted on the queue will run on one of these engines.
 *
 * An execution queue is tied to an address space (VM). It holds a reference
 * to the associated VM and to the underlying Logical Ring Context(s) (LRCs)
 * until the queue is destroyed.
 *
 * The execution queue sits on top of the submission backend. It transparently
 * handles whichever backend the platform uses (GuC or Execlists), as well as
 * the ring operations supported by the different engine classes.
 */

/**
 * DOC: Multi Queue Group
 *
 * Multi queue group is another mode of execution supported by the compute
 * and blitter copy command streamers (CCS and BCS, respectively). It is
 * an enhancement of the existing hardware architecture and leverages the
 * same submission model. It enables efficient, parallel execution of
 * multiple queues within a single shared context. The multi queue group
 * functionality is only supported with the GuC submission backend, and
 * all queues of a group must use the same address space (VM).
 *
 * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP exec queue property
 * supports creating a multi queue group and adding queues to a queue group.
 *
 * A DRM_XE_EXEC_QUEUE_CREATE ioctl call carrying this property with the
 * value field set to DRM_XE_MULTI_GROUP_CREATE creates a new multi queue
 * group, with the queue being created becoming the primary queue (aka q0)
 * of the group. To add secondary queues to the group, create them with the
 * same property, passing the id of the primary queue as the value. The
 * properties of the primary queue (like priority and time slice) apply to
 * the whole group, so these properties can't be set for secondary queues
 * of a group.
 *
 * The hardware does not support removing a queue from a multi queue group,
 * but queues can be dynamically added to a group. A group can have up to
 * 64 queues. To support this, XeKMD holds references to the LRCs of the
 * queues, even after the queues are destroyed by the user, until the whole
 * group is destroyed. Secondary queues hold a reference to the primary
 * queue, thus preventing the group from being destroyed when the user
 * destroys the primary queue. Once the primary queue is destroyed,
 * secondary queues can't be added to the queue group, but they can
 * continue to submit jobs if the DRM_XE_MULTI_GROUP_KEEP_ACTIVE flag was
 * set during multi queue group creation.
 *
 * The queues of a multi queue group can set their priority within the group
 * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property.
 * This multi queue priority can also be changed dynamically through the
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY ioctl. Other than
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP itself, this is the only
 * property supported by the secondary queues of a multi queue group.
 *
 * When the GuC reports an error on any queue of a multi queue group, the
 * queue cleanup mechanism is invoked for all queues of the group, as the
 * hardware cannot make progress on the multi queue context.
 */
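
/*
 * Illustrative userspace sketch (not kernel code) of multi queue group
 * creation, assuming the uapi/drm/xe_drm.h definitions; error handling and
 * engine-instance setup are omitted:
 *
 *	struct drm_xe_ext_set_property ext = {
 *		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
 *		.value = DRM_XE_MULTI_GROUP_CREATE,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.extensions = (uintptr_t)&ext,
 *		.width = 1,
 *		.num_placements = 1,
 *		.vm_id = vm_id,
 *		.instances = (uintptr_t)&eci,
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *	// create.exec_queue_id is now the group's primary queue (q0)
 *
 *	ext.value = create.exec_queue_id;	// add a secondary queue
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 */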

enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions);

static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;
	unsigned long idx;

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		/*
		 * Put pairs with get from xe_exec_queue_lookup() call
		 * in xe_exec_queue_group_validate().
		 */
		xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
		return;
	}

	if (!group)
		return;

	/* Primary queue cleanup */
	xa_for_each(&group->xa, idx, lrc)
		xe_lrc_put(lrc);

	xa_destroy(&group->xa);
	mutex_destroy(&group->list_lock);
	xe_bo_unpin_map_no_vm(group->cgp_bo);
	kfree(group);
}

static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
		if (q->tlb_inval[i].dep_scheduler)
			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	if (xe_exec_queue_is_multi_queue(q))
		xe_exec_queue_group_cleanup(q);

	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kvfree(q->replay_state);
	kfree(q);
}

static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
		struct xe_dep_scheduler *dep_scheduler;
		struct xe_gt *gt;
		struct workqueue_struct *wq;

		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
			gt = tile->primary_gt;
		else
			gt = tile->media_gt;

		if (!gt)
			continue;

		wq = gt->tlb_inval.job_wq;

#define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
							MAX_TLB_INVAL_JOBS);
		if (IS_ERR(dep_scheduler))
			return PTR_ERR(dep_scheduler);

		q->tlb_inval[i].dep_scheduler = dep_scheduler;
	}
#undef MAX_TLB_INVAL_JOBS

	return 0;
}
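
/*
 * First half of queue creation: allocate the queue and set up its software
 * state (scheduling properties, VM reference, user extensions). No HW or
 * backend state is touched here; that happens in __xe_exec_queue_init().
 */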
static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);
	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
				hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
		err = alloc_dep_schedulers(xe, q);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
	int i, err;
	u32 flags = 0;

	/*
	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
	 * other workload can use the EUs at the same time). On MTL this is done
	 * by setting the RUNALONE bit in the LRC, while starting from Xe2 there
	 * is a dedicated bit for it.
	 */
	if (xe_exec_queue_uses_pxp(q) &&
	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
			flags |= XE_LRC_CREATE_PXP;
		else
			flags |= XE_LRC_CREATE_RUNALONE;
	}

	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
		flags |= XE_LRC_CREATE_USER_CTX;

	err = q->ops->init(q);
	if (err)
		return err;

	/*
	 * This must occur after q->ops->init to avoid race conditions during VF
	 * post-migration recovery, as the fixups for the LRC GGTT addresses
	 * depend on the queue being present in the backend tracking structure.
	 *
	 * In addition to the above, we must wait on in-flight GGTT changes to
	 * avoid writing out stale values here. Such a wait provides a solid
	 * solution (without a race) only if the function can detect migration
	 * instantly from the moment the vCPU resumes execution.
	 */
	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
				    xe_lrc_ring_size(), q->msix_vec, flags);
		if (IS_ERR(lrc)) {
			err = PTR_ERR(lrc);
			goto err_lrc;
		}

		/* Pairs with READ_ONCE in xe_exec_queue_contexts_hwsp_rebase */
		WRITE_ONCE(q->lrc[i], lrc);
	}

	return 0;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

static void __xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	q->ops->fini(q);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);
}
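
/**
 * xe_exec_queue_create() - Create an exec queue.
 * @xe: Xe device.
 * @vm: VM the exec queue is tied to, or NULL for kernel queues without one.
 * @logical_mask: mask of logical engine instances the queue may run on.
 * @width: number of LRCs (greater than one for parallel submission).
 * @hwe: HW engine the queue is associated with.
 * @flags: EXEC_QUEUE_FLAG_* creation flags.
 * @extensions: chain of user extensions, or 0 if none.
 *
 * Return: exec queue on success, ERR_PTR on failure.
 */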
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q, flags);
	if (err)
		goto err_post_alloc;

	/*
	 * We can only add the queue to the PXP list after the init is complete,
	 * because the PXP termination can call exec_queue_kill and that will
	 * go bad if the queue is only half-initialized. This means that we
	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
	 * and we need to do it here instead.
	 */
	if (xe_exec_queue_uses_pxp(q)) {
		err = xe_pxp_exec_queue_add(xe->pxp, q);
		if (err)
			goto err_post_init;
	}

	return q;

err_post_init:
	__xe_exec_queue_fini(q);
err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);
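
/**
 * xe_exec_queue_create_class() - Create an exec queue spanning an engine class.
 * @xe: Xe device.
 * @gt: GT to select engines from.
 * @vm: VM the exec queue is tied to, or NULL.
 * @class: engine class to place the queue on.
 * @flags: EXEC_QUEUE_FLAG_* creation flags.
 * @extensions: chain of user extensions, or 0 if none.
 *
 * Builds a logical mask from all non-reserved engines of @class on @gt and
 * creates a width-1 queue on top of them.
 *
 * Return: exec queue on success, ERR_PTR on failure.
 */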
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. The bind exec queue is tied to the
 * migration VM for access to the physical memory required for page table
 * programming. On faulting devices the reserved copy engine instance must
 * be used to avoid deadlock (user binds can get stuck behind faults, so the
 * kernel binds which resolve those faults must not depend on user binds).
 * On non-faulting devices any copy engine can be used.
 *
 * Returns exec queue on success, ERR_PTR on failure
 */
struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
						struct xe_tile *tile,
						u32 flags, u64 extensions)
{
	struct xe_gt *gt = tile->primary_gt;
	struct xe_exec_queue *q;
	struct xe_vm *migrate_vm;

	migrate_vm = xe_migrate_get_vm(tile->migrate);
	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
							   XE_ENGINE_CLASS_COPY,
							   gt->usm.reserved_bcs_instance,
							   false);

		if (!hwe) {
			xe_vm_put(migrate_vm);
			return ERR_PTR(-EINVAL);
		}

		q = xe_exec_queue_create(xe, migrate_vm,
					 BIT(hwe->logical_instance), 1, hwe,
					 flags, extensions);
	} else {
		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
					       XE_ENGINE_CLASS_COPY, flags,
					       extensions);
	}
	xe_vm_put(migrate_vm);

	if (!IS_ERR(q)) {
		int err = drm_syncobj_create(&q->ufence_syncobj,
					     DRM_SYNCOBJ_CREATE_SIGNALED,
					     NULL);
		if (err) {
			xe_exec_queue_put(q);
			return ERR_PTR(err);
		}
	}

	return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

static void xe_exec_queue_group_kill(struct kref *ref)
{
	struct xe_exec_queue_group *group = container_of(ref, struct xe_exec_queue_group,
							 kill_refcount);

	xe_exec_queue_kill(group->primary);
}

static inline void xe_exec_queue_group_kill_get(struct xe_exec_queue_group *group)
{
	kref_get(&group->kill_refcount);
}

void xe_exec_queue_group_kill_put(struct xe_exec_queue_group *group)
{
	if (!group)
		return;

	kref_put(&group->kill_refcount, xe_exec_queue_group_kill);
}

void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;
	int i;

	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);

	if (q->ufence_syncobj)
		drm_syncobj_put(q->ufence_syncobj);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	xe_exec_queue_last_fence_put_unlocked(q);
	for_each_tlb_inval(i)
		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);

	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	q->ops->destroy(q);
}
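
/**
 * xe_exec_queue_fini() - Final per-queue cleanup.
 * @q: The exec queue
 *
 * Tears down the LRCs and frees the queue memory once the queue is no
 * longer in use by the submission backend.
 */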
void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wakeup any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	__xe_exec_queue_fini(q);
	__xe_exec_queue_free(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

static int
exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support HWDRM sessions right now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	if (!xe_pxp_is_enabled(xe->pxp))
		return -ENODEV;

	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

static int exec_queue_set_hang_replay_state(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 value)
{
	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
	u64 __user *address = u64_to_user_ptr(value);
	void *ptr;

	ptr = vmemdup_user(address, size);
	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
		return PTR_ERR(ptr);

	q->replay_state = ptr;

	return 0;
}
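
/*
 * Initialize a multi queue group with @q as its primary queue: allocate the
 * CGP BO shared by the whole group along with the bookkeeping state (LRC
 * xarray, group list and kill refcount).
 */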
static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	struct xe_exec_queue_group *group;
	struct xe_bo *bo;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_PINNED_LATE_RESTORE |
				       XE_BO_FLAG_FORCE_USER_VRAM |
				       XE_BO_FLAG_GGTT_INVALIDATE |
				       XE_BO_FLAG_GGTT, false);
	if (IS_ERR(bo)) {
		drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
			PTR_ERR(bo));
		kfree(group);
		return PTR_ERR(bo);
	}

	xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);

	group->primary = q;
	group->cgp_bo = bo;
	INIT_LIST_HEAD(&group->list);
	kref_init(&group->kill_refcount);
	xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
	mutex_init(&group->list_lock);
	q->multi_queue.group = group;

	/* group->list_lock is used in submission backend */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&group->list_lock);
		fs_reclaim_release(GFP_KERNEL);
	}

	return 0;
}

static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
{
	return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
}

static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
					u32 primary_id)
{
	struct xe_exec_queue_group *group;
	struct xe_exec_queue *primary;
	int ret;

	/*
	 * Get from below xe_exec_queue_lookup() pairs with put
	 * in xe_exec_queue_group_cleanup().
	 */
	primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
	if (XE_IOCTL_DBG(xe, !primary))
		return -ENOENT;

	if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
	    XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
	    XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
		ret = -EINVAL;
		goto put_primary;
	}

	group = primary->multi_queue.group;
	q->multi_queue.valid = true;
	q->multi_queue.group = group;

	return 0;

put_primary:
	xe_exec_queue_put(primary);
	return ret;
}
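
/*
 * Add a secondary queue to its group. Slot 0 of the group xarray belongs to
 * the primary queue, so secondary queues take slots 1..XE_MAX_GROUP_SIZE - 1,
 * capping a group at 64 queues in total.
 */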
#define XE_MAX_GROUP_SIZE	64
static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 pos;
	int err;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	/* Primary queue holds a reference to LRCs of all secondary queues */
	err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
		       XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, err)) {
		xe_lrc_put(q->lrc[0]);

		/* It is invalid if queue group limit is exceeded */
		if (err == -EBUSY)
			err = -EINVAL;

		return err;
	}

	q->multi_queue.pos = pos;

	if (group->primary->multi_queue.keep_active) {
		xe_exec_queue_group_kill_get(group);
		q->multi_queue.keep_active = true;
	}

	return 0;
}

static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	lrc = xa_erase(&group->xa, q->multi_queue.pos);
	xe_assert(xe, lrc);
	xe_lrc_put(lrc);

	if (q->multi_queue.keep_active) {
		xe_exec_queue_group_kill_put(group);
		q->multi_queue.keep_active = false;
	}
}
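
/*
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP handler: @value carries either
 * DRM_XE_MULTI_GROUP_CREATE (optionally ORed with KEEP_ACTIVE) to create a
 * new group with @q as its primary queue, or the exec queue id of an
 * existing primary queue to join its group as a secondary queue.
 */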
static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value)
{
	if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
		return -ENODEV;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
		return -EOPNOTSUPP;

	if (XE_IOCTL_DBG(xe, !q->vm->xef))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
		return -EINVAL;

	if (value & DRM_XE_MULTI_GROUP_CREATE) {
		if (XE_IOCTL_DBG(xe, value & ~(DRM_XE_MULTI_GROUP_CREATE |
					       DRM_XE_MULTI_GROUP_KEEP_ACTIVE)))
			return -EINVAL;

		/*
		 * KEEP_ACTIVE is not supported in preempt fence mode as in
		 * that mode the VM_DESTROY ioctl expects all exec queues of
		 * the VM to already be killed.
		 */
		if (XE_IOCTL_DBG(xe, (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE) &&
				 xe_vm_in_preempt_fence_mode(q->vm)))
			return -EINVAL;

		q->multi_queue.valid = true;
		q->multi_queue.is_primary = true;
		q->multi_queue.pos = 0;
		if (value & DRM_XE_MULTI_GROUP_KEEP_ACTIVE)
			q->multi_queue.keep_active = true;

		return 0;
	}

	/* While adding secondary queues, the upper 32 bits must be 0 */
	if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
		return -EINVAL;

	return xe_exec_queue_group_validate(xe, q, value);
}

static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
					       u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	/* At queue creation time (!q->xef), just store the priority value */
	if (!q->xef) {
		q->multi_queue.priority = value;
		return 0;
	}

	if (!xe_exec_queue_is_multi_queue(q))
		return -EINVAL;

	return q->ops->set_multi_queue_priority(q, value);
}
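
/*
 * Illustrative userspace sketch (not kernel code) of a dynamic multi queue
 * priority change; the ioctl macro and the priority value are assumed to
 * follow uapi/drm/xe_drm.h, and error handling is omitted:
 *
 *	struct drm_xe_exec_queue_set_property args = {
 *		.exec_queue_id = qid,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY,
 *		.value = prio,	// one of the uapi multi queue priority levels
 *	};
 *
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY, &args);
 */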
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
		exec_queue_set_multi_queue_priority,
};

int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_set_property *args = data;
	struct xe_exec_queue *q;
	int ret;
	u32 idx;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->property !=
			 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	idx = array_index_nospec(args->property,
				 ARRAY_SIZE(exec_queue_set_property_funcs));
	ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
	if (XE_IOCTL_DBG(xe, ret))
		goto err_post_lookup;

	xe_exec_queue_put(q);
	return 0;

err_post_lookup:
	xe_exec_queue_put(q);
	return ret;
}

static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
{
	u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
		BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);

	/*
	 * Only the MULTI_QUEUE_PRIORITY property is valid for secondary
	 * queues of a multi-queue group.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    properties & ~secondary_queue_valid_props)
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
{
	/* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
	if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
	    !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	*properties |= BIT_ULL(idx);
	err = exec_queue_user_ext_check(q, *properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension, u64 *properties);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	16
static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
					u64 extensions, int ext_number, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return __exec_queue_user_extensions(xe, q, ext.next_extension,
						    ++ext_number, properties);

	return 0;
}

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions)
{
	u64 properties = 0;
	int err;

	err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	err = exec_queue_user_ext_check_final(q, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (xe_exec_queue_is_multi_queue_primary(q)) {
		err = xe_exec_queue_group_init(xe, q);
		if (XE_IOCTL_DBG(xe, err))
			return err;
	}

	return 0;
}
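
/*
 * Validate the user-supplied width x num_placements matrix of engine
 * instances and fold it into a logical engine mask. All instances must be
 * on the same GT and of the same engine class, and for parallel submission
 * (width > 1) each placement must be logically contiguous.
 */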
static u32 calc_validate_logical_mask(struct xe_device *xe,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}
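
/**
 * xe_exec_queue_create_ioctl() - Create an exec queue on behalf of userspace
 * @dev: DRM device
 * @data: ioctl payload (struct drm_xe_exec_queue_create)
 * @file: DRM file
 *
 * Creates either a regular engine queue, or, for
 * DRM_XE_ENGINE_CLASS_VM_BIND, one bind queue per tile with the per-tile
 * queues linked off the first one through &xe_exec_queue.multi_gt_link.
 *
 * Return: 0 on success, negative error code on failure.
 */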
int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 flags = 0;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) * len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
		return -EINVAL;

	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;

			flags |= EXEC_QUEUE_FLAG_VM;
			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, flags,
							args->extensions);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		logical_mask = calc_validate_logical_mask(xe, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, flags,
					 args->extensions);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			err = xe_exec_queue_group_add(xe, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto delete_queue_group;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
delete_queue_group:
	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_exec_queue_group_delete(xe, q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_lrc() - Get the LRC from exec queue.
 * @q: The exec_queue.
 *
 * Retrieves the primary LRC for the exec queue. Note that this function
 * returns only the first LRC instance, even when multiple parallel LRCs
 * are configured.
 *
 * Return: Pointer to LRC on success, error on failure
 */
struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
{
	return q->lrc[0];
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec
 * queue from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save run ticks
 * calculated by using the delta from last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u64 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by the kernel don't have a corresponding
	 * xe_file and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;

	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them at different times could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill() - Permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);

	if (!xe_exec_queue_is_multi_queue_primary(q) && q->multi_queue.keep_active) {
		xe_exec_queue_group_kill_put(q->multi_queue.group);
		q->multi_queue.keep_active = false;
	}
}
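
/**
 * xe_exec_queue_destroy_ioctl() - DRM_XE_EXEC_QUEUE_DESTROY ioctl handler
 * @dev: DRM device
 * @data: ioctl payload (struct drm_xe_exec_queue_destroy)
 * @file: DRM file
 *
 * Removes the user id and kills the queue. For the primary queue of a
 * multi queue group, the group kill reference is dropped instead, so the
 * group is only killed once the last reference is gone.
 *
 * Return: 0 on success, negative error code on failure.
 */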
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	if (xe_exec_queue_is_multi_queue_primary(q))
		xe_exec_queue_group_kill_put(q->multi_queue.group);
	else
		xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
		xe_migrate_job_lock_assert(q);
	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							  struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases the reference count of
 * @fence; when closing the engine, xe_exec_queue_last_fence_put() should
 * be called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 */
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
 * invalidation fence unlocked
 * @q: The exec queue
 * @type: Either primary or media GT
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
						     unsigned int type)
{
	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	dma_fence_put(q->tlb_inval[type].last_fence);
	q->tlb_inval[type].last_fence = NULL;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
							 struct xe_vm *vm,
							 unsigned int type)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));

	if (q->tlb_inval[type].last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
		     &q->tlb_inval[type].last_fence->flags))
		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);

	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @fence: The fence
 * @type: Either primary or media GT
 *
 * Set the last fence for the TLB invalidation type on the queue. Increases
 * the reference count of @fence; when closing the queue,
 * xe_exec_queue_tlb_inval_last_fence_put() should be called.
 */
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    struct dma_fence *fence,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
	q->tlb_inval[type].last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_contexts_hwsp_rebase() - Re-compute GGTT references
 * within all LRCs of a queue.
 * @q: the &xe_exec_queue struct instance containing target LRCs
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure
 */
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
	int i;
	int err = 0;

	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		lrc = READ_ONCE(q->lrc[i]);
		if (!lrc)
			continue;

		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
		xe_lrc_update_hwctx_regs_with_address(lrc);
		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
		if (err)
			break;
	}

	return err;
}