1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_exec_queue.h" 7 8 #include <linux/nospec.h> 9 10 #include <drm/drm_device.h> 11 #include <drm/drm_drv.h> 12 #include <drm/drm_file.h> 13 #include <drm/drm_syncobj.h> 14 #include <uapi/drm/xe_drm.h> 15 16 #include "xe_bo.h" 17 #include "xe_dep_scheduler.h" 18 #include "xe_device.h" 19 #include "xe_gt.h" 20 #include "xe_gt_sriov_pf.h" 21 #include "xe_gt_sriov_vf.h" 22 #include "xe_hw_engine_class_sysfs.h" 23 #include "xe_hw_engine_group.h" 24 #include "xe_irq.h" 25 #include "xe_lrc.h" 26 #include "xe_macros.h" 27 #include "xe_migrate.h" 28 #include "xe_pm.h" 29 #include "xe_trace.h" 30 #include "xe_vm.h" 31 #include "xe_pxp.h" 32 33 /** 34 * DOC: Execution Queue 35 * 36 * An Execution queue is an interface for the HW context of execution. 37 * The user creates an execution queue, submits the GPU jobs through those 38 * queues and in the end destroys them. 39 * 40 * Execution queues can also be created by XeKMD itself for driver internal 41 * operations like object migration etc. 42 * 43 * An execution queue is associated with a specified HW engine or a group of 44 * engines (belonging to the same tile and engine class) and any GPU job 45 * submitted on the queue will be run on one of these engines. 46 * 47 * An execution queue is tied to an address space (VM). It holds a reference 48 * of the associated VM and the underlying Logical Ring Context/s (LRC/s) 49 * until the queue is destroyed. 50 * 51 * The execution queue sits on top of the submission backend. It opaquely 52 * handles the GuC and Execlist backends whichever the platform uses, and 53 * the ring operations the different engine classes support. 54 */ 55 56 /** 57 * DOC: Multi Queue Group 58 * 59 * Multi Queue Group is another mode of execution supported by the compute 60 * and blitter copy command streamers (CCS and BCS, respectively). It is 61 * an enhancement of the existing hardware architecture and leverages the 62 * same submission model. It enables support for efficient, parallel 63 * execution of multiple queues within a single shared context. The multi 64 * queue group functionality is only supported with GuC submission backend. 65 * All the queues of a group must use the same address space (VM). 66 * 67 * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE execution queue property 68 * supports creating a multi queue group and adding queues to a queue group. 69 * 70 * The XE_EXEC_QUEUE_CREATE ioctl call with above property with value field 71 * set to DRM_XE_MULTI_GROUP_CREATE, will create a new multi queue group with 72 * the queue being created as the primary queue (aka q0) of the group. To add 73 * secondary queues to the group, they need to be created with the above 74 * property with id of the primary queue as the value. The properties of 75 * the primary queue (like priority, time slice) applies to the whole group. 76 * So, these properties can't be set for secondary queues of a group. 77 * 78 * The hardware does not support removing a queue from a multi-queue group. 79 * However, queues can be dynamically added to the group. A group can have 80 * up to 64 queues. To support this, XeKMD holds references to LRCs of the 81 * queues even after the queues are destroyed by the user until the whole 82 * group is destroyed. The secondary queues hold a reference to the primary 83 * queue thus preventing the group from being destroyed when user destroys 84 * the primary queue. Once the primary queue is destroyed, secondary queues 85 * can't be added to the queue group and new job submissions on existing 86 * secondary queues are not allowed. 87 * 88 * The queues of a multi queue group can set their priority within the group 89 * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property. 90 * This multi queue priority can also be set dynamically through the 91 * XE_EXEC_QUEUE_SET_PROPERTY ioctl. This is the only other property 92 * supported by the secondary queues of a multi queue group, other than 93 * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE. 94 * 95 * When GuC reports an error on any of the queues of a multi queue group, 96 * the queue cleanup mechanism is invoked for all the queues of the group 97 * as hardware cannot make progress on the multi queue context. 98 * 99 * Refer :ref:`multi-queue-group-guc-interface` for multi queue group GuC 100 * interface. 101 */ 102 103 enum xe_exec_queue_sched_prop { 104 XE_EXEC_QUEUE_JOB_TIMEOUT = 0, 105 XE_EXEC_QUEUE_TIMESLICE = 1, 106 XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, 107 XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, 108 }; 109 110 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, 111 u64 extensions); 112 113 static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q) 114 { 115 struct xe_exec_queue_group *group = q->multi_queue.group; 116 struct xe_lrc *lrc; 117 unsigned long idx; 118 119 if (xe_exec_queue_is_multi_queue_secondary(q)) { 120 /* 121 * Put pairs with get from xe_exec_queue_lookup() call 122 * in xe_exec_queue_group_validate(). 123 */ 124 xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q)); 125 return; 126 } 127 128 if (!group) 129 return; 130 131 /* Primary queue cleanup */ 132 xa_for_each(&group->xa, idx, lrc) 133 xe_lrc_put(lrc); 134 135 xa_destroy(&group->xa); 136 mutex_destroy(&group->list_lock); 137 xe_bo_unpin_map_no_vm(group->cgp_bo); 138 kfree(group); 139 } 140 141 static void __xe_exec_queue_free(struct xe_exec_queue *q) 142 { 143 int i; 144 145 for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) 146 if (q->tlb_inval[i].dep_scheduler) 147 xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler); 148 149 if (xe_exec_queue_uses_pxp(q)) 150 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); 151 152 if (xe_exec_queue_is_multi_queue(q)) 153 xe_exec_queue_group_cleanup(q); 154 155 if (q->vm) 156 xe_vm_put(q->vm); 157 158 if (q->xef) 159 xe_file_put(q->xef); 160 161 kvfree(q->replay_state); 162 kfree(q); 163 } 164 165 static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q) 166 { 167 struct xe_tile *tile = gt_to_tile(q->gt); 168 int i; 169 170 for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) { 171 struct xe_dep_scheduler *dep_scheduler; 172 struct xe_gt *gt; 173 struct workqueue_struct *wq; 174 175 if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT) 176 gt = tile->primary_gt; 177 else 178 gt = tile->media_gt; 179 180 if (!gt) 181 continue; 182 183 wq = gt->tlb_inval.job_wq; 184 185 #define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */ 186 dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name, 187 MAX_TLB_INVAL_JOBS); 188 if (IS_ERR(dep_scheduler)) 189 return PTR_ERR(dep_scheduler); 190 191 q->tlb_inval[i].dep_scheduler = dep_scheduler; 192 } 193 #undef MAX_TLB_INVAL_JOBS 194 195 return 0; 196 } 197 198 static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, 199 struct xe_vm *vm, 200 u32 logical_mask, 201 u16 width, struct xe_hw_engine *hwe, 202 u32 flags, u64 extensions) 203 { 204 struct xe_exec_queue *q; 205 struct xe_gt *gt = hwe->gt; 206 int err; 207 208 /* only kernel queues can be permanent */ 209 XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); 210 211 q = kzalloc_flex(*q, lrc, width); 212 if (!q) 213 return ERR_PTR(-ENOMEM); 214 215 kref_init(&q->refcount); 216 q->flags = flags; 217 q->hwe = hwe; 218 q->gt = gt; 219 q->class = hwe->class; 220 q->width = width; 221 q->msix_vec = XE_IRQ_DEFAULT_MSIX; 222 q->logical_mask = logical_mask; 223 q->fence_irq = >->fence_irq[hwe->class]; 224 q->ring_ops = gt->ring_ops[hwe->class]; 225 q->ops = gt->exec_queue_ops; 226 INIT_LIST_HEAD(&q->lr.link); 227 INIT_LIST_HEAD(&q->multi_gt_link); 228 INIT_LIST_HEAD(&q->hw_engine_group_link); 229 INIT_LIST_HEAD(&q->pxp.link); 230 q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL; 231 232 q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; 233 q->sched_props.preempt_timeout_us = 234 hwe->eclass->sched_props.preempt_timeout_us; 235 q->sched_props.job_timeout_ms = 236 hwe->eclass->sched_props.job_timeout_ms; 237 if (q->flags & EXEC_QUEUE_FLAG_KERNEL && 238 q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) 239 q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; 240 else 241 q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; 242 243 if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) { 244 err = alloc_dep_schedulers(xe, q); 245 if (err) { 246 __xe_exec_queue_free(q); 247 return ERR_PTR(err); 248 } 249 } 250 251 if (vm) 252 q->vm = xe_vm_get(vm); 253 254 if (extensions) { 255 /* 256 * may set q->usm, must come before xe_lrc_create(), 257 * may overwrite q->sched_props, must come before q->ops->init() 258 */ 259 err = exec_queue_user_extensions(xe, q, extensions); 260 if (err) { 261 __xe_exec_queue_free(q); 262 return ERR_PTR(err); 263 } 264 } 265 266 return q; 267 } 268 269 static void __xe_exec_queue_fini(struct xe_exec_queue *q) 270 { 271 int i; 272 273 q->ops->fini(q); 274 275 for (i = 0; i < q->width; ++i) 276 xe_lrc_put(q->lrc[i]); 277 } 278 279 static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags) 280 { 281 int i, err; 282 u32 flags = 0; 283 284 /* 285 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no 286 * other workload can use the EUs at the same time). On MTL this is done 287 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there 288 * is a dedicated bit for it. 289 */ 290 if (xe_exec_queue_uses_pxp(q) && 291 (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) { 292 if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20) 293 flags |= XE_LRC_CREATE_PXP; 294 else 295 flags |= XE_LRC_CREATE_RUNALONE; 296 } 297 298 if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL)) 299 flags |= XE_LRC_CREATE_USER_CTX; 300 301 err = q->ops->init(q); 302 if (err) 303 return err; 304 305 /* 306 * This must occur after q->ops->init to avoid race conditions during VF 307 * post-migration recovery, as the fixups for the LRC GGTT addresses 308 * depend on the queue being present in the backend tracking structure. 309 * 310 * In addition to above, we must wait on inflight GGTT changes to avoid 311 * writing out stale values here. Such wait provides a solid solution 312 * (without a race) only if the function can detect migration instantly 313 * from the moment vCPU resumes execution. 314 */ 315 for (i = 0; i < q->width; ++i) { 316 struct xe_lrc *lrc; 317 318 xe_gt_sriov_vf_wait_valid_ggtt(q->gt); 319 lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state, 320 xe_lrc_ring_size(), q->msix_vec, flags); 321 if (IS_ERR(lrc)) { 322 err = PTR_ERR(lrc); 323 goto err_lrc; 324 } 325 326 /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */ 327 WRITE_ONCE(q->lrc[i], lrc); 328 } 329 330 return 0; 331 332 err_lrc: 333 __xe_exec_queue_fini(q); 334 return err; 335 } 336 337 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, 338 u32 logical_mask, u16 width, 339 struct xe_hw_engine *hwe, u32 flags, 340 u64 extensions) 341 { 342 struct xe_exec_queue *q; 343 int err; 344 345 /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */ 346 xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0))); 347 348 q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, 349 extensions); 350 if (IS_ERR(q)) 351 return q; 352 353 err = __xe_exec_queue_init(q, flags); 354 if (err) 355 goto err_post_alloc; 356 357 /* 358 * We can only add the queue to the PXP list after the init is complete, 359 * because the PXP termination can call exec_queue_kill and that will 360 * go bad if the queue is only half-initialized. This means that we 361 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc 362 * and we need to do it here instead. 363 */ 364 if (xe_exec_queue_uses_pxp(q)) { 365 err = xe_pxp_exec_queue_add(xe->pxp, q); 366 if (err) 367 goto err_post_init; 368 } 369 370 return q; 371 372 err_post_init: 373 __xe_exec_queue_fini(q); 374 err_post_alloc: 375 __xe_exec_queue_free(q); 376 return ERR_PTR(err); 377 } 378 ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO); 379 380 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, 381 struct xe_vm *vm, 382 enum xe_engine_class class, 383 u32 flags, u64 extensions) 384 { 385 struct xe_hw_engine *hwe, *hwe0 = NULL; 386 enum xe_hw_engine_id id; 387 u32 logical_mask = 0; 388 389 for_each_hw_engine(hwe, gt, id) { 390 if (xe_hw_engine_is_reserved(hwe)) 391 continue; 392 393 if (hwe->class == class) { 394 logical_mask |= BIT(hwe->logical_instance); 395 if (!hwe0) 396 hwe0 = hwe; 397 } 398 } 399 400 if (!logical_mask) 401 return ERR_PTR(-ENODEV); 402 403 return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); 404 } 405 406 /** 407 * xe_exec_queue_create_bind() - Create bind exec queue. 408 * @xe: Xe device. 409 * @tile: tile which bind exec queue belongs to. 410 * @flags: exec queue creation flags 411 * @user_vm: The user VM which this exec queue belongs to 412 * @extensions: exec queue creation extensions 413 * 414 * Normalize bind exec queue creation. Bind exec queue is tied to migration VM 415 * for access to physical memory required for page table programming. On a 416 * faulting devices the reserved copy engine instance must be used to avoid 417 * deadlocking (user binds cannot get stuck behind faults as kernel binds which 418 * resolve faults depend on user binds). On non-faulting devices any copy engine 419 * can be used. 420 * 421 * Returns exec queue on success, ERR_PTR on failure 422 */ 423 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, 424 struct xe_tile *tile, 425 struct xe_vm *user_vm, 426 u32 flags, u64 extensions) 427 { 428 struct xe_gt *gt = tile->primary_gt; 429 struct xe_exec_queue *q; 430 struct xe_vm *migrate_vm; 431 432 migrate_vm = xe_migrate_get_vm(tile->migrate); 433 if (xe->info.has_usm) { 434 struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, 435 XE_ENGINE_CLASS_COPY, 436 gt->usm.reserved_bcs_instance, 437 false); 438 439 if (!hwe) { 440 xe_vm_put(migrate_vm); 441 return ERR_PTR(-EINVAL); 442 } 443 444 q = xe_exec_queue_create(xe, migrate_vm, 445 BIT(hwe->logical_instance), 1, hwe, 446 flags, extensions); 447 } else { 448 q = xe_exec_queue_create_class(xe, gt, migrate_vm, 449 XE_ENGINE_CLASS_COPY, flags, 450 extensions); 451 } 452 xe_vm_put(migrate_vm); 453 454 if (!IS_ERR(q)) { 455 int err = drm_syncobj_create(&q->ufence_syncobj, 456 DRM_SYNCOBJ_CREATE_SIGNALED, 457 NULL); 458 if (err) { 459 xe_exec_queue_put(q); 460 return ERR_PTR(err); 461 } 462 463 if (user_vm) 464 q->user_vm = xe_vm_get(user_vm); 465 } 466 467 return q; 468 } 469 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); 470 471 void xe_exec_queue_destroy(struct kref *ref) 472 { 473 struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); 474 struct xe_exec_queue *eq, *next; 475 int i; 476 477 xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0); 478 479 if (q->ufence_syncobj) 480 drm_syncobj_put(q->ufence_syncobj); 481 482 if (xe_exec_queue_uses_pxp(q)) 483 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); 484 485 xe_exec_queue_last_fence_put_unlocked(q); 486 for_each_tlb_inval(i) 487 xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i); 488 489 if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { 490 list_for_each_entry_safe(eq, next, &q->multi_gt_list, 491 multi_gt_link) 492 xe_exec_queue_put(eq); 493 } 494 495 if (q->user_vm) { 496 xe_vm_put(q->user_vm); 497 q->user_vm = NULL; 498 } 499 500 q->ops->destroy(q); 501 } 502 503 void xe_exec_queue_fini(struct xe_exec_queue *q) 504 { 505 /* 506 * Before releasing our ref to lrc and xef, accumulate our run ticks 507 * and wakeup any waiters. 508 */ 509 xe_exec_queue_update_run_ticks(q); 510 if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) 511 wake_up_var(&q->xef->exec_queue.pending_removal); 512 513 __xe_exec_queue_fini(q); 514 __xe_exec_queue_free(q); 515 } 516 517 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) 518 { 519 switch (q->class) { 520 case XE_ENGINE_CLASS_RENDER: 521 snprintf(q->name, sizeof(q->name), "rcs%d", instance); 522 break; 523 case XE_ENGINE_CLASS_VIDEO_DECODE: 524 snprintf(q->name, sizeof(q->name), "vcs%d", instance); 525 break; 526 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 527 snprintf(q->name, sizeof(q->name), "vecs%d", instance); 528 break; 529 case XE_ENGINE_CLASS_COPY: 530 snprintf(q->name, sizeof(q->name), "bcs%d", instance); 531 break; 532 case XE_ENGINE_CLASS_COMPUTE: 533 snprintf(q->name, sizeof(q->name), "ccs%d", instance); 534 break; 535 case XE_ENGINE_CLASS_OTHER: 536 snprintf(q->name, sizeof(q->name), "gsccs%d", instance); 537 break; 538 default: 539 XE_WARN_ON(q->class); 540 } 541 } 542 543 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) 544 { 545 struct xe_exec_queue *q; 546 547 mutex_lock(&xef->exec_queue.lock); 548 q = xa_load(&xef->exec_queue.xa, id); 549 if (q) 550 xe_exec_queue_get(q); 551 mutex_unlock(&xef->exec_queue.lock); 552 553 return q; 554 } 555 556 enum xe_exec_queue_priority 557 xe_exec_queue_device_get_max_priority(struct xe_device *xe) 558 { 559 return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH : 560 XE_EXEC_QUEUE_PRIORITY_NORMAL; 561 } 562 563 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, 564 u64 value) 565 { 566 if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) 567 return -EINVAL; 568 569 if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) 570 return -EPERM; 571 572 q->sched_props.priority = value; 573 return 0; 574 } 575 576 static bool xe_exec_queue_enforce_schedule_limit(void) 577 { 578 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) 579 return true; 580 #else 581 return !capable(CAP_SYS_NICE); 582 #endif 583 } 584 585 static void 586 xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, 587 enum xe_exec_queue_sched_prop prop, 588 u32 *min, u32 *max) 589 { 590 switch (prop) { 591 case XE_EXEC_QUEUE_JOB_TIMEOUT: 592 *min = eclass->sched_props.job_timeout_min; 593 *max = eclass->sched_props.job_timeout_max; 594 break; 595 case XE_EXEC_QUEUE_TIMESLICE: 596 *min = eclass->sched_props.timeslice_min; 597 *max = eclass->sched_props.timeslice_max; 598 break; 599 case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: 600 *min = eclass->sched_props.preempt_timeout_min; 601 *max = eclass->sched_props.preempt_timeout_max; 602 break; 603 default: 604 break; 605 } 606 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) 607 if (capable(CAP_SYS_NICE)) { 608 switch (prop) { 609 case XE_EXEC_QUEUE_JOB_TIMEOUT: 610 *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; 611 *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; 612 break; 613 case XE_EXEC_QUEUE_TIMESLICE: 614 *min = XE_HW_ENGINE_TIMESLICE_MIN; 615 *max = XE_HW_ENGINE_TIMESLICE_MAX; 616 break; 617 case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: 618 *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; 619 *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; 620 break; 621 default: 622 break; 623 } 624 } 625 #endif 626 } 627 628 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, 629 u64 value) 630 { 631 u32 min = 0, max = 0; 632 633 xe_exec_queue_get_prop_minmax(q->hwe->eclass, 634 XE_EXEC_QUEUE_TIMESLICE, &min, &max); 635 636 if (xe_exec_queue_enforce_schedule_limit() && 637 !xe_hw_engine_timeout_in_range(value, min, max)) 638 return -EINVAL; 639 640 q->sched_props.timeslice_us = value; 641 return 0; 642 } 643 644 static int 645 exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value) 646 { 647 if (value == DRM_XE_PXP_TYPE_NONE) 648 return 0; 649 650 /* we only support HWDRM sessions right now */ 651 if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM)) 652 return -EINVAL; 653 654 if (!xe_pxp_is_enabled(xe->pxp)) 655 return -ENODEV; 656 657 return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM); 658 } 659 660 static int exec_queue_set_hang_replay_state(struct xe_device *xe, 661 struct xe_exec_queue *q, 662 u64 value) 663 { 664 size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class); 665 u64 __user *address = u64_to_user_ptr(value); 666 void *ptr; 667 668 ptr = vmemdup_user(address, size); 669 if (XE_IOCTL_DBG(xe, IS_ERR(ptr))) 670 return PTR_ERR(ptr); 671 672 q->replay_state = ptr; 673 674 return 0; 675 } 676 677 static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q) 678 { 679 struct xe_tile *tile = gt_to_tile(q->gt); 680 struct xe_exec_queue_group *group; 681 struct xe_bo *bo; 682 683 group = kzalloc_obj(*group); 684 if (!group) 685 return -ENOMEM; 686 687 bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel, 688 XE_BO_FLAG_VRAM_IF_DGFX(tile) | 689 XE_BO_FLAG_PINNED_LATE_RESTORE | 690 XE_BO_FLAG_FORCE_USER_VRAM | 691 XE_BO_FLAG_GGTT_INVALIDATE | 692 XE_BO_FLAG_GGTT, false); 693 if (IS_ERR(bo)) { 694 drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n", 695 PTR_ERR(bo)); 696 kfree(group); 697 return PTR_ERR(bo); 698 } 699 700 xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K); 701 702 group->primary = q; 703 group->cgp_bo = bo; 704 INIT_LIST_HEAD(&group->list); 705 xa_init_flags(&group->xa, XA_FLAGS_ALLOC1); 706 mutex_init(&group->list_lock); 707 q->multi_queue.group = group; 708 709 /* group->list_lock is used in submission backend */ 710 if (IS_ENABLED(CONFIG_LOCKDEP)) { 711 fs_reclaim_acquire(GFP_KERNEL); 712 might_lock(&group->list_lock); 713 fs_reclaim_release(GFP_KERNEL); 714 } 715 716 return 0; 717 } 718 719 static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q) 720 { 721 return q->gt->info.multi_queue_engine_class_mask & BIT(q->class); 722 } 723 724 static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q, 725 u32 primary_id) 726 { 727 struct xe_exec_queue_group *group; 728 struct xe_exec_queue *primary; 729 int ret; 730 731 /* 732 * Get from below xe_exec_queue_lookup() pairs with put 733 * in xe_exec_queue_group_cleanup(). 734 */ 735 primary = xe_exec_queue_lookup(q->vm->xef, primary_id); 736 if (XE_IOCTL_DBG(xe, !primary)) 737 return -ENOENT; 738 739 if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) || 740 XE_IOCTL_DBG(xe, q->vm != primary->vm) || 741 XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) { 742 ret = -EINVAL; 743 goto put_primary; 744 } 745 746 group = primary->multi_queue.group; 747 q->multi_queue.valid = true; 748 q->multi_queue.group = group; 749 750 return 0; 751 put_primary: 752 xe_exec_queue_put(primary); 753 return ret; 754 } 755 756 #define XE_MAX_GROUP_SIZE 64 757 static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q) 758 { 759 struct xe_exec_queue_group *group = q->multi_queue.group; 760 u32 pos; 761 int err; 762 763 xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); 764 765 /* Primary queue holds a reference to LRCs of all secondary queues */ 766 err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]), 767 XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL); 768 if (XE_IOCTL_DBG(xe, err)) { 769 xe_lrc_put(q->lrc[0]); 770 771 /* It is invalid if queue group limit is exceeded */ 772 if (err == -EBUSY) 773 err = -EINVAL; 774 775 return err; 776 } 777 778 q->multi_queue.pos = pos; 779 780 return 0; 781 } 782 783 static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q) 784 { 785 struct xe_exec_queue_group *group = q->multi_queue.group; 786 struct xe_lrc *lrc; 787 788 xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q)); 789 790 lrc = xa_erase(&group->xa, q->multi_queue.pos); 791 xe_assert(xe, lrc); 792 xe_lrc_put(lrc); 793 } 794 795 static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q, 796 u64 value) 797 { 798 if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q))) 799 return -ENODEV; 800 801 if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe))) 802 return -EOPNOTSUPP; 803 804 if (XE_IOCTL_DBG(xe, !q->vm->xef)) 805 return -EINVAL; 806 807 if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q))) 808 return -EINVAL; 809 810 if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q))) 811 return -EINVAL; 812 813 if (value & DRM_XE_MULTI_GROUP_CREATE) { 814 if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE)) 815 return -EINVAL; 816 817 q->multi_queue.valid = true; 818 q->multi_queue.is_primary = true; 819 q->multi_queue.pos = 0; 820 return 0; 821 } 822 823 /* While adding secondary queues, the upper 32 bits must be 0 */ 824 if (XE_IOCTL_DBG(xe, value & (~0ull << 32))) 825 return -EINVAL; 826 827 return xe_exec_queue_group_validate(xe, q, value); 828 } 829 830 static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q, 831 u64 value) 832 { 833 if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH)) 834 return -EINVAL; 835 836 /* For queue creation time (!q->xef) setting, just store the priority value */ 837 if (!q->xef) { 838 q->multi_queue.priority = value; 839 return 0; 840 } 841 842 if (!xe_exec_queue_is_multi_queue(q)) 843 return -EINVAL; 844 845 return q->ops->set_multi_queue_priority(q, value); 846 } 847 848 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, 849 struct xe_exec_queue *q, 850 u64 value); 851 852 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { 853 [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, 854 [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, 855 [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type, 856 [DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state, 857 [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group, 858 [DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] = 859 exec_queue_set_multi_queue_priority, 860 }; 861 862 int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data, 863 struct drm_file *file) 864 { 865 struct xe_device *xe = to_xe_device(dev); 866 struct xe_file *xef = to_xe_file(file); 867 struct drm_xe_exec_queue_set_property *args = data; 868 struct xe_exec_queue *q; 869 int ret; 870 u32 idx; 871 872 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 873 return -EINVAL; 874 875 if (XE_IOCTL_DBG(xe, args->property != 876 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) 877 return -EINVAL; 878 879 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 880 if (XE_IOCTL_DBG(xe, !q)) 881 return -ENOENT; 882 883 idx = array_index_nospec(args->property, 884 ARRAY_SIZE(exec_queue_set_property_funcs)); 885 ret = exec_queue_set_property_funcs[idx](xe, q, args->value); 886 if (XE_IOCTL_DBG(xe, ret)) 887 goto err_post_lookup; 888 889 xe_exec_queue_put(q); 890 return 0; 891 892 err_post_lookup: 893 xe_exec_queue_put(q); 894 return ret; 895 } 896 897 static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties) 898 { 899 u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) | 900 BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY); 901 902 /* 903 * Only MULTI_QUEUE_PRIORITY property is valid for secondary queues of a 904 * multi-queue group. 905 */ 906 if (xe_exec_queue_is_multi_queue_secondary(q) && 907 properties & ~secondary_queue_valid_props) 908 return -EINVAL; 909 910 return 0; 911 } 912 913 static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties) 914 { 915 /* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */ 916 if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) && 917 !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP))) 918 return -EINVAL; 919 920 return 0; 921 } 922 923 static int exec_queue_user_ext_set_property(struct xe_device *xe, 924 struct xe_exec_queue *q, 925 u64 extension, u64 *properties) 926 { 927 u64 __user *address = u64_to_user_ptr(extension); 928 struct drm_xe_ext_set_property ext; 929 int err; 930 u32 idx; 931 932 err = copy_from_user(&ext, address, sizeof(ext)); 933 if (XE_IOCTL_DBG(xe, err)) 934 return -EFAULT; 935 936 if (XE_IOCTL_DBG(xe, ext.property >= 937 ARRAY_SIZE(exec_queue_set_property_funcs)) || 938 XE_IOCTL_DBG(xe, ext.pad) || 939 XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && 940 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE && 941 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE && 942 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE && 943 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP && 944 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) 945 return -EINVAL; 946 947 idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); 948 if (!exec_queue_set_property_funcs[idx]) 949 return -EINVAL; 950 951 *properties |= BIT_ULL(idx); 952 err = exec_queue_user_ext_check(q, *properties); 953 if (XE_IOCTL_DBG(xe, err)) 954 return err; 955 956 return exec_queue_set_property_funcs[idx](xe, q, ext.value); 957 } 958 959 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, 960 struct xe_exec_queue *q, 961 u64 extension, u64 *properties); 962 963 static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { 964 [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, 965 }; 966 967 #define MAX_USER_EXTENSIONS 16 968 static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, 969 u64 extensions, int ext_number, u64 *properties) 970 { 971 u64 __user *address = u64_to_user_ptr(extensions); 972 struct drm_xe_user_extension ext; 973 int err; 974 u32 idx; 975 976 if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) 977 return -E2BIG; 978 979 err = copy_from_user(&ext, address, sizeof(ext)); 980 if (XE_IOCTL_DBG(xe, err)) 981 return -EFAULT; 982 983 if (XE_IOCTL_DBG(xe, ext.pad) || 984 XE_IOCTL_DBG(xe, ext.name >= 985 ARRAY_SIZE(exec_queue_user_extension_funcs))) 986 return -EINVAL; 987 988 idx = array_index_nospec(ext.name, 989 ARRAY_SIZE(exec_queue_user_extension_funcs)); 990 err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties); 991 if (XE_IOCTL_DBG(xe, err)) 992 return err; 993 994 if (ext.next_extension) 995 return __exec_queue_user_extensions(xe, q, ext.next_extension, 996 ++ext_number, properties); 997 998 return 0; 999 } 1000 1001 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, 1002 u64 extensions) 1003 { 1004 u64 properties = 0; 1005 int err; 1006 1007 err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties); 1008 if (XE_IOCTL_DBG(xe, err)) 1009 return err; 1010 1011 err = exec_queue_user_ext_check_final(q, properties); 1012 if (XE_IOCTL_DBG(xe, err)) 1013 return err; 1014 1015 if (xe_exec_queue_is_multi_queue_primary(q)) { 1016 err = xe_exec_queue_group_init(xe, q); 1017 if (XE_IOCTL_DBG(xe, err)) 1018 return err; 1019 } 1020 1021 return 0; 1022 } 1023 1024 static u32 calc_validate_logical_mask(struct xe_device *xe, 1025 struct drm_xe_engine_class_instance *eci, 1026 u16 width, u16 num_placements) 1027 { 1028 int len = width * num_placements; 1029 int i, j, n; 1030 u16 class; 1031 u16 gt_id; 1032 u32 return_mask = 0, prev_mask; 1033 1034 if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && 1035 len > 1)) 1036 return 0; 1037 1038 for (i = 0; i < width; ++i) { 1039 u32 current_mask = 0; 1040 1041 for (j = 0; j < num_placements; ++j) { 1042 struct xe_hw_engine *hwe; 1043 1044 n = j * width + i; 1045 1046 hwe = xe_hw_engine_lookup(xe, eci[n]); 1047 if (XE_IOCTL_DBG(xe, !hwe)) 1048 return 0; 1049 1050 if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) 1051 return 0; 1052 1053 if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || 1054 XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) 1055 return 0; 1056 1057 class = eci[n].engine_class; 1058 gt_id = eci[n].gt_id; 1059 1060 if (width == 1 || !i) 1061 return_mask |= BIT(eci[n].engine_instance); 1062 current_mask |= BIT(eci[n].engine_instance); 1063 } 1064 1065 /* Parallel submissions must be logically contiguous */ 1066 if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) 1067 return 0; 1068 1069 prev_mask = current_mask; 1070 } 1071 1072 return return_mask; 1073 } 1074 1075 static bool has_sched_groups(struct xe_gt *gt) 1076 { 1077 if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt)) 1078 return true; 1079 1080 if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt)) 1081 return true; 1082 1083 return false; 1084 } 1085 1086 int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, 1087 struct drm_file *file) 1088 { 1089 struct xe_device *xe = to_xe_device(dev); 1090 struct xe_file *xef = to_xe_file(file); 1091 struct drm_xe_exec_queue_create *args = data; 1092 struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; 1093 struct drm_xe_engine_class_instance __user *user_eci = 1094 u64_to_user_ptr(args->instances); 1095 struct xe_hw_engine *hwe; 1096 struct xe_vm *vm; 1097 struct xe_tile *tile; 1098 struct xe_exec_queue *q = NULL; 1099 u32 logical_mask; 1100 u32 flags = 0; 1101 u32 id; 1102 u32 len; 1103 int err; 1104 1105 if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) || 1106 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1107 return -EINVAL; 1108 1109 len = args->width * args->num_placements; 1110 if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) 1111 return -EINVAL; 1112 1113 err = copy_from_user(eci, user_eci, 1114 sizeof(struct drm_xe_engine_class_instance) * len); 1115 if (XE_IOCTL_DBG(xe, err)) 1116 return -EFAULT; 1117 1118 if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) 1119 return -EINVAL; 1120 1121 if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) 1122 flags |= EXEC_QUEUE_FLAG_LOW_LATENCY; 1123 1124 if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { 1125 if (XE_IOCTL_DBG(xe, args->width != 1) || 1126 XE_IOCTL_DBG(xe, args->num_placements != 1) || 1127 XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) 1128 return -EINVAL; 1129 1130 vm = xe_vm_lookup(xef, args->vm_id); 1131 if (XE_IOCTL_DBG(xe, !vm)) 1132 return -ENOENT; 1133 1134 err = down_read_interruptible(&vm->lock); 1135 if (err) { 1136 xe_vm_put(vm); 1137 return err; 1138 } 1139 1140 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 1141 up_read(&vm->lock); 1142 xe_vm_put(vm); 1143 return -ENOENT; 1144 } 1145 1146 for_each_tile(tile, xe, id) { 1147 struct xe_exec_queue *new; 1148 1149 flags |= EXEC_QUEUE_FLAG_VM; 1150 if (id) 1151 flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; 1152 1153 new = xe_exec_queue_create_bind(xe, tile, vm, flags, 1154 args->extensions); 1155 if (IS_ERR(new)) { 1156 up_read(&vm->lock); 1157 xe_vm_put(vm); 1158 err = PTR_ERR(new); 1159 if (q) 1160 goto put_exec_queue; 1161 return err; 1162 } 1163 if (id == 0) 1164 q = new; 1165 else 1166 list_add_tail(&new->multi_gt_list, 1167 &q->multi_gt_link); 1168 } 1169 up_read(&vm->lock); 1170 xe_vm_put(vm); 1171 } else { 1172 logical_mask = calc_validate_logical_mask(xe, eci, 1173 args->width, 1174 args->num_placements); 1175 if (XE_IOCTL_DBG(xe, !logical_mask)) 1176 return -EINVAL; 1177 1178 hwe = xe_hw_engine_lookup(xe, eci[0]); 1179 if (XE_IOCTL_DBG(xe, !hwe)) 1180 return -EINVAL; 1181 1182 vm = xe_vm_lookup(xef, args->vm_id); 1183 if (XE_IOCTL_DBG(xe, !vm)) 1184 return -ENOENT; 1185 1186 err = down_read_interruptible(&vm->lock); 1187 if (err) { 1188 xe_vm_put(vm); 1189 return err; 1190 } 1191 1192 if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { 1193 up_read(&vm->lock); 1194 xe_vm_put(vm); 1195 return -ENOENT; 1196 } 1197 1198 /* SRIOV sched groups are not compatible with multi-lrc */ 1199 if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) { 1200 up_read(&vm->lock); 1201 xe_vm_put(vm); 1202 return -EINVAL; 1203 } 1204 1205 q = xe_exec_queue_create(xe, vm, logical_mask, 1206 args->width, hwe, flags, 1207 args->extensions); 1208 up_read(&vm->lock); 1209 xe_vm_put(vm); 1210 if (IS_ERR(q)) 1211 return PTR_ERR(q); 1212 1213 if (xe_exec_queue_is_multi_queue_secondary(q)) { 1214 err = xe_exec_queue_group_add(xe, q); 1215 if (XE_IOCTL_DBG(xe, err)) 1216 goto put_exec_queue; 1217 } 1218 1219 if (xe_vm_in_preempt_fence_mode(vm)) { 1220 q->lr.context = dma_fence_context_alloc(1); 1221 1222 err = xe_vm_add_compute_exec_queue(vm, q); 1223 if (XE_IOCTL_DBG(xe, err)) 1224 goto delete_queue_group; 1225 } 1226 1227 if (q->vm && q->hwe->hw_engine_group) { 1228 err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); 1229 if (err) 1230 goto put_exec_queue; 1231 } 1232 } 1233 1234 q->xef = xe_file_get(xef); 1235 1236 /* user id alloc must always be last in ioctl to prevent UAF */ 1237 err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); 1238 if (err) 1239 goto kill_exec_queue; 1240 1241 args->exec_queue_id = id; 1242 1243 return 0; 1244 1245 kill_exec_queue: 1246 xe_exec_queue_kill(q); 1247 delete_queue_group: 1248 if (xe_exec_queue_is_multi_queue_secondary(q)) 1249 xe_exec_queue_group_delete(xe, q); 1250 put_exec_queue: 1251 xe_exec_queue_put(q); 1252 return err; 1253 } 1254 1255 int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, 1256 struct drm_file *file) 1257 { 1258 struct xe_device *xe = to_xe_device(dev); 1259 struct xe_file *xef = to_xe_file(file); 1260 struct drm_xe_exec_queue_get_property *args = data; 1261 struct xe_exec_queue *q; 1262 int ret; 1263 1264 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1265 return -EINVAL; 1266 1267 q = xe_exec_queue_lookup(xef, args->exec_queue_id); 1268 if (XE_IOCTL_DBG(xe, !q)) 1269 return -ENOENT; 1270 1271 switch (args->property) { 1272 case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: 1273 args->value = q->ops->reset_status(q); 1274 ret = 0; 1275 break; 1276 default: 1277 ret = -EINVAL; 1278 } 1279 1280 xe_exec_queue_put(q); 1281 1282 return ret; 1283 } 1284 1285 /** 1286 * xe_exec_queue_lrc() - Get the LRC from exec queue. 1287 * @q: The exec_queue. 1288 * 1289 * Retrieves the primary LRC for the exec queue. Note that this function 1290 * returns only the first LRC instance, even when multiple parallel LRCs 1291 * are configured. 1292 * 1293 * Return: Pointer to LRC on success, error on failure 1294 */ 1295 struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q) 1296 { 1297 return q->lrc[0]; 1298 } 1299 1300 /** 1301 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running 1302 * @q: The exec_queue 1303 * 1304 * Return: True if the exec_queue is long-running, false otherwise. 1305 */ 1306 bool xe_exec_queue_is_lr(struct xe_exec_queue *q) 1307 { 1308 return q->vm && xe_vm_in_lr_mode(q->vm) && 1309 !(q->flags & EXEC_QUEUE_FLAG_VM); 1310 } 1311 1312 /** 1313 * xe_exec_queue_is_idle() - Whether an exec_queue is idle. 1314 * @q: The exec_queue 1315 * 1316 * FIXME: Need to determine what to use as the short-lived 1317 * timeline lock for the exec_queues, so that the return value 1318 * of this function becomes more than just an advisory 1319 * snapshot in time. The timeline lock must protect the 1320 * seqno from racing submissions on the same exec_queue. 1321 * Typically vm->resv, but user-created timeline locks use the migrate vm 1322 * and never grabs the migrate vm->resv so we have a race there. 1323 * 1324 * Return: True if the exec_queue is idle, false otherwise. 1325 */ 1326 bool xe_exec_queue_is_idle(struct xe_exec_queue *q) 1327 { 1328 if (xe_exec_queue_is_parallel(q)) { 1329 int i; 1330 1331 for (i = 0; i < q->width; ++i) { 1332 if (xe_lrc_seqno(q->lrc[i]) != 1333 q->lrc[i]->fence_ctx.next_seqno - 1) 1334 return false; 1335 } 1336 1337 return true; 1338 } 1339 1340 return xe_lrc_seqno(q->lrc[0]) == 1341 q->lrc[0]->fence_ctx.next_seqno - 1; 1342 } 1343 1344 /** 1345 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue 1346 * from hw 1347 * @q: The exec queue 1348 * 1349 * Update the timestamp saved by HW for this exec queue and save run ticks 1350 * calculated by using the delta from last update. 1351 */ 1352 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) 1353 { 1354 struct xe_device *xe = gt_to_xe(q->gt); 1355 struct xe_lrc *lrc; 1356 u64 old_ts, new_ts; 1357 int idx; 1358 1359 /* 1360 * Jobs that are executed by kernel doesn't have a corresponding xe_file 1361 * and thus are not accounted. 1362 */ 1363 if (!q->xef) 1364 return; 1365 1366 /* Synchronize with unbind while holding the xe file open */ 1367 if (!drm_dev_enter(&xe->drm, &idx)) 1368 return; 1369 /* 1370 * Only sample the first LRC. For parallel submission, all of them are 1371 * scheduled together and we compensate that below by multiplying by 1372 * width - this may introduce errors if that premise is not true and 1373 * they don't exit 100% aligned. On the other hand, looping through 1374 * the LRCs and reading them in different time could also introduce 1375 * errors. 1376 */ 1377 lrc = q->lrc[0]; 1378 new_ts = xe_lrc_update_timestamp(lrc, &old_ts); 1379 q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; 1380 1381 drm_dev_exit(idx); 1382 } 1383 1384 /** 1385 * xe_exec_queue_kill - permanently stop all execution from an exec queue 1386 * @q: The exec queue 1387 * 1388 * This function permanently stops all activity on an exec queue. If the queue 1389 * is actively executing on the HW, it will be kicked off the engine; any 1390 * pending jobs are discarded and all future submissions are rejected. 1391 * This function is safe to call multiple times. 1392 */ 1393 void xe_exec_queue_kill(struct xe_exec_queue *q) 1394 { 1395 struct xe_exec_queue *eq = q, *next; 1396 1397 list_for_each_entry_safe(eq, next, &eq->multi_gt_list, 1398 multi_gt_link) { 1399 q->ops->kill(eq); 1400 xe_vm_remove_compute_exec_queue(q->vm, eq); 1401 } 1402 1403 q->ops->kill(q); 1404 xe_vm_remove_compute_exec_queue(q->vm, q); 1405 } 1406 1407 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, 1408 struct drm_file *file) 1409 { 1410 struct xe_device *xe = to_xe_device(dev); 1411 struct xe_file *xef = to_xe_file(file); 1412 struct drm_xe_exec_queue_destroy *args = data; 1413 struct xe_exec_queue *q; 1414 1415 if (XE_IOCTL_DBG(xe, args->pad) || 1416 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) 1417 return -EINVAL; 1418 1419 mutex_lock(&xef->exec_queue.lock); 1420 q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); 1421 if (q) 1422 atomic_inc(&xef->exec_queue.pending_removal); 1423 mutex_unlock(&xef->exec_queue.lock); 1424 1425 if (XE_IOCTL_DBG(xe, !q)) 1426 return -ENOENT; 1427 1428 if (q->vm && q->hwe->hw_engine_group) 1429 xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); 1430 1431 xe_exec_queue_kill(q); 1432 1433 trace_xe_exec_queue_close(q); 1434 xe_exec_queue_put(q); 1435 1436 return 0; 1437 } 1438 1439 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, 1440 struct xe_vm *vm) 1441 { 1442 if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) { 1443 xe_migrate_job_lock_assert(q); 1444 } else if (q->flags & EXEC_QUEUE_FLAG_VM) { 1445 lockdep_assert_held(&vm->lock); 1446 } else { 1447 xe_vm_assert_held(vm); 1448 lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); 1449 } 1450 } 1451 1452 /** 1453 * xe_exec_queue_last_fence_put() - Drop ref to last fence 1454 * @q: The exec queue 1455 * @vm: The VM the engine does a bind or exec for 1456 */ 1457 void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) 1458 { 1459 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1460 1461 xe_exec_queue_last_fence_put_unlocked(q); 1462 } 1463 1464 /** 1465 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked 1466 * @q: The exec queue 1467 * 1468 * Only safe to be called from xe_exec_queue_destroy(). 1469 */ 1470 void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) 1471 { 1472 if (q->last_fence) { 1473 dma_fence_put(q->last_fence); 1474 q->last_fence = NULL; 1475 } 1476 } 1477 1478 /** 1479 * xe_exec_queue_last_fence_get() - Get last fence 1480 * @q: The exec queue 1481 * @vm: The VM the engine does a bind or exec for 1482 * 1483 * Get last fence, takes a ref 1484 * 1485 * Returns: last fence if not signaled, dma fence stub if signaled 1486 */ 1487 struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, 1488 struct xe_vm *vm) 1489 { 1490 struct dma_fence *fence; 1491 1492 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1493 1494 if (q->last_fence && 1495 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) 1496 xe_exec_queue_last_fence_put(q, vm); 1497 1498 fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); 1499 dma_fence_get(fence); 1500 return fence; 1501 } 1502 1503 /** 1504 * xe_exec_queue_last_fence_get_for_resume() - Get last fence 1505 * @q: The exec queue 1506 * @vm: The VM the engine does a bind or exec for 1507 * 1508 * Get last fence, takes a ref. Only safe to be called in the context of 1509 * resuming the hw engine group's long-running exec queue, when the group 1510 * semaphore is held. 1511 * 1512 * Returns: last fence if not signaled, dma fence stub if signaled 1513 */ 1514 struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, 1515 struct xe_vm *vm) 1516 { 1517 struct dma_fence *fence; 1518 1519 lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); 1520 1521 if (q->last_fence && 1522 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) 1523 xe_exec_queue_last_fence_put_unlocked(q); 1524 1525 fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); 1526 dma_fence_get(fence); 1527 return fence; 1528 } 1529 1530 /** 1531 * xe_exec_queue_last_fence_set() - Set last fence 1532 * @q: The exec queue 1533 * @vm: The VM the engine does a bind or exec for 1534 * @fence: The fence 1535 * 1536 * Set the last fence for the engine. Increases reference count for fence, when 1537 * closing engine xe_exec_queue_last_fence_put should be called. 1538 */ 1539 void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, 1540 struct dma_fence *fence) 1541 { 1542 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1543 xe_assert(vm->xe, !dma_fence_is_container(fence)); 1544 1545 xe_exec_queue_last_fence_put(q, vm); 1546 q->last_fence = dma_fence_get(fence); 1547 } 1548 1549 /** 1550 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence 1551 * @q: The exec queue 1552 * @vm: The VM the engine does a bind for 1553 * @type: Either primary or media GT 1554 */ 1555 void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q, 1556 struct xe_vm *vm, 1557 unsigned int type) 1558 { 1559 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1560 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || 1561 type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); 1562 1563 xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type); 1564 } 1565 1566 /** 1567 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB 1568 * invalidation fence unlocked 1569 * @q: The exec queue 1570 * @type: Either primary or media GT 1571 * 1572 * Only safe to be called from xe_exec_queue_destroy(). 1573 */ 1574 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q, 1575 unsigned int type) 1576 { 1577 xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || 1578 type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); 1579 1580 dma_fence_put(q->tlb_inval[type].last_fence); 1581 q->tlb_inval[type].last_fence = NULL; 1582 } 1583 1584 /** 1585 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation 1586 * @q: The exec queue 1587 * @vm: The VM the engine does a bind for 1588 * @type: Either primary or media GT 1589 * 1590 * Get last fence, takes a ref 1591 * 1592 * Returns: last fence if not signaled, dma fence stub if signaled 1593 */ 1594 struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q, 1595 struct xe_vm *vm, 1596 unsigned int type) 1597 { 1598 struct dma_fence *fence; 1599 1600 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1601 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || 1602 type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); 1603 xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | 1604 EXEC_QUEUE_FLAG_MIGRATE)); 1605 1606 if (q->tlb_inval[type].last_fence && 1607 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 1608 &q->tlb_inval[type].last_fence->flags)) 1609 xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); 1610 1611 fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub(); 1612 dma_fence_get(fence); 1613 return fence; 1614 } 1615 1616 /** 1617 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation 1618 * @q: The exec queue 1619 * @vm: The VM the engine does a bind for 1620 * @fence: The fence 1621 * @type: Either primary or media GT 1622 * 1623 * Set the last fence for the tlb invalidation type on the queue. Increases 1624 * reference count for fence, when closing queue 1625 * xe_exec_queue_tlb_inval_last_fence_put should be called. 1626 */ 1627 void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q, 1628 struct xe_vm *vm, 1629 struct dma_fence *fence, 1630 unsigned int type) 1631 { 1632 xe_exec_queue_last_fence_lockdep_assert(q, vm); 1633 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT || 1634 type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT); 1635 xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM | 1636 EXEC_QUEUE_FLAG_MIGRATE)); 1637 xe_assert(vm->xe, !dma_fence_is_container(fence)); 1638 1639 xe_exec_queue_tlb_inval_last_fence_put(q, vm, type); 1640 q->tlb_inval[type].last_fence = dma_fence_get(fence); 1641 } 1642 1643 /** 1644 * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references 1645 * within all LRCs of a queue. 1646 * @q: the &xe_exec_queue struct instance containing target LRCs 1647 * @scratch: scratch buffer to be used as temporary storage 1648 * 1649 * Returns: zero on success, negative error code on failure 1650 */ 1651 int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch) 1652 { 1653 int i; 1654 int err = 0; 1655 1656 for (i = 0; i < q->width; ++i) { 1657 struct xe_lrc *lrc; 1658 1659 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ 1660 lrc = READ_ONCE(q->lrc[i]); 1661 if (!lrc) 1662 continue; 1663 1664 xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch); 1665 xe_lrc_update_hwctx_regs_with_address(lrc); 1666 err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch); 1667 if (err) 1668 break; 1669 } 1670 1671 return err; 1672 } 1673