// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_pxp.h"

/**
 * DOC: Execution Queue
 *
 * An execution queue is an interface to the HW context of execution. The user
 * creates an execution queue, submits GPU jobs through that queue and, in the
 * end, destroys it.
 *
 * Execution queues can also be created by XeKMD itself for driver-internal
 * operations such as object migration.
 *
 * An execution queue is associated with a specified HW engine or a group of
 * engines (belonging to the same tile and engine class) and any GPU job
 * submitted on the queue will run on one of these engines.
 *
 * An execution queue is tied to an address space (VM). It holds a reference
 * to the associated VM and to the underlying Logical Ring Context(s) (LRCs)
 * until the queue is destroyed.
 *
 * The execution queue sits on top of the submission backend. It opaquely
 * handles whichever of the GuC and Execlist backends the platform uses, and
 * the ring operations the different engine classes support.
 */

/**
 * DOC: Multi Queue Group
 *
 * Multi queue group is another mode of execution supported by the compute
 * and blitter copy command streamers (CCS and BCS, respectively). It is
 * an enhancement of the existing hardware architecture and leverages the
 * same submission model. It enables support for efficient, parallel
 * execution of multiple queues within a single shared context. The multi
 * queue group functionality is only supported with the GuC submission
 * backend. All the queues of a group must use the same address space (VM).
 *
 * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP execution queue property
 * supports creating a multi queue group and adding queues to a queue group.
 *
 * A DRM_IOCTL_XE_EXEC_QUEUE_CREATE ioctl call with the above property and the
 * value field set to DRM_XE_MULTI_GROUP_CREATE creates a new multi queue group
 * with the queue being created as the primary queue (aka q0) of the group. To
 * add secondary queues to the group, they need to be created with the above
 * property with the id of the primary queue as the value. The properties of
 * the primary queue (like priority, time slice) apply to the whole group, so
 * these properties can't be set on the secondary queues of a group.
 *
 * The hardware does not support removing a queue from a multi-queue group.
 * However, queues can be dynamically added to the group. A group can have
 * up to 64 queues. To support this, XeKMD holds references to the LRCs of the
 * queues even after the queues are destroyed by the user, until the whole
 * group is destroyed. The secondary queues hold a reference to the primary
 * queue, thus preventing the group from being destroyed when the user destroys
 * the primary queue. Once the primary queue is destroyed, secondary queues
 * can't be added to the queue group and new job submissions on existing
 * secondary queues are not allowed.
 *
 * The queues of a multi queue group can set their priority within the group
 * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property.
 * This multi queue priority can also be changed dynamically after creation
 * through the exec queue set property ioctl (see
 * xe_exec_queue_set_property_ioctl()). This is the only other property
 * supported by the secondary queues of a multi queue group, other than
 * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP.
 *
 * When the GuC reports an error on any queue of a multi queue group, the
 * queue cleanup mechanism is invoked for all the queues of the group as the
 * hardware cannot make progress on the multi queue context.
 *
 * Refer to :ref:`multi-queue-group-guc-interface` for the multi queue group
 * GuC interface.
 */

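/*
 * Illustrative userspace sketch (not part of the KMD; assumes fd, vm_id and
 * eci - a struct drm_xe_engine_class_instance describing a CCS/BCS engine -
 * are already set up, and a libdrm-style drmIoctl()). The primary queue is
 * created with the MULTI_GROUP property set to DRM_XE_MULTI_GROUP_CREATE;
 * secondary queues pass the primary's exec_queue_id as the property value:
 *
 *	struct drm_xe_ext_set_property grp = {
 *		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
 *		.value = DRM_XE_MULTI_GROUP_CREATE,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.extensions = (uintptr_t)&grp,
 *		.width = 1,
 *		.num_placements = 1,
 *		.vm_id = vm_id,
 *		.instances = (uintptr_t)&eci,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *	grp.value = create.exec_queue_id;	// join as a secondary queue
 *	drmIoctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 */
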
enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions);

static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;
	unsigned long idx;

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		/*
		 * Put pairs with get from xe_exec_queue_lookup() call
		 * in xe_exec_queue_group_validate().
		 */
		xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
		return;
	}

	if (!group)
		return;

	/* Primary queue cleanup */
	xa_for_each(&group->xa, idx, lrc)
		xe_lrc_put(lrc);

	xa_destroy(&group->xa);
	mutex_destroy(&group->list_lock);
	xe_bo_unpin_map_no_vm(group->cgp_bo);
	kfree(group);
}

static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
		if (q->tlb_inval[i].dep_scheduler)
			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	if (xe_exec_queue_is_multi_queue(q))
		xe_exec_queue_group_cleanup(q);

	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kvfree(q->replay_state);
	kfree(q);
}

static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	int i;

	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
		struct xe_dep_scheduler *dep_scheduler;
		struct xe_gt *gt;
		struct workqueue_struct *wq;

		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
			gt = tile->primary_gt;
		else
			gt = tile->media_gt;

		if (!gt)
			continue;

		wq = gt->tlb_inval.job_wq;

#define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
							MAX_TLB_INVAL_JOBS);
		if (IS_ERR(dep_scheduler))
			return PTR_ERR(dep_scheduler);

		q->tlb_inval[i].dep_scheduler = dep_scheduler;
	}
#undef MAX_TLB_INVAL_JOBS

	return 0;
}

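/*
 * Allocate an exec queue and initialize everything that does not require the
 * LRCs: the LRCs are created later in __xe_exec_queue_init() so that the user
 * extensions parsed here can still influence LRC creation and the scheduling
 * properties.
 */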
static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);
	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
		hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
		hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
		err = alloc_dep_schedulers(xe, q);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

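/*
 * Second stage of exec queue creation: register the queue with the submission
 * backend and create one LRC per logical instance (q->width). Kept in sync
 * with __xe_exec_queue_fini().
 */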
static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
	int i, err;
	u32 flags = 0;

	/*
	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
	 * other workload can use the EUs at the same time). On MTL this is done
	 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
	 * is a dedicated bit for it.
	 */
	if (xe_exec_queue_uses_pxp(q) &&
	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
			flags |= XE_LRC_CREATE_PXP;
		else
			flags |= XE_LRC_CREATE_RUNALONE;
	}

	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
		flags |= XE_LRC_CREATE_USER_CTX;

	err = q->ops->init(q);
	if (err)
		return err;

	/*
	 * This must occur after q->ops->init to avoid race conditions during VF
	 * post-migration recovery, as the fixups for the LRC GGTT addresses
	 * depend on the queue being present in the backend tracking structure.
	 *
	 * In addition to the above, we must wait on in-flight GGTT changes to
	 * avoid writing out stale values here. Such a wait provides a solid
	 * solution (without a race) only if the function can detect migration
	 * instantly from the moment the vCPU resumes execution.
	 */
	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
				    xe_lrc_ring_size(), q->msix_vec, flags);
		if (IS_ERR(lrc)) {
			err = PTR_ERR(lrc);
			goto err_lrc;
		}

		/* Pairs with the READ_ONCE() in xe_exec_queue_contexts_hwsp_rebase() */
		WRITE_ONCE(q->lrc[i], lrc);
	}

	return 0;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

static void __xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	q->ops->fini(q);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);
}

struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q, flags);
	if (err)
		goto err_post_alloc;

	/*
	 * We can only add the queue to the PXP list after the init is complete,
	 * because the PXP termination can call exec_queue_kill and that will
	 * go bad if the queue is only half-initialized. This means that we
	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
	 * and we need to do it here instead.
	 */
	if (xe_exec_queue_uses_pxp(q)) {
		err = xe_pxp_exec_queue_add(xe->pxp, q);
		if (err)
			goto err_post_init;
	}

	return q;

err_post_init:
	__xe_exec_queue_fini(q);
err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);

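/*
 * Create a single-width exec queue that may be placed on any non-reserved
 * engine of the given class on @gt. The first matching engine is used as the
 * reference engine; the remaining placements are expressed through the
 * logical mask.
 */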
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @user_vm: The user VM which this exec queue belongs to
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. A bind exec queue is tied to the
 * migration VM for access to the physical memory required for page table
 * programming. On faulting devices the reserved copy engine instance must be
 * used to avoid deadlocking (user binds cannot get stuck behind faults, as
 * kernel binds which resolve faults depend on user binds). On non-faulting
 * devices any copy engine can be used.
 *
 * Returns exec queue on success, ERR_PTR on failure
 */
struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
						struct xe_tile *tile,
						struct xe_vm *user_vm,
						u32 flags, u64 extensions)
{
	struct xe_gt *gt = tile->primary_gt;
	struct xe_exec_queue *q;
	struct xe_vm *migrate_vm;

	migrate_vm = xe_migrate_get_vm(tile->migrate);
	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
							   XE_ENGINE_CLASS_COPY,
							   gt->usm.reserved_bcs_instance,
							   false);

		if (!hwe) {
			xe_vm_put(migrate_vm);
			return ERR_PTR(-EINVAL);
		}

		q = xe_exec_queue_create(xe, migrate_vm,
					 BIT(hwe->logical_instance), 1, hwe,
					 flags, extensions);
	} else {
		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
					       XE_ENGINE_CLASS_COPY, flags,
					       extensions);
	}
	xe_vm_put(migrate_vm);

	if (!IS_ERR(q)) {
		int err = drm_syncobj_create(&q->ufence_syncobj,
					     DRM_SYNCOBJ_CREATE_SIGNALED,
					     NULL);
		if (err) {
			xe_exec_queue_put(q);
			return ERR_PTR(err);
		}

		if (user_vm)
			q->user_vm = xe_vm_get(user_vm);
	}

	return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

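/*
 * Final kref release callback, invoked through xe_exec_queue_put() when the
 * last reference is dropped. The backend ->destroy() hook is expected to
 * perform the (possibly asynchronous) teardown that ultimately ends in
 * xe_exec_queue_fini().
 */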
void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;
	int i;

	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);

	if (q->ufence_syncobj)
		drm_syncobj_put(q->ufence_syncobj);

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	xe_exec_queue_last_fence_put_unlocked(q);
	for_each_tlb_inval(i)
		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);

	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	if (q->user_vm) {
		xe_vm_put(q->user_vm);
		q->user_vm = NULL;
	}

	q->ops->destroy(q);
}

void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wake up any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	__xe_exec_queue_fini(q);
	__xe_exec_queue_free(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

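/*
 * Return the allowed range for a scheduling property. The per engine class
 * limits apply by default; with CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT,
 * CAP_SYS_NICE still widens the range to the HW engine min/max.
 */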
static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

static int
exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support HWDRM sessions right now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	if (!xe_pxp_is_enabled(xe->pxp))
		return -ENODEV;

	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

static int exec_queue_set_hang_replay_state(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 value)
{
	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
	u64 __user *address = u64_to_user_ptr(value);
	void *ptr;

	ptr = vmemdup_user(address, size);
	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
		return PTR_ERR(ptr);

	q->replay_state = ptr;

	return 0;
}

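/*
 * Set up the per-group state for a primary queue: allocate the group's CGP BO,
 * the xarray that tracks the LRCs of secondary queues, and the list/lock used
 * by the submission backend.
 */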
static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_tile *tile = gt_to_tile(q->gt);
	struct xe_exec_queue_group *group;
	struct xe_bo *bo;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return -ENOMEM;

	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_PINNED_LATE_RESTORE |
				       XE_BO_FLAG_FORCE_USER_VRAM |
				       XE_BO_FLAG_GGTT_INVALIDATE |
				       XE_BO_FLAG_GGTT, false);
	if (IS_ERR(bo)) {
		drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
			PTR_ERR(bo));
		kfree(group);
		return PTR_ERR(bo);
	}

	xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);

	group->primary = q;
	group->cgp_bo = bo;
	INIT_LIST_HEAD(&group->list);
	xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
	mutex_init(&group->list_lock);
	q->multi_queue.group = group;

	/* group->list_lock is used in submission backend */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		might_lock(&group->list_lock);
		fs_reclaim_release(GFP_KERNEL);
	}

	return 0;
}

static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
{
	return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
}

static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
					u32 primary_id)
{
	struct xe_exec_queue_group *group;
	struct xe_exec_queue *primary;
	int ret;

	/*
	 * Get from below xe_exec_queue_lookup() pairs with put
	 * in xe_exec_queue_group_cleanup().
	 */
	primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
	if (XE_IOCTL_DBG(xe, !primary))
		return -ENOENT;

	if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
	    XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
	    XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
		ret = -EINVAL;
		goto put_primary;
	}

	group = primary->multi_queue.group;
	q->multi_queue.valid = true;
	q->multi_queue.group = group;

	return 0;
put_primary:
	xe_exec_queue_put(primary);
	return ret;
}

#define XE_MAX_GROUP_SIZE	64
static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 pos;
	int err;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	/* Primary queue holds a reference to LRCs of all secondary queues */
	err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
		       XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
	if (XE_IOCTL_DBG(xe, err)) {
		xe_lrc_put(q->lrc[0]);

		/* It is invalid if queue group limit is exceeded */
		if (err == -EBUSY)
			err = -EINVAL;

		return err;
	}

	q->multi_queue.pos = pos;

	return 0;
}

static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_lrc *lrc;

	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));

	lrc = xa_erase(&group->xa, q->multi_queue.pos);
	xe_assert(xe, lrc);
	xe_lrc_put(lrc);
}

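/*
 * The MULTI_GROUP property value is either DRM_XE_MULTI_GROUP_CREATE (create a
 * new group with this queue as the primary) or the exec queue id of an
 * existing primary queue (join its group as a secondary queue).
 */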
static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value)
{
	if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
		return -ENODEV;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
		return -EOPNOTSUPP;

	if (XE_IOCTL_DBG(xe, !q->vm->xef))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
		return -EINVAL;

	if (value & DRM_XE_MULTI_GROUP_CREATE) {
		if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE))
			return -EINVAL;

		q->multi_queue.valid = true;
		q->multi_queue.is_primary = true;
		q->multi_queue.pos = 0;
		return 0;
	}

	/* While adding secondary queues, the upper 32 bits must be 0 */
	if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
		return -EINVAL;

	return xe_exec_queue_group_validate(xe, q, value);
}

static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
					       u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	/* For queue creation time (!q->xef) setting, just store the priority value */
	if (!q->xef) {
		q->multi_queue.priority = value;
		return 0;
	}

	if (!xe_exec_queue_is_multi_queue(q))
		return -EINVAL;

	return q->ops->set_multi_queue_priority(q, value);
}

typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
		exec_queue_set_multi_queue_priority,
};

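/*
 * Only the multi queue priority can be changed after a queue has been created;
 * every other property is creation-time only and must be supplied through the
 * create ioctl's set-property extension.
 */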
int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_set_property *args = data;
	struct xe_exec_queue *q;
	int ret;
	u32 idx;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, args->property !=
			 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	idx = array_index_nospec(args->property,
				 ARRAY_SIZE(exec_queue_set_property_funcs));
	ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
	if (XE_IOCTL_DBG(xe, ret))
		goto err_post_lookup;

	xe_exec_queue_put(q);
	return 0;

err_post_lookup:
	xe_exec_queue_put(q);
	return ret;
}

static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
{
	u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
		BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);

	/*
	 * Secondary queues of a multi-queue group only accept the MULTI_GROUP
	 * and MULTI_QUEUE_PRIORITY properties.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q) &&
	    properties & ~secondary_queue_valid_props)
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
{
	/* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
	if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
	    !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
		return -EINVAL;

	return 0;
}

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	*properties |= BIT_ULL(idx);
	err = exec_queue_user_ext_check(q, *properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension,
					       u64 *properties);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	16
static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
					u64 extensions, int ext_number, u64 *properties)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return __exec_queue_user_extensions(xe, q, ext.next_extension,
						    ++ext_number, properties);

	return 0;
}

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions)
{
	u64 properties = 0;
	int err;

	err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	err = exec_queue_user_ext_check_final(q, properties);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (xe_exec_queue_is_multi_queue_primary(q)) {
		err = xe_exec_queue_group_init(xe, q);
		if (XE_IOCTL_DBG(xe, err))
			return err;
	}

	return 0;
}

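/*
 * The instances array is laid out as width * num_placements entries, with
 * entry n = j * width + i describing engine i of placement j. All entries
 * must share the same GT and engine class, and for parallel (width > 1)
 * submission each placement must use logically contiguous engine instances.
 */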
static u32 calc_validate_logical_mask(struct xe_device *xe,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

static bool has_sched_groups(struct xe_gt *gt)
{
	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt))
		return true;

	if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt))
		return true;

	return false;
}

int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 flags = 0;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) * len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
		return -EINVAL;

	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;

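	/*
	 * VM bind engines are created per tile: the tile 0 queue is the one
	 * exposed to userspace, while the queues of the remaining tiles are
	 * linked to it through multi_gt_list/multi_gt_link.
	 */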
	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;

			flags |= EXEC_QUEUE_FLAG_VM;
			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, vm, flags,
							args->extensions);
			if (IS_ERR(new)) {
				up_read(&vm->lock);
				xe_vm_put(vm);
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
		up_read(&vm->lock);
		xe_vm_put(vm);
	} else {
		logical_mask = calc_validate_logical_mask(xe, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		/* SRIOV sched groups are not compatible with multi-lrc */
		if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -EINVAL;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, flags,
					 args->extensions);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			err = xe_exec_queue_group_add(xe, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto delete_queue_group;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
delete_queue_group:
	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_exec_queue_group_delete(xe, q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_lrc() - Get the LRC from exec queue.
 * @q: The exec_queue.
 *
 * Retrieves the primary LRC for the exec queue. Note that this function
 * returns only the first LRC instance, even when multiple parallel LRCs
 * are configured.
 *
 * Return: Pointer to LRC on success, error on failure
 */
struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
{
	return q->lrc[0];
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec
 * queue from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save the run ticks
 * calculated by using the delta from the last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u64 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by the kernel don't have a corresponding
	 * xe_file and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;
	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them at different times could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill() - permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);
}

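/*
 * Erasing the id from the xarray first makes the queue unreachable for new
 * lookups; pending_removal is bumped so that xe_exec_queue_fini() can wake up
 * anyone waiting for this file's queues to be fully torn down.
 */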
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

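/*
 * The lock protecting q->last_fence depends on the queue type: migrate queues
 * rely on the migrate job lock, VM bind queues on the VM lock, and all other
 * queues on the VM dma-resv together with the hw engine group mode semaphore.
 */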
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
		xe_migrate_job_lock_assert(q);
	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							   struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases the reference count of the
 * fence; when closing the engine, xe_exec_queue_last_fence_put() should be
 * called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 */
void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
}

/**
 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
 * invalidation fence unlocked
 * @q: The exec queue
 * @type: Either primary or media GT
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
						     unsigned int type)
{
	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);

	dma_fence_put(q->tlb_inval[type].last_fence);
	q->tlb_inval[type].last_fence = NULL;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @type: Either primary or media GT
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
							  struct xe_vm *vm,
							  unsigned int type)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));

	if (q->tlb_inval[type].last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
		     &q->tlb_inval[type].last_fence->flags))
		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);

	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
 * @q: The exec queue
 * @vm: The VM the engine does a bind for
 * @fence: The fence
 * @type: Either primary or media GT
 *
 * Set the last fence for the TLB invalidation type on the queue. Increases the
 * reference count of the fence; when closing the queue,
 * xe_exec_queue_tlb_inval_last_fence_put() should be called.
 */
void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
					    struct xe_vm *vm,
					    struct dma_fence *fence,
					    unsigned int type)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);
	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
				      EXEC_QUEUE_FLAG_MIGRATE));
	xe_assert(vm->xe, !dma_fence_is_container(fence));

	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
	q->tlb_inval[type].last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_contexts_hwsp_rebase() - Re-compute GGTT references
 * within all LRCs of a queue.
 * @q: the &xe_exec_queue struct instance containing target LRCs
 * @scratch: scratch buffer to be used as temporary storage
 *
 * Returns: zero on success, negative error code on failure
 */
int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
{
	int i;
	int err = 0;

	for (i = 0; i < q->width; ++i) {
		struct xe_lrc *lrc;

		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
		lrc = READ_ONCE(q->lrc[i]);
		if (!lrc)
			continue;

		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
		xe_lrc_update_hwctx_regs_with_address(lrc);
		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
		if (err)
			break;
	}

	return err;
}