// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <uapi/drm/xe_drm.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"
#include "xe_pxp.h"

enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions, int ext_number);

static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kfree(q);
}

static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
				hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions, 0);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

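/*
 * Second stage of exec queue creation: create one LRC per lane of the queue,
 * then register the queue with the submission backend via q->ops->init().
 */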
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_vm *vm = q->vm;
	int i, err;
	u32 flags = 0;

	/*
	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
	 * other workload can use the EUs at the same time). On MTL this is done
	 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
	 * is a dedicated bit for it.
	 */
	if (xe_exec_queue_uses_pxp(q) &&
	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
			flags |= XE_LRC_CREATE_PXP;
		else
			flags |= XE_LRC_CREATE_RUNALONE;
	}

	for (i = 0; i < q->width; ++i) {
		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
		if (IS_ERR(q->lrc[i])) {
			err = PTR_ERR(q->lrc[i]);
			goto err_unlock;
		}
	}

	err = q->ops->init(q);
	if (err)
		goto err_lrc;

	return 0;

err_unlock:
	if (vm)
		xe_vm_unlock(vm);
err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

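/**
 * xe_exec_queue_create() - Create an exec queue.
 * @xe: Xe device.
 * @vm: VM the exec queue operates on, or NULL for kernel-internal queues.
 * @logical_mask: Mask of logical engine instances the queue may be placed on.
 * @width: Number of engines a single submission spans (parallel submission).
 * @hwe: Hardware engine the queue is associated with.
 * @flags: EXEC_QUEUE_FLAG_* creation flags.
 * @extensions: Pointer to the first user extension in a chain, or 0.
 *
 * Allocates the queue, applies any user extensions, creates the LRC state and
 * registers the queue with the submission backend.
 *
 * Return: Pointer to the new exec queue on success, ERR_PTR on failure.
 */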
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q);
	if (err)
		goto err_post_alloc;

	/*
	 * We can only add the queue to the PXP list after the init is complete,
	 * because the PXP termination can call exec_queue_kill and that will
	 * go bad if the queue is only half-initialized. This means that we
	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
	 * and we need to do it here instead.
	 */
	if (xe_exec_queue_uses_pxp(q)) {
		err = xe_pxp_exec_queue_add(xe->pxp, q);
		if (err)
			goto err_post_alloc;
	}

	return q;

err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);

struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
 * for access to physical memory required for page table programming. On
 * faulting devices the reserved copy engine instance must be used to avoid
 * deadlocking (user binds cannot get stuck behind faults as kernel binds which
 * resolve faults depend on user binds). On non-faulting devices any copy engine
 * can be used.
 *
 * Returns exec queue on success, ERR_PTR on failure
 */
struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
						struct xe_tile *tile,
						u32 flags, u64 extensions)
{
	struct xe_gt *gt = tile->primary_gt;
	struct xe_exec_queue *q;
	struct xe_vm *migrate_vm;

	migrate_vm = xe_migrate_get_vm(tile->migrate);
	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
							   XE_ENGINE_CLASS_COPY,
							   gt->usm.reserved_bcs_instance,
							   false);

		if (!hwe) {
			xe_vm_put(migrate_vm);
			return ERR_PTR(-EINVAL);
		}

		q = xe_exec_queue_create(xe, migrate_vm,
					 BIT(hwe->logical_instance), 1, hwe,
					 flags, extensions);
	} else {
		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
					       XE_ENGINE_CLASS_COPY, flags,
					       extensions);
	}
	xe_vm_put(migrate_vm);

	return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;

	if (xe_exec_queue_uses_pxp(q))
		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

	xe_exec_queue_last_fence_put_unlocked(q);
	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	q->ops->fini(q);
}

void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wake up any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);

	__xe_exec_queue_free(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

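/**
 * xe_exec_queue_lookup() - Look up an exec queue by user-visible ID.
 * @xef: Xe file the exec queue belongs to.
 * @id: Exec queue ID as returned by the create IOCTL.
 *
 * A reference is taken on the returned queue; the caller is expected to drop
 * it with xe_exec_queue_put().
 *
 * Return: Pointer to the exec queue, or NULL if the ID is not found.
 */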
struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

static int
exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
{
	if (value == DRM_XE_PXP_TYPE_NONE)
		return 0;

	/* we only support HWDRM sessions right now */
	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
		return -EINVAL;

	if (!xe_pxp_is_enabled(xe->pxp))
		return -ENODEV;

	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
}

typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
};

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

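/*
 * Walk the user extension chain passed at exec queue creation. The chain is
 * processed recursively and capped at MAX_USER_EXTENSIONS entries so a buggy
 * or malicious chain cannot recurse indefinitely.
 */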
#define MAX_USER_EXTENSIONS 16
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions, int ext_number)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return exec_queue_user_extensions(xe, q, ext.next_extension,
						  ++ext_number);

	return 0;
}

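/*
 * Build the logical mask for a (width x num_placements) array of engine
 * instances, validating that every instance exists, is not reserved, and that
 * all placements share the same GT and engine class. For parallel submission
 * the placements must also be logically contiguous. Returns 0 if the layout
 * is invalid.
 */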
static u32 calc_validate_logical_mask(struct xe_device *xe,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 flags = 0;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = copy_from_user(eci, user_eci,
			     sizeof(struct drm_xe_engine_class_instance) * len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
		return -EINVAL;

	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;

			flags |= EXEC_QUEUE_FLAG_VM;
			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, flags,
							args->extensions);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		logical_mask = calc_validate_logical_mask(xe, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, flags,
					 args->extensions);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

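/**
 * xe_exec_queue_get_property_ioctl() - Query a property of an exec queue.
 * @dev: DRM device.
 * @data: Pointer to struct drm_xe_exec_queue_get_property.
 * @file: DRM file.
 *
 * Currently only DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN is supported, reporting
 * the queue's reset/ban status.
 *
 * Return: 0 on success, negative error code on failure.
 */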
int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
{
	return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
}

/**
 * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
 * @q: The exec_queue
 *
 * Return: True if the exec_queue's ring is full, false otherwise.
 */
bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
{
	struct xe_lrc *lrc = q->lrc[0];
	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;

	return xe_exec_queue_num_job_inflight(q) >= max_job;
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
 * from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save run ticks
 * calculated by using the delta from last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u64 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by the kernel don't have a corresponding
	 * xe_file and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;
	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them at different times could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill - permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);
}

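/**
 * xe_exec_queue_destroy_ioctl() - Destroy an exec queue.
 * @dev: DRM device.
 * @data: Pointer to struct drm_xe_exec_queue_destroy.
 * @file: DRM file.
 *
 * Removes the queue from the file's ID space, kills any pending execution and
 * drops the reference held by the ID.
 *
 * Return: 0 on success, negative error code on failure.
 */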
int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

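/*
 * Lockdep annotation for the last-fence helpers below: VM bind queues are
 * protected by the VM lock, while other queues require both the VM's dma-resv
 * and the hw engine group's mode semaphore to be held.
 */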
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							   struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases the reference count for the
 * fence; when closing the engine, xe_exec_queue_last_fence_put() should be
 * called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Returns:
 * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
 */
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
{
	struct dma_fence *fence;
	int err = 0;

	fence = xe_exec_queue_last_fence_get(q, vm);
	if (fence) {
		err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
			0 : -ETIME;
		dma_fence_put(fence);
	}

	return err;
}