// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/xe_drm.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"

enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
						    struct xe_vm *vm,
						    u32 logical_mask,
						    u16 width, struct xe_hw_engine *hwe,
						    u32 flags)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;
	int i;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	if (vm)
		q->vm = xe_vm_get(vm);
	q->class = hwe->class;
	q->width = width;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->persistent.link);
	INIT_LIST_HEAD(&q->compute.link);
	INIT_LIST_HEAD(&q->multi_gt_link);

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (xe_exec_queue_is_parallel(q)) {
		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
	}
	if (q->flags & EXEC_QUEUE_FLAG_VM) {
		q->bind.fence_ctx = dma_fence_context_alloc(1);
		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
	}

	for (i = 0; i < width; ++i) {
		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
		if (err)
			goto err_lrc;
	}

	err = q->ops->init(q);
	if (err)
		goto err_lrc;

	/*
	 * Normally the user vm holds an rpm ref to keep the device
	 * awake, and the context holds a ref for the vm, however for
	 * some engines we use the kernel's migrate vm underneath which offers no
	 * such rpm ref, or we lack a vm. Make sure we keep a ref here, so we
	 * can perform GuC CT actions when needed. Caller is expected to have
	 * already grabbed the rpm ref outside any sensitive locks.
	 */
	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));

	return q;

err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_finish(q->lrc + i);
	kfree(q);
	return ERR_PTR(err);
}

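/**
 * xe_exec_queue_create() - Create an exec queue on a hardware engine
 * @xe: The xe device.
 * @vm: The VM the queue operates on, or NULL.
 * @logical_mask: Mask of logical engine instances the queue may run on.
 * @width: Number of LRCs (parallel submission width).
 * @hwe: The hardware engine backing the queue.
 * @flags: EXEC_QUEUE_FLAG_* creation flags.
 *
 * Takes the VM lock (if a VM is given) around the internal creation helper.
 * The returned queue holds one reference owned by the caller; drop it with
 * xe_exec_queue_put().
 *
 * Return: Pointer to the new exec queue, or an ERR_PTR() on failure.
 */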
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags)
{
	struct xe_exec_queue *q;
	int err;

	if (vm) {
		err = xe_vm_lock(vm, true);
		if (err)
			return ERR_PTR(err);
	}
	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
	if (vm)
		xe_vm_unlock(vm);

	return q;
}

struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class, u32 flags)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
}

void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;

	xe_exec_queue_last_fence_put_unlocked(q);
	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	q->ops->fini(q);
}

void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	for (i = 0; i < q->width; ++i)
		xe_lrc_finish(q->lrc + i);
	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
		xe_device_mem_access_put(gt_to_xe(q->gt));
	if (q->vm)
		xe_vm_put(q->vm);

	kfree(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		sprintf(q->name, "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		sprintf(q->name, "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		sprintf(q->name, "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		sprintf(q->name, "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		sprintf(q->name, "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		sprintf(q->name, "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

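/**
 * xe_exec_queue_device_get_max_priority() - Highest priority the caller may set
 * @xe: The xe device.
 *
 * Return: XE_EXEC_QUEUE_PRIORITY_HIGH for callers with CAP_SYS_NICE,
 * XE_EXEC_QUEUE_PRIORITY_NORMAL otherwise.
 */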
enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value, bool create)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	return q->ops->set_priority(q, value);
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value, bool create)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	return q->ops->set_timeslice(q, value);
}

static int exec_queue_set_preemption_timeout(struct xe_device *xe,
					     struct xe_exec_queue *q, u64 value,
					     bool create)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	return q->ops->set_preempt_timeout(q, value);
}

static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value, bool create)
{
	if (XE_IOCTL_DBG(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
		return -EINVAL;

	if (value)
		q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
	else
		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;

	return 0;
}

static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value, bool create)
{
	u32 min = 0, max = 0;

	if (XE_IOCTL_DBG(xe, !create))
		return -EINVAL;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	return q->ops->set_job_timeout(q, value);
}

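/*
 * The exec_queue_set_acc_* properties below configure the access counter
 * parameters stored in q->usm. They are only accepted at queue creation time
 * and only on devices with unified shared memory (USM) support.
 */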
static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 value, bool create)
{
	if (XE_IOCTL_DBG(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
		return -EINVAL;

	q->usm.acc_trigger = value;

	return 0;
}

static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
				     u64 value, bool create)
{
	if (XE_IOCTL_DBG(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
		return -EINVAL;

	q->usm.acc_notify = value;

	return 0;
}

static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
					  u64 value, bool create)
{
	if (XE_IOCTL_DBG(xe, !create))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
		return -EINVAL;

	if (value > DRM_XE_ACC_GRANULARITY_64M)
		return -EINVAL;

	q->usm.acc_granularity = value;

	return 0;
}

typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value, bool create);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
};

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension,
					    bool create)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
}

typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
					       struct xe_exec_queue *q,
					       u64 extension,
					       bool create);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

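/*
 * Process the chain of user extensions supplied at queue creation. The chain
 * length is capped at MAX_USER_EXTENSIONS so the recursion in
 * exec_queue_user_extensions() stays bounded for untrusted input.
 */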
#define MAX_USER_EXTENSIONS	16
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions, int ext_number, bool create)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return exec_queue_user_extensions(xe, q, ext.next_extension,
						  ++ext_number, create);

	return 0;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

static struct xe_hw_engine *
find_hw_engine(struct xe_device *xe,
	       struct drm_xe_engine_class_instance eci)
{
	u32 idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}

static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
					struct drm_xe_engine_class_instance *eci,
					u16 width, u16 num_placements)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	if (XE_IOCTL_DBG(xe, width != 1))
		return 0;
	if (XE_IOCTL_DBG(xe, num_placements != 1))
		return 0;
	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
		return 0;

	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class ==
		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
			logical_mask |= BIT(hwe->logical_instance);
	}

	return logical_mask;
}

static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = find_hw_engine(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

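/**
 * xe_exec_queue_create_ioctl() - DRM_IOCTL_XE_EXEC_QUEUE_CREATE handler
 * @dev: DRM device.
 * @data: Pointer to struct drm_xe_exec_queue_create.
 * @file: DRM file.
 *
 * Creates either a set of VM bind queues (one per non-media GT, linked through
 * multi_gt_link) or a user submission queue on the requested engine placement,
 * then publishes the new queue id to userspace via @data.
 *
 * Return: 0 on success, negative error code on failure.
 */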
int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm, *migrate_vm;
	struct xe_gt *gt;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = __copy_from_user(eci, user_eci,
			       sizeof(struct drm_xe_engine_class_instance) *
			       len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
		return -EINVAL;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		for_each_gt(gt, xe, id) {
			struct xe_exec_queue *new;

			if (xe_gt_is_media_type(gt))
				continue;

			eci[0].gt_id = gt->info.id;
			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
								    args->width,
								    args->num_placements);
			if (XE_IOCTL_DBG(xe, !logical_mask))
				return -EINVAL;

			hwe = find_hw_engine(xe, eci[0]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return -EINVAL;

			/* The migration vm doesn't hold rpm ref */
			xe_device_mem_access_get(xe);

			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
						   args->width, hwe,
						   EXEC_QUEUE_FLAG_PERSISTENT |
						   EXEC_QUEUE_FLAG_VM |
						   (id ?
						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
						    0));

			xe_device_mem_access_put(xe); /* now held by engine */

			xe_vm_put(migrate_vm);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		gt = xe_device_get_gt(xe, eci[0].gt_id);
		logical_mask = calc_validate_logical_mask(xe, gt, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = find_hw_engine(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

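		/*
		 * Queues on long-running (LR) VMs are never created as
		 * persistent; they are killed when userspace destroys them.
		 */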
		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe,
					 xe_vm_in_lr_mode(vm) ? 0 :
					 EXEC_QUEUE_FLAG_PERSISTENT);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->compute.context = dma_fence_context_alloc(1);
			spin_lock_init(&q->compute.lock);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}
	}

	if (args->extensions) {
		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
		if (XE_IOCTL_DBG(xe, err))
			goto kill_exec_queue;
	}

	q->persistent.xef = xef;

	mutex_lock(&xef->exec_queue.lock);
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	mutex_unlock(&xef->exec_queue.lock);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

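/* Number of jobs submitted to the queue whose fences have not yet signaled */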
static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
{
	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
}

/**
 * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
 * @q: The exec_queue
 *
 * Return: True if the exec_queue's ring is full, false otherwise.
 */
bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
{
	struct xe_lrc *lrc = q->lrc;
	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;

	return xe_exec_queue_num_job_inflight(q) >= max_job;
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(&q->lrc[i]) !=
			    q->lrc[i].fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(&q->lrc[0]) ==
		q->lrc[0].fence_ctx.next_seqno - 1;
}

void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);
}

int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	mutex_unlock(&xef->exec_queue.lock);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
		xe_exec_queue_kill(q);
	else
		xe_device_add_persistent_exec_queues(xe, q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

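/*
 * The last fence is protected by vm->lock for VM bind queues and by the VM's
 * dma-resv for all other queues; assert whichever applies.
 */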
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_VM)
		lockdep_assert_held(&vm->lock);
	else
		xe_vm_assert_held(vm);
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Takes a reference to @fence;
 * xe_exec_queue_last_fence_put() should be called when closing the engine.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}