Lines Matching +full:multi +full:- +full:instance
1 // SPDX-License-Identifier: MIT
57 * DOC: Multi Queue Group
59 * Multi Queue Group is another mode of execution supported by the compute
63 * execution of multiple queues within a single shared context. The multi
68 * supports creating a multi queue group and adding queues to a queue group.
71 * set to DRM_XE_MULTI_GROUP_CREATE, will create a new multi queue group with
78 * The hardware does not support removing a queue from a multi-queue group.
88 * The queues of a multi queue group can set their priority within the group
90 * This multi queue priority can also be set dynamically through the
92 * supported by the secondary queues of a multi queue group, other than
95 * When GuC reports an error on any of the queues of a multi queue group,
97 * as hardware cannot make progress on the multi queue context.
99 * Refer :ref:`multi-queue-group-guc-interface` for multi queue group GuC
115 struct xe_exec_queue_group *group = q->multi_queue.group;
132 xa_for_each(&group->xa, idx, lrc)
135 xa_destroy(&group->xa);
136 mutex_destroy(&group->list_lock);
137 xe_bo_unpin_map_no_vm(group->cgp_bo);
146 if (q->tlb_inval[i].dep_scheduler)
147 xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
150 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
155 if (q->vm) {
156 xe_vm_remove_exec_queue(q->vm, q);
157 xe_vm_put(q->vm);
160 if (q->xef)
161 xe_file_put(q->xef);
163 kvfree(q->replay_state);
169 struct xe_tile *tile = gt_to_tile(q->gt);
178 gt = tile->primary_gt;
180 gt = tile->media_gt;
185 wq = gt->tlb_inval.job_wq;
188 dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
193 q->tlb_inval[i].dep_scheduler = dep_scheduler;
207 struct xe_gt *gt = hwe->gt;
215 return ERR_PTR(-ENOMEM);
217 kref_init(&q->refcount);
218 q->flags = flags;
219 q->hwe = hwe;
220 q->gt = gt;
221 q->class = hwe->class;
222 q->width = width;
223 q->msix_vec = XE_IRQ_DEFAULT_MSIX;
224 q->logical_mask = logical_mask;
225 q->fence_irq = &gt->fence_irq[hwe->class];
226 q->ring_ops = gt->ring_ops[hwe->class];
227 q->ops = gt->exec_queue_ops;
228 INIT_LIST_HEAD(&q->lr.link);
229 INIT_LIST_HEAD(&q->vm_exec_queue_link);
230 INIT_LIST_HEAD(&q->multi_gt_link);
231 INIT_LIST_HEAD(&q->hw_engine_group_link);
232 INIT_LIST_HEAD(&q->pxp.link);
233 spin_lock_init(&q->multi_queue.lock);
234 spin_lock_init(&q->lrc_lookup_lock);
235 q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;
237 q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
238 q->sched_props.preempt_timeout_us =
239 hwe->eclass->sched_props.preempt_timeout_us;
240 q->sched_props.job_timeout_ms =
241 hwe->eclass->sched_props.job_timeout_ms;
242 if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
243 q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
244 q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
246 q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
248 if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
257 q->vm = xe_vm_get(vm);
261 * may set q->usm, must come before xe_lrc_create(),
262 * may overwrite q->sched_props, must come before q->ops->init()
276 xe_assert(gt_to_xe(q->gt), idx < q->width);
278 scoped_guard(spinlock, &q->lrc_lookup_lock) {
279 q->lrc[idx] = lrc;
281 q->lrc[idx]->multi_queue.primary_lrc =
282 q->multi_queue.group->primary->lrc[0];
287 * xe_exec_queue_get_lrc() - Get the LRC from exec queue.
288 * @q: The exec queue instance.
289 * @idx: Index within multi-LRC array.
301 xe_assert(gt_to_xe(q->gt), idx < q->width);
303 scoped_guard(spinlock, &q->lrc_lookup_lock) {
304 lrc = q->lrc[idx];
313 * xe_exec_queue_lrc() - Get the LRC from exec queue.
314 * @q: The exec queue instance.
317 * returns only the first LRC instance, even when multiple parallel LRCs
325 return q->lrc[0];
332 q->ops->fini(q);
334 for (i = 0; i < q->width; ++i)
335 xe_lrc_put(q->lrc[i]);
350 (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
351 if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
360 if (q->flags & EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX)
363 err = q->ops->init(q);
368 * This must occur after q->ops->init to avoid race conditions during VF
369 * post-migration recovery, as the fixups for the LRC GGTT addresses
377 for (i = 0; i < q->width; ++i) {
384 marker = xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
386 lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
387 xe_lrc_ring_size(), q->msix_vec, flags);
399 } while (marker != xe_vf_migration_fixups_complete_count(q->gt));
410 * xe_exec_queue_create() - Create an exec queue
432 xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
446 * go bad if the queue is only half-initialized. This means that we
451 err = xe_pxp_exec_queue_add(xe->pxp, q);
467 * xe_exec_queue_create_class() - Create an exec queue for a specific engine class
492 if (hwe->class == class) {
493 logical_mask |= BIT(hwe->logical_instance);
500 return ERR_PTR(-ENODEV);
506 * xe_exec_queue_create_bind() - Create bind exec queue.
515 * faulting devices the reserved copy engine instance must be used to avoid
517 * resolve faults depend on user binds). On non-faulting devices any copy engine
527 struct xe_gt *gt = tile->primary_gt;
531 migrate_vm = xe_migrate_get_vm(tile->migrate);
532 if (xe->info.has_usm) {
535 gt->usm.reserved_bcs_instance,
540 return ERR_PTR(-EINVAL);
544 BIT(hwe->logical_instance), 1, hwe,
554 int err = drm_syncobj_create(&q->ufence_syncobj,
563 q->user_vm = xe_vm_get(user_vm);
571 * xe_exec_queue_destroy() - Destroy an exec queue
584 xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
586 if (q->ufence_syncobj)
587 drm_syncobj_put(q->ufence_syncobj);
590 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
596 if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
597 list_for_each_entry_safe(eq, next, &q->multi_gt_list,
602 if (q->user_vm) {
603 xe_vm_put(q->user_vm);
604 q->user_vm = NULL;
607 q->ops->destroy(q);
611 * xe_exec_queue_fini() - Finalize an exec queue
625 if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
626 wake_up_var(&q->xef->exec_queue.pending_removal);
633 * xe_exec_queue_assign_name() - Assign a name to an exec queue
635 * @instance: Instance number for the engine
637 * Assigns a human-readable name to the exec queue based on its engine class
638 * and instance number (e.g., "rcs0", "vcs1", "bcs2").
640 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
642 switch (q->class) {
644 snprintf(q->name, sizeof(q->name), "rcs%d", instance);
647 snprintf(q->name, sizeof(q->name), "vcs%d", instance);
650 snprintf(q->name, sizeof(q->name), "vecs%d", instance);
653 snprintf(q->name, sizeof(q->name), "bcs%d", instance);
656 snprintf(q->name, sizeof(q->name), "ccs%d", instance);
659 snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
662 XE_WARN_ON(q->class);
667 * xe_exec_queue_lookup() - Look up an exec queue by ID
679 mutex_lock(&xef->exec_queue.lock);
680 q = xa_load(&xef->exec_queue.xa, id);
683 mutex_unlock(&xef->exec_queue.lock);
689 * xe_exec_queue_device_get_max_priority() - Get maximum priority for an exec queue
707 return -EINVAL;
710 return -EPERM;
712 q->sched_props.priority = value;
732 *min = eclass->sched_props.job_timeout_min;
733 *max = eclass->sched_props.job_timeout_max;
736 *min = eclass->sched_props.timeslice_min;
737 *max = eclass->sched_props.timeslice_max;
740 *min = eclass->sched_props.preempt_timeout_min;
741 *max = eclass->sched_props.preempt_timeout_max;
773 xe_exec_queue_get_prop_minmax(q->hwe->eclass,
778 return -EINVAL;
780 q->sched_props.timeslice_us = value;
792 return -EINVAL;
794 if (!xe_pxp_is_enabled(xe->pxp))
795 return -ENODEV;
797 return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
804 size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
812 q->replay_state = ptr;
819 struct xe_tile *tile = gt_to_tile(q->gt);
825 return -ENOMEM;
834 drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
840 xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);
842 group->primary = q;
843 group->cgp_bo = bo;
844 INIT_LIST_HEAD(&group->list);
845 xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
846 mutex_init(&group->list_lock);
847 q->multi_queue.group = group;
849 /* group->list_lock is used in submission backend */
852 might_lock(&group->list_lock);
870 primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
872 return -ENOENT;
875 XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
876 XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
877 ret = -EINVAL;
881 group = primary->multi_queue.group;
882 q->multi_queue.valid = true;
883 q->multi_queue.group = group;
894 struct xe_exec_queue_group *group = q->multi_queue.group;
901 err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
902 XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
904 xe_lrc_put(q->lrc[0]);
907 if (err == -EBUSY)
908 err = -EINVAL;
913 q->multi_queue.pos = pos;
914 q->lrc[0]->multi_queue.pos = pos;
921 struct xe_exec_queue_group *group = q->multi_queue.group;
926 lrc = xa_erase(&group->xa, q->multi_queue.pos);
934 if (XE_IOCTL_DBG(xe, !xe_gt_supports_multi_queue(q->gt, q->class)))
935 return -ENODEV;
938 return -EOPNOTSUPP;
940 if (XE_IOCTL_DBG(xe, !q->vm->xef))
941 return -EINVAL;
944 return -EINVAL;
947 return -EINVAL;
951 return -EINVAL;
953 q->multi_queue.valid = true;
954 q->multi_queue.is_primary = true;
955 q->multi_queue.pos = 0;
961 return -EINVAL;
970 return -EINVAL;
972 /* For queue creation time (!q->xef) setting, just store the priority value */
973 if (!q->xef) {
974 q->multi_queue.priority = value;
979 return -EINVAL;
981 return q->ops->set_multi_queue_priority(q, value);
987 if (XE_IOCTL_DBG(xe, q->class != XE_ENGINE_CLASS_RENDER))
988 return -EOPNOTSUPP;
990 q->flags |= value != 0 ? EXEC_QUEUE_FLAG_DISABLE_STATE_CACHE_PERF_FIX : 0;
1012 * xe_exec_queue_set_property_ioctl() - Set a property on an exec queue
1018 * supports setting multi-queue priority.
1032 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1033 return -EINVAL;
1035 if (XE_IOCTL_DBG(xe, args->property !=
1037 return -EINVAL;
1039 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
1041 return -ENOENT;
1043 idx = array_index_nospec(args->property,
1045 ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
1064 * multi-queue group.
1068 return -EINVAL;
1075 /* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
1078 return -EINVAL;
1094 return -EFAULT;
1106 return -EINVAL;
1110 return -EINVAL;
1138 return -E2BIG;
1142 return -EFAULT;
1147 return -EINVAL;
1248 * xe_exec_queue_create_ioctl() - Create an exec queue via IOCTL
1253 * Creates a new exec queue based on user-provided parameters. Supports
1254 * creating VM bind queues, regular exec queues, multi-lrc exec queues
1255 * and multi-queue groups.
1267 u64_to_user_ptr(args->instances);
1278 if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
1279 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1280 return -EINVAL;
1282 len = args->width * args->num_placements;
1284 return -EINVAL;
1289 return -EFAULT;
1292 return -EINVAL;
1294 if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
1298 if (XE_IOCTL_DBG(xe, args->width != 1) ||
1299 XE_IOCTL_DBG(xe, args->num_placements != 1) ||
1301 return -EINVAL;
1303 vm = xe_vm_lookup(xef, args->vm_id);
1305 return -ENOENT;
1307 err = down_read_interruptible(&vm->lock);
1314 up_read(&vm->lock);
1316 return -ENOENT;
1327 args->extensions);
1329 up_read(&vm->lock);
1339 list_add_tail(&new->multi_gt_list,
1340 &q->multi_gt_link);
1342 up_read(&vm->lock);
1346 args->width,
1347 args->num_placements);
1349 return -EINVAL;
1353 return -EINVAL;
1355 /* multi-lrc is only supported on select engine classes */
1356 if (XE_IOCTL_DBG(xe, args->width > 1 &&
1357 !(xe->info.multi_lrc_mask & BIT(hwe->class))))
1358 return -EOPNOTSUPP;
1360 vm = xe_vm_lookup(xef, args->vm_id);
1362 return -ENOENT;
1364 err = down_read_interruptible(&vm->lock);
1371 up_read(&vm->lock);
1373 return -ENOENT;
1376 /* SRIOV sched groups are not compatible with multi-lrc */
1377 if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) {
1378 up_read(&vm->lock);
1380 return -EINVAL;
1384 args->width, hwe, flags,
1385 args->extensions);
1386 up_read(&vm->lock);
1398 q->lr.context = dma_fence_context_alloc(1);
1405 if (q->vm && q->hwe->hw_engine_group) {
1406 err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
1412 q->xef = xe_file_get(xef);
1417 err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
1421 args->exec_queue_id = id;
1426 if (q->vm && q->hwe && q->hwe->hw_engine_group)
1427 xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
1439 * xe_exec_queue_get_property_ioctl() - Get a property from an exec queue
1458 if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1459 return -EINVAL;
1461 q = xe_exec_queue_lookup(xef, args->exec_queue_id);
1463 return -ENOENT;
1465 switch (args->property) {
1467 args->value = q->ops->reset_status(q);
1471 ret = -EINVAL;
1480 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
1483 * Return: True if the exec_queue is long-running, false otherwise.
1487 return q->vm && xe_vm_in_lr_mode(q->vm) &&
1488 !(q->flags & EXEC_QUEUE_FLAG_VM);
1492 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
1495 * FIXME: Need to determine what to use as the short-lived
1500 * Typically vm->resv, but user-created timeline locks use the migrate vm
1501 * and never grab the migrate vm->resv so we have a race there.
1510 for (i = 0; i < q->width; ++i) {
1511 if (xe_lrc_seqno(q->lrc[i]) !=
1512 q->lrc[i]->fence_ctx.next_seqno - 1)
1519 return xe_lrc_seqno(q->lrc[0]) ==
1520 q->lrc[0]->fence_ctx.next_seqno - 1;
1524 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
1533 struct xe_device *xe = gt_to_xe(q->gt);
1542 if (!q->xef)
1546 if (!drm_dev_enter(&xe->drm, &idx))
1551 * width - this may introduce errors if that premise is not true and
1556 lrc = q->lrc[0];
1558 q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
1564 * xe_exec_queue_kill - permanently stop all execution from an exec queue
1576 list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
1578 q->ops->kill(eq);
1579 xe_vm_remove_compute_exec_queue(q->vm, eq);
1582 q->ops->kill(q);
1583 xe_vm_remove_compute_exec_queue(q->vm, q);
1587 * xe_exec_queue_destroy_ioctl() - Destroy an exec queue via IOCTL
1604 if (XE_IOCTL_DBG(xe, args->pad) ||
1605 XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1606 return -EINVAL;
1608 mutex_lock(&xef->exec_queue.lock);
1609 q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
1611 atomic_inc(&xef->exec_queue.pending_removal);
1612 mutex_unlock(&xef->exec_queue.lock);
1615 return -ENOENT;
1617 if (q->vm && q->hwe->hw_engine_group)
1618 xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
1631 if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
1633 } else if (q->flags & EXEC_QUEUE_FLAG_VM) {
1634 lockdep_assert_held(&vm->lock);
1637 lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
1642 * xe_exec_queue_last_fence_put() - Drop ref to last fence
1654 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
1661 if (q->last_fence) {
1662 dma_fence_put(q->last_fence);
1663 q->last_fence = NULL;
1668 * xe_exec_queue_last_fence_get() - Get last fence
1683 if (q->last_fence &&
1684 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1687 fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1693 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
1698 * resuming the hw engine group's long-running exec queue, when the group
1708 lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
1710 if (q->last_fence &&
1711 test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1714 fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1720 * xe_exec_queue_last_fence_set() - Set last fence
1732 xe_assert(vm->xe, !dma_fence_is_container(fence));
1735 q->last_fence = dma_fence_get(fence);
1739 * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
1749 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1756 * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
1766 xe_assert(gt_to_xe(q->gt), type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1769 dma_fence_put(q->tlb_inval[type].last_fence);
1770 q->tlb_inval[type].last_fence = NULL;
1774 * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
1790 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1792 xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1795 if (q->tlb_inval[type].last_fence &&
1797 &q->tlb_inval[type].last_fence->flags))
1800 fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
1806 * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
1822 xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1824 xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1826 xe_assert(vm->xe, !dma_fence_is_container(fence));
1829 q->tlb_inval[type].last_fence = dma_fence_get(fence);
1833 * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
1835 * @q: the &xe_exec_queue struct instance containing target LRCs
1845 for (i = 0; i < q->width; ++i) {
1852 xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
1854 err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);