xref: /linux/drivers/gpu/drm/xe/xe_exec_queue.c (revision c0d6f52f9b62479d61f8cd4faf9fb2f8bce6e301)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_exec_queue.h"
7 
8 #include <linux/nospec.h>
9 
10 #include <drm/drm_device.h>
11 #include <drm/drm_drv.h>
12 #include <drm/drm_file.h>
13 #include <drm/drm_syncobj.h>
14 #include <uapi/drm/xe_drm.h>
15 
16 #include "xe_bo.h"
17 #include "xe_dep_scheduler.h"
18 #include "xe_device.h"
19 #include "xe_gt.h"
20 #include "xe_gt_sriov_pf.h"
21 #include "xe_gt_sriov_vf.h"
22 #include "xe_hw_engine_class_sysfs.h"
23 #include "xe_hw_engine_group.h"
24 #include "xe_irq.h"
25 #include "xe_lrc.h"
26 #include "xe_macros.h"
27 #include "xe_migrate.h"
28 #include "xe_pm.h"
29 #include "xe_trace.h"
30 #include "xe_vm.h"
31 #include "xe_pxp.h"
32 
33 /**
34  * DOC: Execution Queue
35  *
36  * An execution queue is an interface to a HW context of execution.
37  * The user creates an execution queue, submits GPU jobs through it, and
38  * finally destroys it when done.
39  *
40  * Execution queues can also be created by XeKMD itself for driver-internal
41  * operations such as object migration.
42  *
43  * An execution queue is associated with a specified HW engine or a group of
44  * engines (belonging to the same tile and engine class); any GPU job
45  * submitted on the queue runs on one of these engines.
46  *
47  * An execution queue is tied to an address space (VM). It holds a reference
48  * to the associated VM and to the underlying Logical Ring Context(s) (LRCs)
49  * until the queue is destroyed.
50  *
51  * The execution queue sits on top of the submission backend. It transparently
52  * handles whichever backend the platform uses (GuC or Execlists), as well as
53  * the ring operations supported by the different engine classes.
54  */
55 
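/*
 * Illustrative userspace sketch (not part of this file): creating an exec
 * queue on the render engine of a previously created VM and destroying it
 * again with plain ioctl(2) calls. Struct, ioctl and enum names follow
 * uapi/drm/xe_drm.h as referenced by this file; "fd" and "vm_id" are assumed
 * to come from earlier device open / VM create calls, and error handling is
 * omitted.
 *
 *	struct drm_xe_engine_class_instance eci = {
 *		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
 *		.engine_instance = 0,
 *		.gt_id = 0,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.width = 1,
 *		.num_placements = 1,
 *		.vm_id = vm_id,
 *		.instances = (uintptr_t)&eci,
 *	};
 *	struct drm_xe_exec_queue_destroy destroy = {};
 *
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *	(submit GPU jobs with DRM_IOCTL_XE_EXEC, then tear down)
 *	destroy.exec_queue_id = create.exec_queue_id;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);
 */
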
56 /**
57  * DOC: Multi Queue Group
58  *
59  * Multi Queue Group is another mode of execution supported by the compute
60  * and blitter copy command streamers (CCS and BCS, respectively). It is
61  * an enhancement of the existing hardware architecture and leverages the
62  * same submission model. It enables efficient, parallel execution of
63  * multiple queues within a single shared context. The multi queue group
64  * functionality is only supported with the GuC submission backend.
65  * All the queues of a group must use the same address space (VM).
66  *
67  * The DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP execution queue property
68  * supports creating a multi queue group and adding queues to a queue group.
69  *
70  * A DRM_XE_EXEC_QUEUE_CREATE ioctl call with the above property and its value
71  * field set to DRM_XE_MULTI_GROUP_CREATE creates a new multi queue group,
72  * with the queue being created as the primary queue (aka q0) of the group.
73  * To add secondary queues to the group, create them with the above property
74  * and the id of the primary queue as the value. The properties of the primary
75  * queue (such as priority and time slice) apply to the whole group, so these
76  * properties can't be set for the secondary queues of a group.
77  *
78  * The hardware does not support removing a queue from a multi-queue group.
79  * However, queues can be dynamically added to the group. A group can have
80  * up to 64 queues. To support this, XeKMD holds references to the LRCs of
81  * the queues, even after the queues are destroyed by the user, until the
82  * whole group is destroyed. The secondary queues hold a reference to the
83  * primary queue, thus preventing the group from being destroyed when the
84  * user destroys the primary queue. Once the primary queue is destroyed,
85  * secondary queues can't be added to the group and new job submissions on
86  * existing secondary queues are not allowed.
87  *
88  * The queues of a multi queue group can set their priority within the group
89  * through the DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY property.
90  * This multi queue priority can also be set dynamically through the
91  * DRM_XE_EXEC_QUEUE_SET_PROPERTY ioctl. Other than
92  * DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP, this is the only property
93  * supported by the secondary queues of a multi queue group.
94  *
95  * When the GuC reports an error on any of the queues of a multi queue group,
96  * the queue cleanup mechanism is invoked for all the queues of the group,
97  * as the hardware cannot make progress on the multi queue context.
98  *
99  * Refer to :ref:`multi-queue-group-guc-interface` for details of the multi
100  * queue group GuC interface.
101  */
102 
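/*
 * Illustrative userspace sketch (not part of this file): creating a multi
 * queue group via the set-property extension chain handled in this file. The
 * extension and property names are the ones used below; "fd" is the device
 * fd, "create" is assumed to be a struct drm_xe_exec_queue_create filled in
 * as for a normal queue, and error handling is omitted.
 *
 *	struct drm_xe_ext_set_property ext = {
 *		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP,
 *		.value = DRM_XE_MULTI_GROUP_CREATE,
 *	};
 *
 *	create.extensions = (uintptr_t)&ext;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *	(create.exec_queue_id now identifies the primary queue, aka q0)
 *
 *	ext.value = create.exec_queue_id;
 *	create.extensions = (uintptr_t)&ext;
 *	ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
 *	(the second queue is added to the group as a secondary queue)
 *
 * A DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY extension can be
 * chained in the same way via ext.base.next_extension.
 */
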
103 enum xe_exec_queue_sched_prop {
104 	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
105 	XE_EXEC_QUEUE_TIMESLICE = 1,
106 	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
107 	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
108 };
109 
110 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
111 				      u64 extensions);
112 
113 static void xe_exec_queue_group_cleanup(struct xe_exec_queue *q)
114 {
115 	struct xe_exec_queue_group *group = q->multi_queue.group;
116 	struct xe_lrc *lrc;
117 	unsigned long idx;
118 
119 	if (xe_exec_queue_is_multi_queue_secondary(q)) {
120 		/*
121 		 * This put pairs with the get from the xe_exec_queue_lookup()
122 		 * call in xe_exec_queue_group_validate().
123 		 */
124 		xe_exec_queue_put(xe_exec_queue_multi_queue_primary(q));
125 		return;
126 	}
127 
128 	if (!group)
129 		return;
130 
131 	/* Primary queue cleanup */
132 	xa_for_each(&group->xa, idx, lrc)
133 		xe_lrc_put(lrc);
134 
135 	xa_destroy(&group->xa);
136 	mutex_destroy(&group->list_lock);
137 	xe_bo_unpin_map_no_vm(group->cgp_bo);
138 	kfree(group);
139 }
140 
141 static void __xe_exec_queue_free(struct xe_exec_queue *q)
142 {
143 	int i;
144 
145 	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
146 		if (q->tlb_inval[i].dep_scheduler)
147 			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
148 
149 	if (xe_exec_queue_uses_pxp(q))
150 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
151 
152 	if (xe_exec_queue_is_multi_queue(q))
153 		xe_exec_queue_group_cleanup(q);
154 
155 	if (q->vm)
156 		xe_vm_put(q->vm);
157 
158 	if (q->xef)
159 		xe_file_put(q->xef);
160 
161 	kvfree(q->replay_state);
162 	kfree(q);
163 }
164 
165 static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
166 {
167 	struct xe_tile *tile = gt_to_tile(q->gt);
168 	int i;
169 
170 	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
171 		struct xe_dep_scheduler *dep_scheduler;
172 		struct xe_gt *gt;
173 		struct workqueue_struct *wq;
174 
175 		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
176 			gt = tile->primary_gt;
177 		else
178 			gt = tile->media_gt;
179 
180 		if (!gt)
181 			continue;
182 
183 		wq = gt->tlb_inval.job_wq;
184 
185 #define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
186 		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
187 							MAX_TLB_INVAL_JOBS);
188 		if (IS_ERR(dep_scheduler))
189 			return PTR_ERR(dep_scheduler);
190 
191 		q->tlb_inval[i].dep_scheduler = dep_scheduler;
192 	}
193 #undef MAX_TLB_INVAL_JOBS
194 
195 	return 0;
196 }
197 
198 static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
199 						   struct xe_vm *vm,
200 						   u32 logical_mask,
201 						   u16 width, struct xe_hw_engine *hwe,
202 						   u32 flags, u64 extensions)
203 {
204 	struct xe_exec_queue *q;
205 	struct xe_gt *gt = hwe->gt;
206 	int err;
207 
208 	/* only kernel queues can be permanent */
209 	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
210 
211 	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
212 	if (!q)
213 		return ERR_PTR(-ENOMEM);
214 
215 	kref_init(&q->refcount);
216 	q->flags = flags;
217 	q->hwe = hwe;
218 	q->gt = gt;
219 	q->class = hwe->class;
220 	q->width = width;
221 	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
222 	q->logical_mask = logical_mask;
223 	q->fence_irq = &gt->fence_irq[hwe->class];
224 	q->ring_ops = gt->ring_ops[hwe->class];
225 	q->ops = gt->exec_queue_ops;
226 	INIT_LIST_HEAD(&q->lr.link);
227 	INIT_LIST_HEAD(&q->multi_gt_link);
228 	INIT_LIST_HEAD(&q->hw_engine_group_link);
229 	INIT_LIST_HEAD(&q->pxp.link);
230 	q->multi_queue.priority = XE_MULTI_QUEUE_PRIORITY_NORMAL;
231 
232 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
233 	q->sched_props.preempt_timeout_us =
234 				hwe->eclass->sched_props.preempt_timeout_us;
235 	q->sched_props.job_timeout_ms =
236 				hwe->eclass->sched_props.job_timeout_ms;
237 	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
238 	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
239 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
240 	else
241 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
242 
243 	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
244 		err = alloc_dep_schedulers(xe, q);
245 		if (err) {
246 			__xe_exec_queue_free(q);
247 			return ERR_PTR(err);
248 		}
249 	}
250 
251 	if (vm)
252 		q->vm = xe_vm_get(vm);
253 
254 	if (extensions) {
255 		/*
256 		 * May set q->usm, so must come before xe_lrc_create(); may
257 		 * overwrite q->sched_props, so must come before q->ops->init().
258 		 */
259 		err = exec_queue_user_extensions(xe, q, extensions);
260 		if (err) {
261 			__xe_exec_queue_free(q);
262 			return ERR_PTR(err);
263 		}
264 	}
265 
266 	return q;
267 }
268 
269 static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
270 {
271 	int i, err;
272 	u32 flags = 0;
273 
274 	/*
275 	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
276 	 * other workload can use the EUs at the same time). On MTL this is done
277 	 * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
278 	 * is a dedicated bit for it.
279 	 */
280 	if (xe_exec_queue_uses_pxp(q) &&
281 	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
282 		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
283 			flags |= XE_LRC_CREATE_PXP;
284 		else
285 			flags |= XE_LRC_CREATE_RUNALONE;
286 	}
287 
288 	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
289 		flags |= XE_LRC_CREATE_USER_CTX;
290 
291 	err = q->ops->init(q);
292 	if (err)
293 		return err;
294 
295 	/*
296 	 * This must occur after q->ops->init to avoid race conditions during VF
297 	 * post-migration recovery, as the fixups for the LRC GGTT addresses
298 	 * depend on the queue being present in the backend tracking structure.
299 	 *
300 	 * In addition to the above, we must wait on in-flight GGTT changes to
301 	 * avoid writing out stale values here. Such a wait is a solid solution
302 	 * (without a race) only if the function can detect migration instantly
303 	 * from the moment the vCPU resumes execution.
304 	 */
305 	for (i = 0; i < q->width; ++i) {
306 		struct xe_lrc *lrc;
307 
308 		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
309 		lrc = xe_lrc_create(q->hwe, q->vm, q->replay_state,
310 				    xe_lrc_ring_size(), q->msix_vec, flags);
311 		if (IS_ERR(lrc)) {
312 			err = PTR_ERR(lrc);
313 			goto err_lrc;
314 		}
315 
316 		/* Pairs with READ_ONCE in xe_exec_queue_contexts_hwsp_rebase() */
317 		WRITE_ONCE(q->lrc[i], lrc);
318 	}
319 
320 	return 0;
321 
322 err_lrc:
323 	for (i = i - 1; i >= 0; --i)
324 		xe_lrc_put(q->lrc[i]);
325 	return err;
326 }
327 
328 static void __xe_exec_queue_fini(struct xe_exec_queue *q)
329 {
330 	int i;
331 
332 	q->ops->fini(q);
333 
334 	for (i = 0; i < q->width; ++i)
335 		xe_lrc_put(q->lrc[i]);
336 }
337 
338 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
339 					   u32 logical_mask, u16 width,
340 					   struct xe_hw_engine *hwe, u32 flags,
341 					   u64 extensions)
342 {
343 	struct xe_exec_queue *q;
344 	int err;
345 
346 	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
347 	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
348 
349 	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
350 				  extensions);
351 	if (IS_ERR(q))
352 		return q;
353 
354 	err = __xe_exec_queue_init(q, flags);
355 	if (err)
356 		goto err_post_alloc;
357 
358 	/*
359 	 * We can only add the queue to the PXP list after the init is complete,
360 	 * because the PXP termination can call exec_queue_kill and that will
361 	 * go bad if the queue is only half-initialized. This means that we
362 	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
363 	 * and we need to do it here instead.
364 	 */
365 	if (xe_exec_queue_uses_pxp(q)) {
366 		err = xe_pxp_exec_queue_add(xe->pxp, q);
367 		if (err)
368 			goto err_post_init;
369 	}
370 
371 	return q;
372 
373 err_post_init:
374 	__xe_exec_queue_fini(q);
375 err_post_alloc:
376 	__xe_exec_queue_free(q);
377 	return ERR_PTR(err);
378 }
379 ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);
380 
381 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
382 						 struct xe_vm *vm,
383 						 enum xe_engine_class class,
384 						 u32 flags, u64 extensions)
385 {
386 	struct xe_hw_engine *hwe, *hwe0 = NULL;
387 	enum xe_hw_engine_id id;
388 	u32 logical_mask = 0;
389 
390 	for_each_hw_engine(hwe, gt, id) {
391 		if (xe_hw_engine_is_reserved(hwe))
392 			continue;
393 
394 		if (hwe->class == class) {
395 			logical_mask |= BIT(hwe->logical_instance);
396 			if (!hwe0)
397 				hwe0 = hwe;
398 		}
399 	}
400 
401 	if (!logical_mask)
402 		return ERR_PTR(-ENODEV);
403 
404 	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
405 }
406 
407 /**
408  * xe_exec_queue_create_bind() - Create bind exec queue.
409  * @xe: Xe device.
410  * @tile: tile which bind exec queue belongs to.
411  * @flags: exec queue creation flags
412  * @extensions: exec queue creation extensions
413  *
414  * Normalize bind exec queue creation. A bind exec queue is tied to the
415  * migration VM for access to the physical memory required for page table
416  * programming. On faulting devices the reserved copy engine instance must be
417  * used to avoid deadlocks: user binds can get stuck behind faults, and kernel
418  * binds which resolve those faults would then depend on the user binds. On
419  * non-faulting devices any copy engine can be used.
420  *
421  * Returns exec queue on success, ERR_PTR on failure
422  */
423 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
424 						struct xe_tile *tile,
425 						u32 flags, u64 extensions)
426 {
427 	struct xe_gt *gt = tile->primary_gt;
428 	struct xe_exec_queue *q;
429 	struct xe_vm *migrate_vm;
430 
431 	migrate_vm = xe_migrate_get_vm(tile->migrate);
432 	if (xe->info.has_usm) {
433 		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
434 							   XE_ENGINE_CLASS_COPY,
435 							   gt->usm.reserved_bcs_instance,
436 							   false);
437 
438 		if (!hwe) {
439 			xe_vm_put(migrate_vm);
440 			return ERR_PTR(-EINVAL);
441 		}
442 
443 		q = xe_exec_queue_create(xe, migrate_vm,
444 					 BIT(hwe->logical_instance), 1, hwe,
445 					 flags, extensions);
446 	} else {
447 		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
448 					       XE_ENGINE_CLASS_COPY, flags,
449 					       extensions);
450 	}
451 	xe_vm_put(migrate_vm);
452 
453 	if (!IS_ERR(q)) {
454 		int err = drm_syncobj_create(&q->ufence_syncobj,
455 					     DRM_SYNCOBJ_CREATE_SIGNALED,
456 					     NULL);
457 		if (err) {
458 			xe_exec_queue_put(q);
459 			return ERR_PTR(err);
460 		}
461 	}
462 
463 	return q;
464 }
465 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
466 
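/*
 * Minimal kernel-side sketch (illustrative only, not copied from a real call
 * site): a caller such as the per-tile VM bind path would create a bind queue
 * with the EXEC_QUEUE_FLAG_VM flag used elsewhere in this file.
 *
 *	struct xe_exec_queue *q;
 *
 *	q = xe_exec_queue_create_bind(xe, tile, EXEC_QUEUE_FLAG_VM, 0);
 *	if (IS_ERR(q))
 *		return PTR_ERR(q);
 */
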
467 void xe_exec_queue_destroy(struct kref *ref)
468 {
469 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
470 	struct xe_exec_queue *eq, *next;
471 	int i;
472 
473 	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
474 
475 	if (q->ufence_syncobj)
476 		drm_syncobj_put(q->ufence_syncobj);
477 
478 	if (xe_exec_queue_uses_pxp(q))
479 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
480 
481 	xe_exec_queue_last_fence_put_unlocked(q);
482 	for_each_tlb_inval(i)
483 		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
484 
485 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
486 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
487 					 multi_gt_link)
488 			xe_exec_queue_put(eq);
489 	}
490 
491 	q->ops->destroy(q);
492 }
493 
494 void xe_exec_queue_fini(struct xe_exec_queue *q)
495 {
496 	/*
497 	 * Before releasing our ref to lrc and xef, accumulate our run ticks
498 	 * and wakeup any waiters.
499 	 */
500 	xe_exec_queue_update_run_ticks(q);
501 	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
502 		wake_up_var(&q->xef->exec_queue.pending_removal);
503 
504 	__xe_exec_queue_fini(q);
505 	__xe_exec_queue_free(q);
506 }
507 
508 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
509 {
510 	switch (q->class) {
511 	case XE_ENGINE_CLASS_RENDER:
512 		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
513 		break;
514 	case XE_ENGINE_CLASS_VIDEO_DECODE:
515 		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
516 		break;
517 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
518 		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
519 		break;
520 	case XE_ENGINE_CLASS_COPY:
521 		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
522 		break;
523 	case XE_ENGINE_CLASS_COMPUTE:
524 		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
525 		break;
526 	case XE_ENGINE_CLASS_OTHER:
527 		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
528 		break;
529 	default:
530 		XE_WARN_ON(q->class);
531 	}
532 }
533 
534 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
535 {
536 	struct xe_exec_queue *q;
537 
538 	mutex_lock(&xef->exec_queue.lock);
539 	q = xa_load(&xef->exec_queue.xa, id);
540 	if (q)
541 		xe_exec_queue_get(q);
542 	mutex_unlock(&xef->exec_queue.lock);
543 
544 	return q;
545 }
546 
547 enum xe_exec_queue_priority
548 xe_exec_queue_device_get_max_priority(struct xe_device *xe)
549 {
550 	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
551 				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
552 }
553 
554 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
555 				   u64 value)
556 {
557 	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
558 		return -EINVAL;
559 
560 	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
561 		return -EPERM;
562 
563 	q->sched_props.priority = value;
564 	return 0;
565 }
566 
567 static bool xe_exec_queue_enforce_schedule_limit(void)
568 {
569 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
570 	return true;
571 #else
572 	return !capable(CAP_SYS_NICE);
573 #endif
574 }
575 
576 static void
577 xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
578 			      enum xe_exec_queue_sched_prop prop,
579 			      u32 *min, u32 *max)
580 {
581 	switch (prop) {
582 	case XE_EXEC_QUEUE_JOB_TIMEOUT:
583 		*min = eclass->sched_props.job_timeout_min;
584 		*max = eclass->sched_props.job_timeout_max;
585 		break;
586 	case XE_EXEC_QUEUE_TIMESLICE:
587 		*min = eclass->sched_props.timeslice_min;
588 		*max = eclass->sched_props.timeslice_max;
589 		break;
590 	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
591 		*min = eclass->sched_props.preempt_timeout_min;
592 		*max = eclass->sched_props.preempt_timeout_max;
593 		break;
594 	default:
595 		break;
596 	}
597 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
598 	if (capable(CAP_SYS_NICE)) {
599 		switch (prop) {
600 		case XE_EXEC_QUEUE_JOB_TIMEOUT:
601 			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
602 			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
603 			break;
604 		case XE_EXEC_QUEUE_TIMESLICE:
605 			*min = XE_HW_ENGINE_TIMESLICE_MIN;
606 			*max = XE_HW_ENGINE_TIMESLICE_MAX;
607 			break;
608 		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
609 			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
610 			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
611 			break;
612 		default:
613 			break;
614 		}
615 	}
616 #endif
617 }
618 
619 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
620 				    u64 value)
621 {
622 	u32 min = 0, max = 0;
623 
624 	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
625 				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
626 
627 	if (xe_exec_queue_enforce_schedule_limit() &&
628 	    !xe_hw_engine_timeout_in_range(value, min, max))
629 		return -EINVAL;
630 
631 	q->sched_props.timeslice_us = value;
632 	return 0;
633 }
634 
635 static int
636 exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
637 {
638 	if (value == DRM_XE_PXP_TYPE_NONE)
639 		return 0;
640 
641 	/* we only support HWDRM sessions right now */
642 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
643 		return -EINVAL;
644 
645 	if (!xe_pxp_is_enabled(xe->pxp))
646 		return -ENODEV;
647 
648 	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
649 }
650 
651 static int exec_queue_set_hang_replay_state(struct xe_device *xe,
652 					    struct xe_exec_queue *q,
653 					    u64 value)
654 {
655 	size_t size = xe_gt_lrc_hang_replay_size(q->gt, q->class);
656 	u64 __user *address = u64_to_user_ptr(value);
657 	void *ptr;
658 
659 	ptr = vmemdup_user(address, size);
660 	if (XE_IOCTL_DBG(xe, IS_ERR(ptr)))
661 		return PTR_ERR(ptr);
662 
663 	q->replay_state = ptr;
664 
665 	return 0;
666 }
667 
668 static int xe_exec_queue_group_init(struct xe_device *xe, struct xe_exec_queue *q)
669 {
670 	struct xe_tile *tile = gt_to_tile(q->gt);
671 	struct xe_exec_queue_group *group;
672 	struct xe_bo *bo;
673 
674 	group = kzalloc(sizeof(*group), GFP_KERNEL);
675 	if (!group)
676 		return -ENOMEM;
677 
678 	bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
679 				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
680 				       XE_BO_FLAG_PINNED_LATE_RESTORE |
681 				       XE_BO_FLAG_FORCE_USER_VRAM |
682 				       XE_BO_FLAG_GGTT_INVALIDATE |
683 				       XE_BO_FLAG_GGTT, false);
684 	if (IS_ERR(bo)) {
685 		drm_err(&xe->drm, "CGP bo allocation for queue group failed: %ld\n",
686 			PTR_ERR(bo));
687 		kfree(group);
688 		return PTR_ERR(bo);
689 	}
690 
691 	xe_map_memset(xe, &bo->vmap, 0, 0, SZ_4K);
692 
693 	group->primary = q;
694 	group->cgp_bo = bo;
695 	INIT_LIST_HEAD(&group->list);
696 	xa_init_flags(&group->xa, XA_FLAGS_ALLOC1);
697 	mutex_init(&group->list_lock);
698 	q->multi_queue.group = group;
699 
700 	/* group->list_lock is used in submission backend */
701 	if (IS_ENABLED(CONFIG_LOCKDEP)) {
702 		fs_reclaim_acquire(GFP_KERNEL);
703 		might_lock(&group->list_lock);
704 		fs_reclaim_release(GFP_KERNEL);
705 	}
706 
707 	return 0;
708 }
709 
710 static inline bool xe_exec_queue_supports_multi_queue(struct xe_exec_queue *q)
711 {
712 	return q->gt->info.multi_queue_engine_class_mask & BIT(q->class);
713 }
714 
715 static int xe_exec_queue_group_validate(struct xe_device *xe, struct xe_exec_queue *q,
716 					u32 primary_id)
717 {
718 	struct xe_exec_queue_group *group;
719 	struct xe_exec_queue *primary;
720 	int ret;
721 
722 	/*
723 	 * The get from the xe_exec_queue_lookup() below pairs with the put
724 	 * in xe_exec_queue_group_cleanup().
725 	 */
726 	primary = xe_exec_queue_lookup(q->vm->xef, primary_id);
727 	if (XE_IOCTL_DBG(xe, !primary))
728 		return -ENOENT;
729 
730 	if (XE_IOCTL_DBG(xe, !xe_exec_queue_is_multi_queue_primary(primary)) ||
731 	    XE_IOCTL_DBG(xe, q->vm != primary->vm) ||
732 	    XE_IOCTL_DBG(xe, q->logical_mask != primary->logical_mask)) {
733 		ret = -EINVAL;
734 		goto put_primary;
735 	}
736 
737 	group = primary->multi_queue.group;
738 	q->multi_queue.valid = true;
739 	q->multi_queue.group = group;
740 
741 	return 0;
742 put_primary:
743 	xe_exec_queue_put(primary);
744 	return ret;
745 }
746 
747 #define XE_MAX_GROUP_SIZE	64
748 static int xe_exec_queue_group_add(struct xe_device *xe, struct xe_exec_queue *q)
749 {
750 	struct xe_exec_queue_group *group = q->multi_queue.group;
751 	u32 pos;
752 	int err;
753 
754 	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));
755 
756 	/* Primary queue holds a reference to LRCs of all secondary queues */
757 	err = xa_alloc(&group->xa, &pos, xe_lrc_get(q->lrc[0]),
758 		       XA_LIMIT(1, XE_MAX_GROUP_SIZE - 1), GFP_KERNEL);
759 	if (XE_IOCTL_DBG(xe, err)) {
760 		xe_lrc_put(q->lrc[0]);
761 
762 		/* It is invalid if queue group limit is exceeded */
763 		if (err == -EBUSY)
764 			err = -EINVAL;
765 
766 		return err;
767 	}
768 
769 	q->multi_queue.pos = pos;
770 
771 	return 0;
772 }
773 
774 static void xe_exec_queue_group_delete(struct xe_device *xe, struct xe_exec_queue *q)
775 {
776 	struct xe_exec_queue_group *group = q->multi_queue.group;
777 	struct xe_lrc *lrc;
778 
779 	xe_assert(xe, xe_exec_queue_is_multi_queue_secondary(q));
780 
781 	lrc = xa_erase(&group->xa, q->multi_queue.pos);
782 	xe_assert(xe, lrc);
783 	xe_lrc_put(lrc);
784 }
785 
786 static int exec_queue_set_multi_group(struct xe_device *xe, struct xe_exec_queue *q,
787 				      u64 value)
788 {
789 	if (XE_IOCTL_DBG(xe, !xe_exec_queue_supports_multi_queue(q)))
790 		return -ENODEV;
791 
792 	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe)))
793 		return -EOPNOTSUPP;
794 
795 	if (XE_IOCTL_DBG(xe, !q->vm->xef))
796 		return -EINVAL;
797 
798 	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_parallel(q)))
799 		return -EINVAL;
800 
801 	if (XE_IOCTL_DBG(xe, xe_exec_queue_is_multi_queue(q)))
802 		return -EINVAL;
803 
804 	if (value & DRM_XE_MULTI_GROUP_CREATE) {
805 		if (XE_IOCTL_DBG(xe, value & ~DRM_XE_MULTI_GROUP_CREATE))
806 			return -EINVAL;
807 
808 		q->multi_queue.valid = true;
809 		q->multi_queue.is_primary = true;
810 		q->multi_queue.pos = 0;
811 		return 0;
812 	}
813 
814 	/* While adding secondary queues, the upper 32 bits must be 0 */
815 	if (XE_IOCTL_DBG(xe, value & (~0ull << 32)))
816 		return -EINVAL;
817 
818 	return xe_exec_queue_group_validate(xe, q, value);
819 }
820 
821 static int exec_queue_set_multi_queue_priority(struct xe_device *xe, struct xe_exec_queue *q,
822 					       u64 value)
823 {
824 	if (XE_IOCTL_DBG(xe, value > XE_MULTI_QUEUE_PRIORITY_HIGH))
825 		return -EINVAL;
826 
827 	/* For queue creation time (!q->xef) setting, just store the priority value */
828 	/* When set at queue creation time (!q->xef), just store the priority value */
829 		q->multi_queue.priority = value;
830 		return 0;
831 	}
832 
833 	if (!xe_exec_queue_is_multi_queue(q))
834 		return -EINVAL;
835 
836 	return q->ops->set_multi_queue_priority(q, value);
837 }
838 
839 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
840 					     struct xe_exec_queue *q,
841 					     u64 value);
842 
843 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
844 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
845 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
846 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
847 	[DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE] = exec_queue_set_hang_replay_state,
848 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP] = exec_queue_set_multi_group,
849 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY] =
850 							exec_queue_set_multi_queue_priority,
851 };
852 
853 int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,
854 				     struct drm_file *file)
855 {
856 	struct xe_device *xe = to_xe_device(dev);
857 	struct xe_file *xef = to_xe_file(file);
858 	struct drm_xe_exec_queue_set_property *args = data;
859 	struct xe_exec_queue *q;
860 	int ret;
861 	u32 idx;
862 
863 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
864 		return -EINVAL;
865 
866 	if (XE_IOCTL_DBG(xe, args->property !=
867 			 DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
868 		return -EINVAL;
869 
870 	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
871 	if (XE_IOCTL_DBG(xe, !q))
872 		return -ENOENT;
873 
874 	idx = array_index_nospec(args->property,
875 				 ARRAY_SIZE(exec_queue_set_property_funcs));
876 	ret = exec_queue_set_property_funcs[idx](xe, q, args->value);
877 	if (XE_IOCTL_DBG(xe, ret))
878 		goto err_post_lookup;
879 
880 	xe_exec_queue_put(q);
881 	return 0;
882 
883  err_post_lookup:
884 	xe_exec_queue_put(q);
885 	return ret;
886 }
887 
888 static int exec_queue_user_ext_check(struct xe_exec_queue *q, u64 properties)
889 {
890 	u64 secondary_queue_valid_props = BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP) |
891 				  BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY);
892 
893 	/*
894 	 * Apart from MULTI_GROUP itself, only the MULTI_QUEUE_PRIORITY property
895 	 * is valid for secondary queues of a multi-queue group.
896 	 */
897 	if (xe_exec_queue_is_multi_queue_secondary(q) &&
898 	    properties & ~secondary_queue_valid_props)
899 		return -EINVAL;
900 
901 	return 0;
902 }
903 
904 static int exec_queue_user_ext_check_final(struct xe_exec_queue *q, u64 properties)
905 {
906 	/* MULTI_QUEUE_PRIORITY only applies to multi-queue group queues */
907 	if ((properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY)) &&
908 	    !(properties & BIT_ULL(DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP)))
909 		return -EINVAL;
910 
911 	return 0;
912 }
913 
914 static int exec_queue_user_ext_set_property(struct xe_device *xe,
915 					    struct xe_exec_queue *q,
916 					    u64 extension, u64 *properties)
917 {
918 	u64 __user *address = u64_to_user_ptr(extension);
919 	struct drm_xe_ext_set_property ext;
920 	int err;
921 	u32 idx;
922 
923 	err = copy_from_user(&ext, address, sizeof(ext));
924 	if (XE_IOCTL_DBG(xe, err))
925 		return -EFAULT;
926 
927 	if (XE_IOCTL_DBG(xe, ext.property >=
928 			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
929 	    XE_IOCTL_DBG(xe, ext.pad) ||
930 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
931 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
932 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE &&
933 			 ext.property != DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE &&
934 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_GROUP &&
935 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_MULTI_QUEUE_PRIORITY))
936 		return -EINVAL;
937 
938 	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
939 	if (!exec_queue_set_property_funcs[idx])
940 		return -EINVAL;
941 
942 	*properties |= BIT_ULL(idx);
943 	err = exec_queue_user_ext_check(q, *properties);
944 	if (XE_IOCTL_DBG(xe, err))
945 		return err;
946 
947 	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
948 }
949 
950 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
951 					       struct xe_exec_queue *q,
952 					       u64 extension, u64 *properties);
953 
954 static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
955 	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
956 };
957 
958 #define MAX_USER_EXTENSIONS	16
959 static int __exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
960 					u64 extensions, int ext_number, u64 *properties)
961 {
962 	u64 __user *address = u64_to_user_ptr(extensions);
963 	struct drm_xe_user_extension ext;
964 	int err;
965 	u32 idx;
966 
967 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
968 		return -E2BIG;
969 
970 	err = copy_from_user(&ext, address, sizeof(ext));
971 	if (XE_IOCTL_DBG(xe, err))
972 		return -EFAULT;
973 
974 	if (XE_IOCTL_DBG(xe, ext.pad) ||
975 	    XE_IOCTL_DBG(xe, ext.name >=
976 			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
977 		return -EINVAL;
978 
979 	idx = array_index_nospec(ext.name,
980 				 ARRAY_SIZE(exec_queue_user_extension_funcs));
981 	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, properties);
982 	if (XE_IOCTL_DBG(xe, err))
983 		return err;
984 
985 	if (ext.next_extension)
986 		return __exec_queue_user_extensions(xe, q, ext.next_extension,
987 						    ++ext_number, properties);
988 
989 	return 0;
990 }
991 
992 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
993 				      u64 extensions)
994 {
995 	u64 properties = 0;
996 	int err;
997 
998 	err = __exec_queue_user_extensions(xe, q, extensions, 0, &properties);
999 	if (XE_IOCTL_DBG(xe, err))
1000 		return err;
1001 
1002 	err = exec_queue_user_ext_check_final(q, properties);
1003 	if (XE_IOCTL_DBG(xe, err))
1004 		return err;
1005 
1006 	if (xe_exec_queue_is_multi_queue_primary(q)) {
1007 		err = xe_exec_queue_group_init(xe, q);
1008 		if (XE_IOCTL_DBG(xe, err))
1009 			return err;
1010 	}
1011 
1012 	return 0;
1013 }
1014 
1015 static u32 calc_validate_logical_mask(struct xe_device *xe,
1016 				      struct drm_xe_engine_class_instance *eci,
1017 				      u16 width, u16 num_placements)
1018 {
1019 	int len = width * num_placements;
1020 	int i, j, n;
1021 	u16 class;
1022 	u16 gt_id;
1023 	u32 return_mask = 0, prev_mask;
1024 
1025 	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
1026 			 len > 1))
1027 		return 0;
1028 
1029 	for (i = 0; i < width; ++i) {
1030 		u32 current_mask = 0;
1031 
1032 		for (j = 0; j < num_placements; ++j) {
1033 			struct xe_hw_engine *hwe;
1034 
1035 			n = j * width + i;
1036 
1037 			hwe = xe_hw_engine_lookup(xe, eci[n]);
1038 			if (XE_IOCTL_DBG(xe, !hwe))
1039 				return 0;
1040 
1041 			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
1042 				return 0;
1043 
1044 			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
1045 			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
1046 				return 0;
1047 
1048 			class = eci[n].engine_class;
1049 			gt_id = eci[n].gt_id;
1050 
1051 			if (width == 1 || !i)
1052 				return_mask |= BIT(eci[n].engine_instance);
1053 			current_mask |= BIT(eci[n].engine_instance);
1054 		}
1055 
1056 		/* Parallel submissions must be logically contiguous */
1057 		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
1058 			return 0;
1059 
1060 		prev_mask = current_mask;
1061 	}
1062 
1063 	return return_mask;
1064 }
1065 
1066 static bool has_sched_groups(struct xe_gt *gt)
1067 {
1068 	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_sriov_pf_sched_groups_enabled(gt))
1069 		return true;
1070 
1071 	if (IS_SRIOV_VF(gt_to_xe(gt)) && xe_gt_sriov_vf_sched_groups_enabled(gt))
1072 		return true;
1073 
1074 	return false;
1075 }
1076 
1077 int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
1078 			       struct drm_file *file)
1079 {
1080 	struct xe_device *xe = to_xe_device(dev);
1081 	struct xe_file *xef = to_xe_file(file);
1082 	struct drm_xe_exec_queue_create *args = data;
1083 	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
1084 	struct drm_xe_engine_class_instance __user *user_eci =
1085 		u64_to_user_ptr(args->instances);
1086 	struct xe_hw_engine *hwe;
1087 	struct xe_vm *vm;
1088 	struct xe_tile *tile;
1089 	struct xe_exec_queue *q = NULL;
1090 	u32 logical_mask;
1091 	u32 flags = 0;
1092 	u32 id;
1093 	u32 len;
1094 	int err;
1095 
1096 	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
1097 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1098 		return -EINVAL;
1099 
1100 	len = args->width * args->num_placements;
1101 	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
1102 		return -EINVAL;
1103 
1104 	err = copy_from_user(eci, user_eci,
1105 			     sizeof(struct drm_xe_engine_class_instance) * len);
1106 	if (XE_IOCTL_DBG(xe, err))
1107 		return -EFAULT;
1108 
1109 	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
1110 		return -EINVAL;
1111 
1112 	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
1113 		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;
1114 
1115 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
1116 		if (XE_IOCTL_DBG(xe, args->width != 1) ||
1117 		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
1118 		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
1119 			return -EINVAL;
1120 
1121 		for_each_tile(tile, xe, id) {
1122 			struct xe_exec_queue *new;
1123 
1124 			flags |= EXEC_QUEUE_FLAG_VM;
1125 			if (id)
1126 				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
1127 
1128 			new = xe_exec_queue_create_bind(xe, tile, flags,
1129 							args->extensions);
1130 			if (IS_ERR(new)) {
1131 				err = PTR_ERR(new);
1132 				if (q)
1133 					goto put_exec_queue;
1134 				return err;
1135 			}
1136 			if (id == 0)
1137 				q = new;
1138 			else
1139 				list_add_tail(&new->multi_gt_list,
1140 					      &q->multi_gt_link);
1141 		}
1142 	} else {
1143 		logical_mask = calc_validate_logical_mask(xe, eci,
1144 							  args->width,
1145 							  args->num_placements);
1146 		if (XE_IOCTL_DBG(xe, !logical_mask))
1147 			return -EINVAL;
1148 
1149 		hwe = xe_hw_engine_lookup(xe, eci[0]);
1150 		if (XE_IOCTL_DBG(xe, !hwe))
1151 			return -EINVAL;
1152 
1153 		vm = xe_vm_lookup(xef, args->vm_id);
1154 		if (XE_IOCTL_DBG(xe, !vm))
1155 			return -ENOENT;
1156 
1157 		err = down_read_interruptible(&vm->lock);
1158 		if (err) {
1159 			xe_vm_put(vm);
1160 			return err;
1161 		}
1162 
1163 		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
1164 			up_read(&vm->lock);
1165 			xe_vm_put(vm);
1166 			return -ENOENT;
1167 		}
1168 
1169 		/* SRIOV sched groups are not compatible with multi-lrc */
1170 		if (XE_IOCTL_DBG(xe, args->width > 1 && has_sched_groups(hwe->gt))) {
1171 			up_read(&vm->lock);
1172 			xe_vm_put(vm);
1173 			return -EINVAL;
1174 		}
1175 
1176 		q = xe_exec_queue_create(xe, vm, logical_mask,
1177 					 args->width, hwe, flags,
1178 					 args->extensions);
1179 		up_read(&vm->lock);
1180 		xe_vm_put(vm);
1181 		if (IS_ERR(q))
1182 			return PTR_ERR(q);
1183 
1184 		if (xe_exec_queue_is_multi_queue_secondary(q)) {
1185 			err = xe_exec_queue_group_add(xe, q);
1186 			if (XE_IOCTL_DBG(xe, err))
1187 				goto put_exec_queue;
1188 		}
1189 
1190 		if (xe_vm_in_preempt_fence_mode(vm)) {
1191 			q->lr.context = dma_fence_context_alloc(1);
1192 
1193 			err = xe_vm_add_compute_exec_queue(vm, q);
1194 			if (XE_IOCTL_DBG(xe, err))
1195 				goto delete_queue_group;
1196 		}
1197 
1198 		if (q->vm && q->hwe->hw_engine_group) {
1199 			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
1200 			if (err)
1201 				goto put_exec_queue;
1202 		}
1203 	}
1204 
1205 	q->xef = xe_file_get(xef);
1206 
1207 	/* user id alloc must always be last in ioctl to prevent UAF */
1208 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
1209 	if (err)
1210 		goto kill_exec_queue;
1211 
1212 	args->exec_queue_id = id;
1213 
1214 	return 0;
1215 
1216 kill_exec_queue:
1217 	xe_exec_queue_kill(q);
1218 delete_queue_group:
1219 	if (xe_exec_queue_is_multi_queue_secondary(q))
1220 		xe_exec_queue_group_delete(xe, q);
1221 put_exec_queue:
1222 	xe_exec_queue_put(q);
1223 	return err;
1224 }
1225 
1226 int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
1227 				     struct drm_file *file)
1228 {
1229 	struct xe_device *xe = to_xe_device(dev);
1230 	struct xe_file *xef = to_xe_file(file);
1231 	struct drm_xe_exec_queue_get_property *args = data;
1232 	struct xe_exec_queue *q;
1233 	int ret;
1234 
1235 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1236 		return -EINVAL;
1237 
1238 	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
1239 	if (XE_IOCTL_DBG(xe, !q))
1240 		return -ENOENT;
1241 
1242 	switch (args->property) {
1243 	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
1244 		args->value = q->ops->reset_status(q);
1245 		ret = 0;
1246 		break;
1247 	default:
1248 		ret = -EINVAL;
1249 	}
1250 
1251 	xe_exec_queue_put(q);
1252 
1253 	return ret;
1254 }
1255 
1256 /**
1257  * xe_exec_queue_lrc() - Get the LRC from exec queue.
1258  * @q: The exec_queue.
1259  *
1260  * Retrieves the primary LRC for the exec queue. Note that this function
1261  * returns only the first LRC instance, even when multiple parallel LRCs
1262  * are configured.
1263  *
1264  * Return: Pointer to LRC on success, error on failure
1265  */
1266 struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
1267 {
1268 	return q->lrc[0];
1269 }
1270 
1271 /**
1272  * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
1273  * @q: The exec_queue
1274  *
1275  * Return: True if the exec_queue is long-running, false otherwise.
1276  */
1277 bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
1278 {
1279 	return q->vm && xe_vm_in_lr_mode(q->vm) &&
1280 		!(q->flags & EXEC_QUEUE_FLAG_VM);
1281 }
1282 
1283 /**
1284  * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
1285  * @q: The exec_queue
1286  *
1287  * FIXME: Need to determine what to use as the short-lived
1288  * timeline lock for the exec_queues, so that the return value
1289  * of this function becomes more than just an advisory
1290  * snapshot in time. The timeline lock must protect the
1291  * seqno from racing submissions on the same exec_queue.
1292  * Typically vm->resv, but user-created timeline locks use the migrate vm
1293  * and never grab the migrate vm->resv, so we have a race there.
1294  *
1295  * Return: True if the exec_queue is idle, false otherwise.
1296  */
1297 bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
1298 {
1299 	if (xe_exec_queue_is_parallel(q)) {
1300 		int i;
1301 
1302 		for (i = 0; i < q->width; ++i) {
1303 			if (xe_lrc_seqno(q->lrc[i]) !=
1304 			    q->lrc[i]->fence_ctx.next_seqno - 1)
1305 				return false;
1306 		}
1307 
1308 		return true;
1309 	}
1310 
1311 	return xe_lrc_seqno(q->lrc[0]) ==
1312 		q->lrc[0]->fence_ctx.next_seqno - 1;
1313 }
1314 
1315 /**
1316  * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
1317  * from hw
1318  * @q: The exec queue
1319  *
1320  * Update the timestamp saved by HW for this exec queue and save run ticks
1321  * calculated by using the delta from last update.
1322  */
1323 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
1324 {
1325 	struct xe_device *xe = gt_to_xe(q->gt);
1326 	struct xe_lrc *lrc;
1327 	u64 old_ts, new_ts;
1328 	int idx;
1329 
1330 	/*
1331 	 * Jobs that are executed by the kernel don't have a corresponding
1332 	 * xe_file and thus are not accounted.
1333 	 */
1334 	if (!q->xef)
1335 		return;
1336 
1337 	/* Synchronize with unbind while holding the xe file open */
1338 	if (!drm_dev_enter(&xe->drm, &idx))
1339 		return;
1340 	/*
1341 	 * Only sample the first LRC. For parallel submission, all of them are
1342 	 * scheduled together and we compensate for that below by multiplying by
1343 	 * width - this may introduce errors if that premise is not true and
1344 	 * they don't exit 100% aligned. On the other hand, looping through
1345 	 * the LRCs and reading them at different times could also introduce
1346 	 * errors.
1347 	 */
1348 	lrc = q->lrc[0];
1349 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
1350 	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
1351 
1352 	drm_dev_exit(idx);
1353 }
1354 
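/*
 * Worked example (illustrative): if the sampled LRC timestamp advanced from
 * old_ts = 1000 to new_ts = 1600 and the queue is a 2-wide parallel queue,
 * then run_ticks[q->class] is incremented by (1600 - 1000) * 2 = 1200 ticks.
 */
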
1355 /**
1356  * xe_exec_queue_kill() - permanently stop all execution from an exec queue
1357  * @q: The exec queue
1358  *
1359  * This function permanently stops all activity on an exec queue. If the queue
1360  * is actively executing on the HW, it will be kicked off the engine; any
1361  * pending jobs are discarded and all future submissions are rejected.
1362  * This function is safe to call multiple times.
1363  */
1364 void xe_exec_queue_kill(struct xe_exec_queue *q)
1365 {
1366 	struct xe_exec_queue *eq = q, *next;
1367 
1368 	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
1369 				 multi_gt_link) {
1370 		q->ops->kill(eq);
1371 		xe_vm_remove_compute_exec_queue(q->vm, eq);
1372 	}
1373 
1374 	q->ops->kill(q);
1375 	xe_vm_remove_compute_exec_queue(q->vm, q);
1376 }
1377 
1378 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
1379 				struct drm_file *file)
1380 {
1381 	struct xe_device *xe = to_xe_device(dev);
1382 	struct xe_file *xef = to_xe_file(file);
1383 	struct drm_xe_exec_queue_destroy *args = data;
1384 	struct xe_exec_queue *q;
1385 
1386 	if (XE_IOCTL_DBG(xe, args->pad) ||
1387 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
1388 		return -EINVAL;
1389 
1390 	mutex_lock(&xef->exec_queue.lock);
1391 	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
1392 	if (q)
1393 		atomic_inc(&xef->exec_queue.pending_removal);
1394 	mutex_unlock(&xef->exec_queue.lock);
1395 
1396 	if (XE_IOCTL_DBG(xe, !q))
1397 		return -ENOENT;
1398 
1399 	if (q->vm && q->hwe->hw_engine_group)
1400 		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
1401 
1402 	xe_exec_queue_kill(q);
1403 
1404 	trace_xe_exec_queue_close(q);
1405 	xe_exec_queue_put(q);
1406 
1407 	return 0;
1408 }
1409 
1410 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
1411 						    struct xe_vm *vm)
1412 {
1413 	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
1414 		xe_migrate_job_lock_assert(q);
1415 	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
1416 		lockdep_assert_held(&vm->lock);
1417 	} else {
1418 		xe_vm_assert_held(vm);
1419 		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
1420 	}
1421 }
1422 
1423 /**
1424  * xe_exec_queue_last_fence_put() - Drop ref to last fence
1425  * @q: The exec queue
1426  * @vm: The VM the engine does a bind or exec for
1427  */
1428 void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
1429 {
1430 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1431 
1432 	xe_exec_queue_last_fence_put_unlocked(q);
1433 }
1434 
1435 /**
1436  * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
1437  * @q: The exec queue
1438  *
1439  * Only safe to be called from xe_exec_queue_destroy().
1440  */
1441 void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
1442 {
1443 	if (q->last_fence) {
1444 		dma_fence_put(q->last_fence);
1445 		q->last_fence = NULL;
1446 	}
1447 }
1448 
1449 /**
1450  * xe_exec_queue_last_fence_get() - Get last fence
1451  * @q: The exec queue
1452  * @vm: The VM the engine does a bind or exec for
1453  *
1454  * Get last fence, takes a ref
1455  *
1456  * Returns: last fence if not signaled, dma fence stub if signaled
1457  */
1458 struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
1459 					       struct xe_vm *vm)
1460 {
1461 	struct dma_fence *fence;
1462 
1463 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1464 
1465 	if (q->last_fence &&
1466 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1467 		xe_exec_queue_last_fence_put(q, vm);
1468 
1469 	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1470 	dma_fence_get(fence);
1471 	return fence;
1472 }
1473 
1474 /**
1475  * xe_exec_queue_last_fence_get_for_resume() - Get last fence
1476  * @q: The exec queue
1477  * @vm: The VM the engine does a bind or exec for
1478  *
1479  * Get last fence, takes a ref. Only safe to be called in the context of
1480  * resuming the hw engine group's long-running exec queue, when the group
1481  * semaphore is held.
1482  *
1483  * Returns: last fence if not signaled, dma fence stub if signaled
1484  */
1485 struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
1486 							  struct xe_vm *vm)
1487 {
1488 	struct dma_fence *fence;
1489 
1490 	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
1491 
1492 	if (q->last_fence &&
1493 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1494 		xe_exec_queue_last_fence_put_unlocked(q);
1495 
1496 	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1497 	dma_fence_get(fence);
1498 	return fence;
1499 }
1500 
1501 /**
1502  * xe_exec_queue_last_fence_set() - Set last fence
1503  * @q: The exec queue
1504  * @vm: The VM the engine does a bind or exec for
1505  * @fence: The fence
1506  *
1507  * Set the last fence for the engine. Takes a reference on the fence; when
1508  * closing the engine, xe_exec_queue_last_fence_put() should be called.
1509  */
1510 void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
1511 				  struct dma_fence *fence)
1512 {
1513 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1514 	xe_assert(vm->xe, !dma_fence_is_container(fence));
1515 
1516 	xe_exec_queue_last_fence_put(q, vm);
1517 	q->last_fence = dma_fence_get(fence);
1518 }
1519 
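/*
 * Illustrative usage sketch (assumptions: the caller holds the lock asserted
 * by xe_exec_queue_last_fence_lockdep_assert() and "fence" is the fence of a
 * job just pushed to @q; this is not copied from a real call site):
 *
 *	struct dma_fence *dep;
 *
 *	dep = xe_exec_queue_last_fence_get(q, vm);
 *	(order the new job after "dep", then drop the reference)
 *	dma_fence_put(dep);
 *	...
 *	xe_exec_queue_last_fence_set(q, vm, fence);
 */
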
1520 /**
1521  * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
1522  * @q: The exec queue
1523  * @vm: The VM the engine does a bind for
1524  * @type: Either primary or media GT
1525  */
1526 void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
1527 					    struct xe_vm *vm,
1528 					    unsigned int type)
1529 {
1530 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1531 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1532 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1533 
1534 	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
1535 }
1536 
1537 /**
1538  * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
1539  * invalidation fence unlocked
1540  * @q: The exec queue
1541  * @type: Either primary or media GT
1542  *
1543  * Only safe to be called from xe_exec_queue_destroy().
1544  */
1545 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
1546 						     unsigned int type)
1547 {
1548 	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1549 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1550 
1551 	dma_fence_put(q->tlb_inval[type].last_fence);
1552 	q->tlb_inval[type].last_fence = NULL;
1553 }
1554 
1555 /**
1556  * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
1557  * @q: The exec queue
1558  * @vm: The VM the engine does a bind for
1559  * @type: Either primary or media GT
1560  *
1561  * Get last fence, takes a ref
1562  *
1563  * Returns: last fence if not signaled, dma fence stub if signaled
1564  */
1565 struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
1566 							 struct xe_vm *vm,
1567 							 unsigned int type)
1568 {
1569 	struct dma_fence *fence;
1570 
1571 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1572 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1573 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1574 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1575 				      EXEC_QUEUE_FLAG_MIGRATE));
1576 
1577 	if (q->tlb_inval[type].last_fence &&
1578 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1579 		     &q->tlb_inval[type].last_fence->flags))
1580 		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1581 
1582 	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
1583 	dma_fence_get(fence);
1584 	return fence;
1585 }
1586 
1587 /**
1588  * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
1589  * @q: The exec queue
1590  * @vm: The VM the engine does a bind for
1591  * @fence: The fence
1592  * @type: Either primary or media GT
1593  *
1594  * Set the last fence for the TLB invalidation type on the queue. Takes a
1595  * reference on the fence; when closing the queue,
1596  * xe_exec_queue_tlb_inval_last_fence_put() should be called.
1597  */
1598 void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
1599 					    struct xe_vm *vm,
1600 					    struct dma_fence *fence,
1601 					    unsigned int type)
1602 {
1603 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1604 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1605 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1606 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1607 				      EXEC_QUEUE_FLAG_MIGRATE));
1608 	xe_assert(vm->xe, !dma_fence_is_container(fence));
1609 
1610 	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1611 	q->tlb_inval[type].last_fence = dma_fence_get(fence);
1612 }
1613 
1614 /**
1615  * xe_exec_queue_contexts_hwsp_rebase() - Re-compute GGTT references
1616  * within all LRCs of a queue.
1617  * @q: the &xe_exec_queue struct instance containing target LRCs
1618  * @scratch: scratch buffer to be used as temporary storage
1619  *
1620  * Returns: zero on success, negative error code on failure
1621  */
1622 int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
1623 {
1624 	int i;
1625 	int err = 0;
1626 
1627 	for (i = 0; i < q->width; ++i) {
1628 		struct xe_lrc *lrc;
1629 
1630 		/* Pairs with WRITE_ONCE in __xe_exec_queue_init() */
1631 		lrc = READ_ONCE(q->lrc[i]);
1632 		if (!lrc)
1633 			continue;
1634 
1635 		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
1636 		xe_lrc_update_hwctx_regs_with_address(lrc);
1637 		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
1638 		if (err)
1639 			break;
1640 	}
1641 
1642 	return err;
1643 }
1644