xref: /linux/drivers/gpu/drm/xe/xe_exec_queue.c (revision 6dfafbd0299a60bfb5d5e277fdf100037c7ded07)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_exec_queue.h"
7 
8 #include <linux/nospec.h>
9 
10 #include <drm/drm_device.h>
11 #include <drm/drm_drv.h>
12 #include <drm/drm_file.h>
13 #include <drm/drm_syncobj.h>
14 #include <uapi/drm/xe_drm.h>
15 
16 #include "xe_dep_scheduler.h"
17 #include "xe_device.h"
18 #include "xe_gt.h"
19 #include "xe_gt_sriov_vf.h"
20 #include "xe_hw_engine_class_sysfs.h"
21 #include "xe_hw_engine_group.h"
22 #include "xe_hw_fence.h"
23 #include "xe_irq.h"
24 #include "xe_lrc.h"
25 #include "xe_macros.h"
26 #include "xe_migrate.h"
27 #include "xe_pm.h"
28 #include "xe_ring_ops_types.h"
29 #include "xe_trace.h"
30 #include "xe_vm.h"
31 #include "xe_pxp.h"
32 
33 /**
34  * DOC: Execution Queue
35  *
36  * An execution queue is an interface to the HW context of execution.
37  * The user creates an execution queue, submits GPU jobs through it and
38  * destroys it when it is no longer needed.
39  *
40  * Execution queues can also be created by XeKMD itself for driver-internal
41  * operations such as object migration.
42  *
43  * An execution queue is associated with a specified HW engine or a group of
44  * engines (belonging to the same tile and engine class) and any GPU job
45  * submitted to the queue will run on one of these engines.
46  *
47  * An execution queue is tied to an address space (VM). It holds a reference
48  * to the associated VM and the underlying Logical Ring Context(s) (LRCs)
49  * until the queue is destroyed.
50  *
51  * The execution queue sits on top of the submission backend. It transparently
52  * handles whichever backend the platform uses (GuC or Execlists) and the
53  * ring operations supported by the different engine classes.
54  */
55 
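/*
 * Illustrative sketch only (not an in-tree caller): a driver-internal user
 * could create a kernel-owned queue on any copy engine of a GT, submit work
 * on it and then drop its reference. The flag choice here is an assumption
 * made for the example.
 *
 *	struct xe_exec_queue *q;
 *
 *	q = xe_exec_queue_create_class(xe, gt, NULL, XE_ENGINE_CLASS_COPY,
 *				       EXEC_QUEUE_FLAG_KERNEL, 0);
 *	if (IS_ERR(q))
 *		return PTR_ERR(q);
 *
 *	... submit jobs on q ...
 *
 *	xe_exec_queue_put(q);
 */
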
56 enum xe_exec_queue_sched_prop {
57 	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
58 	XE_EXEC_QUEUE_TIMESLICE = 1,
59 	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
60 	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
61 };
62 
63 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
64 				      u64 extensions, int ext_number);
65 
66 static void __xe_exec_queue_free(struct xe_exec_queue *q)
67 {
68 	int i;
69 
70 	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
71 		if (q->tlb_inval[i].dep_scheduler)
72 			xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
73 
74 	if (xe_exec_queue_uses_pxp(q))
75 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
76 	if (q->vm)
77 		xe_vm_put(q->vm);
78 
79 	if (q->xef)
80 		xe_file_put(q->xef);
81 
82 	kfree(q);
83 }
84 
85 static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
86 {
87 	struct xe_tile *tile = gt_to_tile(q->gt);
88 	int i;
89 
90 	for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
91 		struct xe_dep_scheduler *dep_scheduler;
92 		struct xe_gt *gt;
93 		struct workqueue_struct *wq;
94 
95 		if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
96 			gt = tile->primary_gt;
97 		else
98 			gt = tile->media_gt;
99 
100 		if (!gt)
101 			continue;
102 
103 		wq = gt->tlb_inval.job_wq;
104 
105 #define MAX_TLB_INVAL_JOBS	16	/* Picking a reasonable value */
106 		dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
107 							MAX_TLB_INVAL_JOBS);
108 		if (IS_ERR(dep_scheduler))
109 			return PTR_ERR(dep_scheduler);
110 
111 		q->tlb_inval[i].dep_scheduler = dep_scheduler;
112 	}
113 #undef MAX_TLB_INVAL_JOBS
114 
115 	return 0;
116 }
117 
118 static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
119 						   struct xe_vm *vm,
120 						   u32 logical_mask,
121 						   u16 width, struct xe_hw_engine *hwe,
122 						   u32 flags, u64 extensions)
123 {
124 	struct xe_exec_queue *q;
125 	struct xe_gt *gt = hwe->gt;
126 	int err;
127 
128 	/* only kernel queues can be permanent */
129 	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
130 
131 	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
132 	if (!q)
133 		return ERR_PTR(-ENOMEM);
134 
135 	kref_init(&q->refcount);
136 	q->flags = flags;
137 	q->hwe = hwe;
138 	q->gt = gt;
139 	q->class = hwe->class;
140 	q->width = width;
141 	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
142 	q->logical_mask = logical_mask;
143 	q->fence_irq = &gt->fence_irq[hwe->class];
144 	q->ring_ops = gt->ring_ops[hwe->class];
145 	q->ops = gt->exec_queue_ops;
146 	INIT_LIST_HEAD(&q->lr.link);
147 	INIT_LIST_HEAD(&q->multi_gt_link);
148 	INIT_LIST_HEAD(&q->hw_engine_group_link);
149 	INIT_LIST_HEAD(&q->pxp.link);
150 
151 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
152 	q->sched_props.preempt_timeout_us =
153 				hwe->eclass->sched_props.preempt_timeout_us;
154 	q->sched_props.job_timeout_ms =
155 				hwe->eclass->sched_props.job_timeout_ms;
156 	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
157 	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
158 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
159 	else
160 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
161 
162 	if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
163 		err = alloc_dep_schedulers(xe, q);
164 		if (err) {
165 			__xe_exec_queue_free(q);
166 			return ERR_PTR(err);
167 		}
168 	}
169 
170 	if (vm)
171 		q->vm = xe_vm_get(vm);
172 
173 	if (extensions) {
174 		/*
175 		 * may set q->usm, must come before xe_lrc_create(),
176 		 * may overwrite q->sched_props, must come before q->ops->init()
177 		 */
178 		err = exec_queue_user_extensions(xe, q, extensions, 0);
179 		if (err) {
180 			__xe_exec_queue_free(q);
181 			return ERR_PTR(err);
182 		}
183 	}
184 
185 	return q;
186 }
187 
188 static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
189 {
190 	int i, err;
191 	u32 flags = 0;
192 
193 	/*
194 	 * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
195 	 * other workload can use the EUs at the same time). On MTL this is done
196 	 * by setting the RUNALONE bit in the LRC, while on Xe2 and later there
197 	 * is a dedicated bit for it.
198 	 */
199 	if (xe_exec_queue_uses_pxp(q) &&
200 	    (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
201 		if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
202 			flags |= XE_LRC_CREATE_PXP;
203 		else
204 			flags |= XE_LRC_CREATE_RUNALONE;
205 	}
206 
207 	if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
208 		flags |= XE_LRC_CREATE_USER_CTX;
209 
210 	err = q->ops->init(q);
211 	if (err)
212 		return err;
213 
214 	/*
215 	 * This must occur after q->ops->init to avoid race conditions during VF
216 	 * post-migration recovery, as the fixups for the LRC GGTT addresses
217 	 * depend on the queue being present in the backend tracking structure.
218 	 *
219 	 * In addition to the above, we must wait on inflight GGTT changes to avoid
220 	 * writing out stale values here. Such wait provides a solid solution
221 	 * (without a race) only if the function can detect migration instantly
222 	 * from the moment vCPU resumes execution.
223 	 */
224 	for (i = 0; i < q->width; ++i) {
225 		struct xe_lrc *lrc;
226 
227 		xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
228 		lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(),
229 				    q->msix_vec, flags);
230 		if (IS_ERR(lrc)) {
231 			err = PTR_ERR(lrc);
232 			goto err_lrc;
233 		}
234 
235 		/* Pairs with READ_ONCE in xe_exec_queue_contexts_hwsp_rebase */
236 		WRITE_ONCE(q->lrc[i], lrc);
237 	}
238 
239 	return 0;
240 
241 err_lrc:
242 	for (i = i - 1; i >= 0; --i)
243 		xe_lrc_put(q->lrc[i]);
244 	return err;
245 }
246 
247 static void __xe_exec_queue_fini(struct xe_exec_queue *q)
248 {
249 	int i;
250 
251 	q->ops->fini(q);
252 
253 	for (i = 0; i < q->width; ++i)
254 		xe_lrc_put(q->lrc[i]);
255 }
256 
257 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
258 					   u32 logical_mask, u16 width,
259 					   struct xe_hw_engine *hwe, u32 flags,
260 					   u64 extensions)
261 {
262 	struct xe_exec_queue *q;
263 	int err;
264 
265 	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
266 	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
267 
268 	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
269 				  extensions);
270 	if (IS_ERR(q))
271 		return q;
272 
273 	err = __xe_exec_queue_init(q, flags);
274 	if (err)
275 		goto err_post_alloc;
276 
277 	/*
278 	 * We can only add the queue to the PXP list after the init is complete,
279 	 * because the PXP termination can call exec_queue_kill and that will
280 	 * go bad if the queue is only half-initialized. This means that we
281 	 * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
282 	 * and we need to do it here instead.
283 	 */
284 	if (xe_exec_queue_uses_pxp(q)) {
285 		err = xe_pxp_exec_queue_add(xe->pxp, q);
286 		if (err)
287 			goto err_post_init;
288 	}
289 
290 	return q;
291 
292 err_post_init:
293 	__xe_exec_queue_fini(q);
294 err_post_alloc:
295 	__xe_exec_queue_free(q);
296 	return ERR_PTR(err);
297 }
298 ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);
299 
300 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
301 						 struct xe_vm *vm,
302 						 enum xe_engine_class class,
303 						 u32 flags, u64 extensions)
304 {
305 	struct xe_hw_engine *hwe, *hwe0 = NULL;
306 	enum xe_hw_engine_id id;
307 	u32 logical_mask = 0;
308 
309 	for_each_hw_engine(hwe, gt, id) {
310 		if (xe_hw_engine_is_reserved(hwe))
311 			continue;
312 
313 		if (hwe->class == class) {
314 			logical_mask |= BIT(hwe->logical_instance);
315 			if (!hwe0)
316 				hwe0 = hwe;
317 		}
318 	}
319 
320 	if (!logical_mask)
321 		return ERR_PTR(-ENODEV);
322 
323 	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
324 }
325 
326 /**
327  * xe_exec_queue_create_bind() - Create bind exec queue.
328  * @xe: Xe device.
329  * @tile: tile which bind exec queue belongs to.
330  * @flags: exec queue creation flags
331  * @extensions: exec queue creation extensions
332  *
333  * Normalize bind exec queue creation. Bind exec queue is tied to migration VM
334  * for access to physical memory required for page table programming. On
335  * faulting devices the reserved copy engine instance must be used to avoid
336  * deadlocking (user binds can get stuck behind faults, so the kernel binds
337  * which resolve those faults must not be queued behind user binds). On
338  * non-faulting devices any copy engine can be used.
339  *
340  * Returns exec queue on success, ERR_PTR on failure
341  */
342 struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
343 						struct xe_tile *tile,
344 						u32 flags, u64 extensions)
345 {
346 	struct xe_gt *gt = tile->primary_gt;
347 	struct xe_exec_queue *q;
348 	struct xe_vm *migrate_vm;
349 
350 	migrate_vm = xe_migrate_get_vm(tile->migrate);
351 	if (xe->info.has_usm) {
352 		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
353 							   XE_ENGINE_CLASS_COPY,
354 							   gt->usm.reserved_bcs_instance,
355 							   false);
356 
357 		if (!hwe) {
358 			xe_vm_put(migrate_vm);
359 			return ERR_PTR(-EINVAL);
360 		}
361 
362 		q = xe_exec_queue_create(xe, migrate_vm,
363 					 BIT(hwe->logical_instance), 1, hwe,
364 					 flags, extensions);
365 	} else {
366 		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
367 					       XE_ENGINE_CLASS_COPY, flags,
368 					       extensions);
369 	}
370 	xe_vm_put(migrate_vm);
371 
372 	if (!IS_ERR(q)) {
373 		int err = drm_syncobj_create(&q->ufence_syncobj,
374 					     DRM_SYNCOBJ_CREATE_SIGNALED,
375 					     NULL);
376 		if (err) {
377 			xe_exec_queue_put(q);
378 			return ERR_PTR(err);
379 		}
380 	}
381 
382 	return q;
383 }
384 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
385 
386 void xe_exec_queue_destroy(struct kref *ref)
387 {
388 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
389 	struct xe_exec_queue *eq, *next;
390 	int i;
391 
392 	xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
393 
394 	if (q->ufence_syncobj)
395 		drm_syncobj_put(q->ufence_syncobj);
396 
400 	if (xe_exec_queue_uses_pxp(q))
401 		xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
402 
403 	xe_exec_queue_last_fence_put_unlocked(q);
404 	for_each_tlb_inval(i)
405 		xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
406 
407 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
408 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
409 					 multi_gt_link)
410 			xe_exec_queue_put(eq);
411 	}
412 
413 	q->ops->destroy(q);
414 }
415 
416 void xe_exec_queue_fini(struct xe_exec_queue *q)
417 {
418 	/*
419 	 * Before releasing our ref to lrc and xef, accumulate our run ticks
420 	 * and wakeup any waiters.
421 	 */
422 	xe_exec_queue_update_run_ticks(q);
423 	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
424 		wake_up_var(&q->xef->exec_queue.pending_removal);
425 
426 	__xe_exec_queue_fini(q);
427 	__xe_exec_queue_free(q);
428 }
429 
430 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
431 {
432 	switch (q->class) {
433 	case XE_ENGINE_CLASS_RENDER:
434 		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
435 		break;
436 	case XE_ENGINE_CLASS_VIDEO_DECODE:
437 		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
438 		break;
439 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
440 		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
441 		break;
442 	case XE_ENGINE_CLASS_COPY:
443 		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
444 		break;
445 	case XE_ENGINE_CLASS_COMPUTE:
446 		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
447 		break;
448 	case XE_ENGINE_CLASS_OTHER:
449 		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
450 		break;
451 	default:
452 		XE_WARN_ON(q->class);
453 	}
454 }
455 
456 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
457 {
458 	struct xe_exec_queue *q;
459 
460 	mutex_lock(&xef->exec_queue.lock);
461 	q = xa_load(&xef->exec_queue.xa, id);
462 	if (q)
463 		xe_exec_queue_get(q);
464 	mutex_unlock(&xef->exec_queue.lock);
465 
466 	return q;
467 }
468 
469 enum xe_exec_queue_priority
470 xe_exec_queue_device_get_max_priority(struct xe_device *xe)
471 {
472 	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
473 				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
474 }
475 
476 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
477 				   u64 value)
478 {
479 	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
480 		return -EINVAL;
481 
482 	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
483 		return -EPERM;
484 
485 	q->sched_props.priority = value;
486 	return 0;
487 }
488 
489 static bool xe_exec_queue_enforce_schedule_limit(void)
490 {
491 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
492 	return true;
493 #else
494 	return !capable(CAP_SYS_NICE);
495 #endif
496 }
497 
498 static void
499 xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
500 			      enum xe_exec_queue_sched_prop prop,
501 			      u32 *min, u32 *max)
502 {
503 	switch (prop) {
504 	case XE_EXEC_QUEUE_JOB_TIMEOUT:
505 		*min = eclass->sched_props.job_timeout_min;
506 		*max = eclass->sched_props.job_timeout_max;
507 		break;
508 	case XE_EXEC_QUEUE_TIMESLICE:
509 		*min = eclass->sched_props.timeslice_min;
510 		*max = eclass->sched_props.timeslice_max;
511 		break;
512 	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
513 		*min = eclass->sched_props.preempt_timeout_min;
514 		*max = eclass->sched_props.preempt_timeout_max;
515 		break;
516 	default:
517 		break;
518 	}
519 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
520 	if (capable(CAP_SYS_NICE)) {
521 		switch (prop) {
522 		case XE_EXEC_QUEUE_JOB_TIMEOUT:
523 			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
524 			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
525 			break;
526 		case XE_EXEC_QUEUE_TIMESLICE:
527 			*min = XE_HW_ENGINE_TIMESLICE_MIN;
528 			*max = XE_HW_ENGINE_TIMESLICE_MAX;
529 			break;
530 		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
531 			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
532 			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
533 			break;
534 		default:
535 			break;
536 		}
537 	}
538 #endif
539 }
540 
541 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
542 				    u64 value)
543 {
544 	u32 min = 0, max = 0;
545 
546 	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
547 				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
548 
549 	if (xe_exec_queue_enforce_schedule_limit() &&
550 	    !xe_hw_engine_timeout_in_range(value, min, max))
551 		return -EINVAL;
552 
553 	q->sched_props.timeslice_us = value;
554 	return 0;
555 }
556 
557 static int
558 exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
559 {
560 	if (value == DRM_XE_PXP_TYPE_NONE)
561 		return 0;
562 
563 	/* we only support HWDRM sessions right now */
564 	if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
565 		return -EINVAL;
566 
567 	if (!xe_pxp_is_enabled(xe->pxp))
568 		return -ENODEV;
569 
570 	return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
571 }
572 
573 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
574 					     struct xe_exec_queue *q,
575 					     u64 value);
576 
577 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
578 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
579 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
580 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
581 };
582 
583 static int exec_queue_user_ext_set_property(struct xe_device *xe,
584 					    struct xe_exec_queue *q,
585 					    u64 extension)
586 {
587 	u64 __user *address = u64_to_user_ptr(extension);
588 	struct drm_xe_ext_set_property ext;
589 	int err;
590 	u32 idx;
591 
592 	err = copy_from_user(&ext, address, sizeof(ext));
593 	if (XE_IOCTL_DBG(xe, err))
594 		return -EFAULT;
595 
596 	if (XE_IOCTL_DBG(xe, ext.property >=
597 			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
598 	    XE_IOCTL_DBG(xe, ext.pad) ||
599 	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
600 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
601 			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE))
602 		return -EINVAL;
603 
604 	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
605 	if (!exec_queue_set_property_funcs[idx])
606 		return -EINVAL;
607 
608 	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
609 }
610 
611 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
612 					       struct xe_exec_queue *q,
613 					       u64 extension);
614 
615 static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
616 	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
617 };
618 
619 #define MAX_USER_EXTENSIONS	16
620 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
621 				      u64 extensions, int ext_number)
622 {
623 	u64 __user *address = u64_to_user_ptr(extensions);
624 	struct drm_xe_user_extension ext;
625 	int err;
626 	u32 idx;
627 
628 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
629 		return -E2BIG;
630 
631 	err = copy_from_user(&ext, address, sizeof(ext));
632 	if (XE_IOCTL_DBG(xe, err))
633 		return -EFAULT;
634 
635 	if (XE_IOCTL_DBG(xe, ext.pad) ||
636 	    XE_IOCTL_DBG(xe, ext.name >=
637 			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
638 		return -EINVAL;
639 
640 	idx = array_index_nospec(ext.name,
641 				 ARRAY_SIZE(exec_queue_user_extension_funcs));
642 	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
643 	if (XE_IOCTL_DBG(xe, err))
644 		return err;
645 
646 	if (ext.next_extension)
647 		return exec_queue_user_extensions(xe, q, ext.next_extension,
648 						  ++ext_number);
649 
650 	return 0;
651 }
652 
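/*
 * Userspace-side sketch of the extension chain parsed above (illustrative;
 * struct and field names follow the uapi definitions in xe_drm.h). A single
 * set-property node raising the queue priority could look like:
 *
 *	struct drm_xe_ext_set_property prop = {
 *		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY,
 *		.value = 2,
 *	};
 *	struct drm_xe_exec_queue_create create = {
 *		.extensions = (__u64)(uintptr_t)&prop,
 *		...
 *	};
 *
 * A value of 2 corresponds to XE_EXEC_QUEUE_PRIORITY_HIGH and is rejected
 * with -EPERM unless the caller has CAP_SYS_NICE (see exec_queue_set_priority()
 * above). Additional nodes are chained via .base.next_extension, up to
 * MAX_USER_EXTENSIONS entries.
 */
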
653 static u32 calc_validate_logical_mask(struct xe_device *xe,
654 				      struct drm_xe_engine_class_instance *eci,
655 				      u16 width, u16 num_placements)
656 {
657 	int len = width * num_placements;
658 	int i, j, n;
659 	u16 class;
660 	u16 gt_id;
661 	u32 return_mask = 0, prev_mask;
662 
663 	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
664 			 len > 1))
665 		return 0;
666 
667 	for (i = 0; i < width; ++i) {
668 		u32 current_mask = 0;
669 
670 		for (j = 0; j < num_placements; ++j) {
671 			struct xe_hw_engine *hwe;
672 
673 			n = j * width + i;
674 
675 			hwe = xe_hw_engine_lookup(xe, eci[n]);
676 			if (XE_IOCTL_DBG(xe, !hwe))
677 				return 0;
678 
679 			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
680 				return 0;
681 
682 			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
683 			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
684 				return 0;
685 
686 			class = eci[n].engine_class;
687 			gt_id = eci[n].gt_id;
688 
689 			if (width == 1 || !i)
690 				return_mask |= BIT(eci[n].engine_instance);
691 			current_mask |= BIT(eci[n].engine_instance);
692 		}
693 
694 		/* Parallel submissions must be logically contiguous */
695 		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
696 			return 0;
697 
698 		prev_mask = current_mask;
699 	}
700 
701 	return return_mask;
702 }
703 
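/*
 * Worked example of the instance layout validated above (illustrative): with
 * width == 2 and num_placements == 2 the user passes four instances, indexed
 * n = j * width + i (i == batch buffer slot, j == placement):
 *
 *	placement 0: eci[0] = instance 0, eci[1] = instance 1
 *	placement 1: eci[2] = instance 2, eci[3] = instance 3
 *
 * Slot 0 may then run on instances {0, 2} (mask 0b0101) and slot 1 on
 * instances {1, 3} (mask 0b1010), which satisfies the "prev_mask << 1"
 * contiguity rule. The returned mask is the slot-0 mask, 0b0101.
 */
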
704 int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
705 			       struct drm_file *file)
706 {
707 	struct xe_device *xe = to_xe_device(dev);
708 	struct xe_file *xef = to_xe_file(file);
709 	struct drm_xe_exec_queue_create *args = data;
710 	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
711 	struct drm_xe_engine_class_instance __user *user_eci =
712 		u64_to_user_ptr(args->instances);
713 	struct xe_hw_engine *hwe;
714 	struct xe_vm *vm;
715 	struct xe_tile *tile;
716 	struct xe_exec_queue *q = NULL;
717 	u32 logical_mask;
718 	u32 flags = 0;
719 	u32 id;
720 	u32 len;
721 	int err;
722 
723 	if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
724 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
725 		return -EINVAL;
726 
727 	len = args->width * args->num_placements;
728 	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
729 		return -EINVAL;
730 
731 	err = copy_from_user(eci, user_eci,
732 			     sizeof(struct drm_xe_engine_class_instance) * len);
733 	if (XE_IOCTL_DBG(xe, err))
734 		return -EFAULT;
735 
736 	if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
737 		return -EINVAL;
738 
739 	if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
740 		flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;
741 
742 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
743 		if (XE_IOCTL_DBG(xe, args->width != 1) ||
744 		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
745 		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
746 			return -EINVAL;
747 
748 		for_each_tile(tile, xe, id) {
749 			struct xe_exec_queue *new;
750 
751 			flags |= EXEC_QUEUE_FLAG_VM;
752 			if (id)
753 				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
754 
755 			new = xe_exec_queue_create_bind(xe, tile, flags,
756 							args->extensions);
757 			if (IS_ERR(new)) {
758 				err = PTR_ERR(new);
759 				if (q)
760 					goto put_exec_queue;
761 				return err;
762 			}
763 			if (id == 0)
764 				q = new;
765 			else
766 				list_add_tail(&new->multi_gt_list,
767 					      &q->multi_gt_link);
768 		}
769 	} else {
770 		logical_mask = calc_validate_logical_mask(xe, eci,
771 							  args->width,
772 							  args->num_placements);
773 		if (XE_IOCTL_DBG(xe, !logical_mask))
774 			return -EINVAL;
775 
776 		hwe = xe_hw_engine_lookup(xe, eci[0]);
777 		if (XE_IOCTL_DBG(xe, !hwe))
778 			return -EINVAL;
779 
780 		vm = xe_vm_lookup(xef, args->vm_id);
781 		if (XE_IOCTL_DBG(xe, !vm))
782 			return -ENOENT;
783 
784 		err = down_read_interruptible(&vm->lock);
785 		if (err) {
786 			xe_vm_put(vm);
787 			return err;
788 		}
789 
790 		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
791 			up_read(&vm->lock);
792 			xe_vm_put(vm);
793 			return -ENOENT;
794 		}
795 
796 		q = xe_exec_queue_create(xe, vm, logical_mask,
797 					 args->width, hwe, flags,
798 					 args->extensions);
799 		up_read(&vm->lock);
800 		xe_vm_put(vm);
801 		if (IS_ERR(q))
802 			return PTR_ERR(q);
803 
804 		if (xe_vm_in_preempt_fence_mode(vm)) {
805 			q->lr.context = dma_fence_context_alloc(1);
806 
807 			err = xe_vm_add_compute_exec_queue(vm, q);
808 			if (XE_IOCTL_DBG(xe, err))
809 				goto put_exec_queue;
810 		}
811 
812 		if (q->vm && q->hwe->hw_engine_group) {
813 			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
814 			if (err)
815 				goto put_exec_queue;
816 		}
817 	}
818 
819 	q->xef = xe_file_get(xef);
820 
821 	/* user id alloc must always be last in ioctl to prevent UAF */
822 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
823 	if (err)
824 		goto kill_exec_queue;
825 
826 	args->exec_queue_id = id;
827 
828 	return 0;
829 
830 kill_exec_queue:
831 	xe_exec_queue_kill(q);
832 put_exec_queue:
833 	xe_exec_queue_put(q);
834 	return err;
835 }
836 
837 int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
838 				     struct drm_file *file)
839 {
840 	struct xe_device *xe = to_xe_device(dev);
841 	struct xe_file *xef = to_xe_file(file);
842 	struct drm_xe_exec_queue_get_property *args = data;
843 	struct xe_exec_queue *q;
844 	int ret;
845 
846 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
847 		return -EINVAL;
848 
849 	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
850 	if (XE_IOCTL_DBG(xe, !q))
851 		return -ENOENT;
852 
853 	switch (args->property) {
854 	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
855 		args->value = q->ops->reset_status(q);
856 		ret = 0;
857 		break;
858 	default:
859 		ret = -EINVAL;
860 	}
861 
862 	xe_exec_queue_put(q);
863 
864 	return ret;
865 }
866 
867 /**
868  * xe_exec_queue_lrc() - Get the LRC from exec queue.
869  * @q: The exec_queue.
870  *
871  * Retrieves the primary LRC for the exec queue. Note that this function
872  * returns only the first LRC instance, even when multiple parallel LRCs
873  * are configured.
874  *
875  * Return: Pointer to the first LRC of the exec queue
876  */
877 struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
878 {
879 	return q->lrc[0];
880 }
881 
882 /**
883  * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
884  * @q: The exec_queue
885  *
886  * Return: True if the exec_queue is long-running, false otherwise.
887  */
888 bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
889 {
890 	return q->vm && xe_vm_in_lr_mode(q->vm) &&
891 		!(q->flags & EXEC_QUEUE_FLAG_VM);
892 }
893 
894 /**
895  * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
896  * @q: The exec_queue
897  *
898  * FIXME: Need to determine what to use as the short-lived
899  * timeline lock for the exec_queues, so that the return value
900  * of this function becomes more than just an advisory
901  * snapshot in time. The timeline lock must protect the
902  * seqno from racing submissions on the same exec_queue.
903  * Typically vm->resv, but user-created timeline locks use the migrate vm
904  * and never grab the migrate vm->resv, so we have a race there.
905  *
906  * Return: True if the exec_queue is idle, false otherwise.
907  */
908 bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
909 {
910 	if (xe_exec_queue_is_parallel(q)) {
911 		int i;
912 
913 		for (i = 0; i < q->width; ++i) {
914 			if (xe_lrc_seqno(q->lrc[i]) !=
915 			    q->lrc[i]->fence_ctx.next_seqno - 1)
916 				return false;
917 		}
918 
919 		return true;
920 	}
921 
922 	return xe_lrc_seqno(q->lrc[0]) ==
923 		q->lrc[0]->fence_ctx.next_seqno - 1;
924 }
925 
926 /**
927  * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
928  * from hw
929  * @q: The exec queue
930  *
931  * Update the timestamp saved by HW for this exec queue and accumulate the
932  * run ticks calculated from the delta since the last update.
933  */
934 void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
935 {
936 	struct xe_device *xe = gt_to_xe(q->gt);
937 	struct xe_lrc *lrc;
938 	u64 old_ts, new_ts;
939 	int idx;
940 
941 	/*
942 	 * Jobs that are executed by the kernel don't have a corresponding xe_file
943 	 * and thus are not accounted.
944 	 */
945 	if (!q->xef)
946 		return;
947 
948 	/* Synchronize with unbind while holding the xe file open */
949 	if (!drm_dev_enter(&xe->drm, &idx))
950 		return;
951 	/*
952 	 * Only sample the first LRC. For parallel submission, all of them are
953 	 * scheduled together and we compensate for that below by multiplying by
954 	 * width - this may introduce errors if that premise is not true and
955 	 * they don't exit 100% aligned. On the other hand, looping through
956 	 * the LRCs and reading them at different times could also introduce
957 	 * errors.
958 	 */
959 	lrc = q->lrc[0];
960 	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
961 	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
962 
963 	drm_dev_exit(idx);
964 }
965 
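/*
 * Example of the accounting above (illustrative): for a parallel queue of
 * width == 4 whose first LRC advanced by 1000 ticks since the last sample,
 * 4000 ticks are added to xef->run_ticks[] for the queue's class, on the
 * assumption that all four LRCs ran for roughly the same time.
 */
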
966 /**
967  * xe_exec_queue_kill - permanently stop all execution from an exec queue
968  * @q: The exec queue
969  *
970  * This function permanently stops all activity on an exec queue. If the queue
971  * is actively executing on the HW, it will be kicked off the engine; any
972  * pending jobs are discarded and all future submissions are rejected.
973  * This function is safe to call multiple times.
974  */
975 void xe_exec_queue_kill(struct xe_exec_queue *q)
976 {
977 	struct xe_exec_queue *eq = q, *next;
978 
979 	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
980 				 multi_gt_link) {
981 		q->ops->kill(eq);
982 		xe_vm_remove_compute_exec_queue(q->vm, eq);
983 	}
984 
985 	q->ops->kill(q);
986 	xe_vm_remove_compute_exec_queue(q->vm, q);
987 }
988 
989 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
990 				struct drm_file *file)
991 {
992 	struct xe_device *xe = to_xe_device(dev);
993 	struct xe_file *xef = to_xe_file(file);
994 	struct drm_xe_exec_queue_destroy *args = data;
995 	struct xe_exec_queue *q;
996 
997 	if (XE_IOCTL_DBG(xe, args->pad) ||
998 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
999 		return -EINVAL;
1000 
1001 	mutex_lock(&xef->exec_queue.lock);
1002 	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
1003 	if (q)
1004 		atomic_inc(&xef->exec_queue.pending_removal);
1005 	mutex_unlock(&xef->exec_queue.lock);
1006 
1007 	if (XE_IOCTL_DBG(xe, !q))
1008 		return -ENOENT;
1009 
1010 	if (q->vm && q->hwe->hw_engine_group)
1011 		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
1012 
1013 	xe_exec_queue_kill(q);
1014 
1015 	trace_xe_exec_queue_close(q);
1016 	xe_exec_queue_put(q);
1017 
1018 	return 0;
1019 }
1020 
1021 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
1022 						    struct xe_vm *vm)
1023 {
1024 	if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
1025 		xe_migrate_job_lock_assert(q);
1026 	} else if (q->flags & EXEC_QUEUE_FLAG_VM) {
1027 		lockdep_assert_held(&vm->lock);
1028 	} else {
1029 		xe_vm_assert_held(vm);
1030 		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
1031 	}
1032 }
1033 
1034 /**
1035  * xe_exec_queue_last_fence_put() - Drop ref to last fence
1036  * @q: The exec queue
1037  * @vm: The VM the engine does a bind or exec for
1038  */
1039 void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
1040 {
1041 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1042 
1043 	xe_exec_queue_last_fence_put_unlocked(q);
1044 }
1045 
1046 /**
1047  * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
1048  * @q: The exec queue
1049  *
1050  * Only safe to be called from xe_exec_queue_destroy().
1051  */
1052 void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
1053 {
1054 	if (q->last_fence) {
1055 		dma_fence_put(q->last_fence);
1056 		q->last_fence = NULL;
1057 	}
1058 }
1059 
1060 /**
1061  * xe_exec_queue_last_fence_get() - Get last fence
1062  * @q: The exec queue
1063  * @vm: The VM the engine does a bind or exec for
1064  *
1065  * Get last fence, takes a ref
1066  *
1067  * Returns: last fence if not signaled, dma fence stub if signaled
1068  */
1069 struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
1070 					       struct xe_vm *vm)
1071 {
1072 	struct dma_fence *fence;
1073 
1074 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1075 
1076 	if (q->last_fence &&
1077 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1078 		xe_exec_queue_last_fence_put(q, vm);
1079 
1080 	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1081 	dma_fence_get(fence);
1082 	return fence;
1083 }
1084 
1085 /**
1086  * xe_exec_queue_last_fence_get_for_resume() - Get last fence
1087  * @q: The exec queue
1088  * @vm: The VM the engine does a bind or exec for
1089  *
1090  * Get last fence, takes a ref. Only safe to be called in the context of
1091  * resuming the hw engine group's long-running exec queue, when the group
1092  * semaphore is held.
1093  *
1094  * Returns: last fence if not signaled, dma fence stub if signaled
1095  */
1096 struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
1097 							  struct xe_vm *vm)
1098 {
1099 	struct dma_fence *fence;
1100 
1101 	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);
1102 
1103 	if (q->last_fence &&
1104 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
1105 		xe_exec_queue_last_fence_put_unlocked(q);
1106 
1107 	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
1108 	dma_fence_get(fence);
1109 	return fence;
1110 }
1111 
1112 /**
1113  * xe_exec_queue_last_fence_set() - Set last fence
1114  * @q: The exec queue
1115  * @vm: The VM the engine does a bind or exec for
1116  * @fence: The fence
1117  *
1118  * Set the last fence for the engine. Takes a reference on the fence;
1119  * xe_exec_queue_last_fence_put() should be called when closing the engine.
1120  */
1121 void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
1122 				  struct dma_fence *fence)
1123 {
1124 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1125 	xe_assert(vm->xe, !dma_fence_is_container(fence));
1126 
1127 	xe_exec_queue_last_fence_put(q, vm);
1128 	q->last_fence = dma_fence_get(fence);
1129 }
1130 
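/*
 * Typical usage sketch for the last-fence helpers (illustrative, not lifted
 * from a specific caller): a bind takes the current last fence as an input
 * dependency and then publishes the fence of its own job, so later work on
 * the same queue is ordered behind it:
 *
 *	fence = xe_exec_queue_last_fence_get(q, vm);
 *	... add fence as a dependency of the new job, then dma_fence_put() it ...
 *	... submit the job, obtaining job_fence ...
 *	xe_exec_queue_last_fence_set(q, vm, job_fence);
 *
 * The locking rules checked by xe_exec_queue_last_fence_lockdep_assert() must
 * hold across the get/set pair.
 */
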
1131 /**
1132  * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
1133  * @q: The exec queue
1134  * @vm: The VM the engine does a bind for
1135  * @type: Either primary or media GT
1136  */
1137 void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
1138 					    struct xe_vm *vm,
1139 					    unsigned int type)
1140 {
1141 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1142 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1143 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1144 
1145 	xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
1146 }
1147 
1148 /**
1149  * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
1150  * invalidation fence unlocked
1151  * @q: The exec queue
1152  * @type: Either primary or media GT
1153  *
1154  * Only safe to be called from xe_exec_queue_destroy().
1155  */
1156 void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
1157 						     unsigned int type)
1158 {
1159 	xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1160 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1161 
1162 	dma_fence_put(q->tlb_inval[type].last_fence);
1163 	q->tlb_inval[type].last_fence = NULL;
1164 }
1165 
1166 /**
1167  * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
1168  * @q: The exec queue
1169  * @vm: The VM the engine does a bind for
1170  * @type: Either primary or media GT
1171  *
1172  * Get last fence, takes a ref
1173  *
1174  * Returns: last fence if not signaled, dma fence stub if signaled
1175  */
1176 struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
1177 							 struct xe_vm *vm,
1178 							 unsigned int type)
1179 {
1180 	struct dma_fence *fence;
1181 
1182 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1183 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1184 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1185 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1186 				      EXEC_QUEUE_FLAG_MIGRATE));
1187 
1188 	if (q->tlb_inval[type].last_fence &&
1189 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1190 		     &q->tlb_inval[type].last_fence->flags))
1191 		xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1192 
1193 	fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
1194 	dma_fence_get(fence);
1195 	return fence;
1196 }
1197 
1198 /**
1199  * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
1200  * @q: The exec queue
1201  * @vm: The VM the engine does a bind for
1202  * @fence: The fence
1203  * @type: Either primary or media GT
1204  *
1205  * Set the last fence for the TLB invalidation type on the queue. Takes a
1206  * reference on the fence; xe_exec_queue_tlb_inval_last_fence_put() should be
1207  * called when closing the queue.
1208  */
1209 void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
1210 					    struct xe_vm *vm,
1211 					    struct dma_fence *fence,
1212 					    unsigned int type)
1213 {
1214 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
1215 	xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
1216 		  type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
1217 	xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
1218 				      EXEC_QUEUE_FLAG_MIGRATE));
1219 	xe_assert(vm->xe, !dma_fence_is_container(fence));
1220 
1221 	xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
1222 	q->tlb_inval[type].last_fence = dma_fence_get(fence);
1223 }
1224 
1225 /**
1226  * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
1227  * within all LRCs of a queue.
1228  * @q: the &xe_exec_queue struct instance containing target LRCs
1229  * @scratch: scratch buffer to be used as temporary storage
1230  *
1231  * Returns: zero on success, negative error code on failure
1232  */
1233 int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
1234 {
1235 	int i;
1236 	int err = 0;
1237 
1238 	for (i = 0; i < q->width; ++i) {
1239 		struct xe_lrc *lrc;
1240 
1241 		/* Pairs with WRITE_ONCE in __xe_exec_queue_init */
1242 		lrc = READ_ONCE(q->lrc[i]);
1243 		if (!lrc)
1244 			continue;
1245 
1246 		xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
1247 		xe_lrc_update_hwctx_regs_with_address(lrc);
1248 		err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
1249 		if (err)
1250 			break;
1251 	}
1252 
1253 	return err;
1254 }
1255