xref: /linux/drivers/gpu/drm/xe/xe_exec_queue.c (revision 4e73826089ce899357580bbf6e0afe4e6f9900b7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_exec_queue.h"
7 
8 #include <linux/nospec.h>
9 
10 #include <drm/drm_device.h>
11 #include <drm/drm_file.h>
12 #include <drm/xe_drm.h>
13 
14 #include "xe_device.h"
15 #include "xe_gt.h"
16 #include "xe_hw_engine_class_sysfs.h"
17 #include "xe_hw_fence.h"
18 #include "xe_lrc.h"
19 #include "xe_macros.h"
20 #include "xe_migrate.h"
21 #include "xe_pm.h"
22 #include "xe_ring_ops_types.h"
23 #include "xe_trace.h"
24 #include "xe_vm.h"
25 
26 enum xe_exec_queue_sched_prop {
27 	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
28 	XE_EXEC_QUEUE_TIMESLICE = 1,
29 	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
30 	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
31 };
32 
33 static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
34 						    struct xe_vm *vm,
35 						    u32 logical_mask,
36 						    u16 width, struct xe_hw_engine *hwe,
37 						    u32 flags)
38 {
39 	struct xe_exec_queue *q;
40 	struct xe_gt *gt = hwe->gt;
41 	int err;
42 	int i;
43 
44 	/* only kernel queues can be permanent */
45 	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
46 
47 	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
48 	if (!q)
49 		return ERR_PTR(-ENOMEM);
50 
51 	kref_init(&q->refcount);
52 	q->flags = flags;
53 	q->hwe = hwe;
54 	q->gt = gt;
55 	if (vm)
56 		q->vm = xe_vm_get(vm);
57 	q->class = hwe->class;
58 	q->width = width;
59 	q->logical_mask = logical_mask;
60 	q->fence_irq = &gt->fence_irq[hwe->class];
61 	q->ring_ops = gt->ring_ops[hwe->class];
62 	q->ops = gt->exec_queue_ops;
63 	INIT_LIST_HEAD(&q->persistent.link);
64 	INIT_LIST_HEAD(&q->compute.link);
65 	INIT_LIST_HEAD(&q->multi_gt_link);
66 
67 	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
68 	q->sched_props.preempt_timeout_us =
69 				hwe->eclass->sched_props.preempt_timeout_us;
70 	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
71 	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
72 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
73 	else
74 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
75 
76 	if (xe_exec_queue_is_parallel(q)) {
77 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
78 		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
79 	}
80 	if (q->flags & EXEC_QUEUE_FLAG_VM) {
81 		q->bind.fence_ctx = dma_fence_context_alloc(1);
82 		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
83 	}
84 
85 	for (i = 0; i < width; ++i) {
86 		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
87 		if (err)
88 			goto err_lrc;
89 	}
90 
91 	err = q->ops->init(q);
92 	if (err)
93 		goto err_lrc;
94 
95 	/*
96 	 * Normally the user vm holds an rpm ref to keep the device awake,
97 	 * and the context holds a ref to the vm. However, for some engines
98 	 * we use the kernel's migrate vm underneath, which provides no such
99 	 * rpm ref, or we lack a vm entirely. Make sure we keep a ref here
100 	 * so we can perform GuC CT actions when needed. The caller is expected
101 	 * to have already grabbed the rpm ref outside any sensitive locks.
102 	 */
103 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
104 		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
105 
106 	return q;
107 
108 err_lrc:
109 	for (i = i - 1; i >= 0; --i)
110 		xe_lrc_finish(q->lrc + i);
111 	kfree(q);
112 	return ERR_PTR(err);
113 }
114 
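/**
 * xe_exec_queue_create() - Create an exec queue
 * @xe: Xe device
 * @vm: The VM the exec queue operates on, or NULL
 * @logical_mask: Logical mask of engine instances the queue can be placed on
 * @width: Number of engines submitted to in parallel
 * @hwe: Hardware engine backing the queue
 * @flags: EXEC_QUEUE_FLAG_* flags
 *
 * Takes the VM lock when a VM is given, then allocates and initializes the
 * queue, including its LRCs and the submission backend state.
 *
 * Return: The new exec queue on success, ERR_PTR() on failure.
 */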
115 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
116 					   u32 logical_mask, u16 width,
117 					   struct xe_hw_engine *hwe, u32 flags)
118 {
119 	struct xe_exec_queue *q;
120 	int err;
121 
122 	if (vm) {
123 		err = xe_vm_lock(vm, true);
124 		if (err)
125 			return ERR_PTR(err);
126 	}
127 	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
128 	if (vm)
129 		xe_vm_unlock(vm);
130 
131 	return q;
132 }
133 
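/**
 * xe_exec_queue_create_class() - Create an exec queue for an engine class
 * @xe: Xe device
 * @gt: GT to pick engines from
 * @vm: The VM the exec queue operates on, or NULL
 * @class: Engine class to build the logical mask from
 * @flags: EXEC_QUEUE_FLAG_* flags
 *
 * Builds a logical mask from all non-reserved engines of @class on @gt and
 * creates a width-1 exec queue on the first matching engine.
 *
 * Return: The new exec queue on success, ERR_PTR(-ENODEV) if @gt has no
 * usable engine of @class, or another ERR_PTR() on failure.
 */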
134 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
135 						 struct xe_vm *vm,
136 						 enum xe_engine_class class, u32 flags)
137 {
138 	struct xe_hw_engine *hwe, *hwe0 = NULL;
139 	enum xe_hw_engine_id id;
140 	u32 logical_mask = 0;
141 
142 	for_each_hw_engine(hwe, gt, id) {
143 		if (xe_hw_engine_is_reserved(hwe))
144 			continue;
145 
146 		if (hwe->class == class) {
147 			logical_mask |= BIT(hwe->logical_instance);
148 			if (!hwe0)
149 				hwe0 = hwe;
150 		}
151 	}
152 
153 	if (!logical_mask)
154 		return ERR_PTR(-ENODEV);
155 
156 	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
157 }
158 
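/**
 * xe_exec_queue_destroy() - Final exec queue release
 * @ref: The embedded refcount of the exec queue being released
 *
 * Called via kref when the last reference is dropped. Drops the last fence,
 * puts any child bind queues linked on other GTs and hands the queue to the
 * backend's fini hook.
 */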
159 void xe_exec_queue_destroy(struct kref *ref)
160 {
161 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
162 	struct xe_exec_queue *eq, *next;
163 
164 	xe_exec_queue_last_fence_put_unlocked(q);
165 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
166 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
167 					 multi_gt_link)
168 			xe_exec_queue_put(eq);
169 	}
170 
171 	q->ops->fini(q);
172 }
173 
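/**
 * xe_exec_queue_fini() - Free exec queue resources
 * @q: The exec queue
 *
 * Finishes the queue's LRCs, drops the rpm reference taken at creation when
 * applicable, releases the VM reference and frees the queue.
 */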
174 void xe_exec_queue_fini(struct xe_exec_queue *q)
175 {
176 	int i;
177 
178 	for (i = 0; i < q->width; ++i)
179 		xe_lrc_finish(q->lrc + i);
180 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
181 		xe_device_mem_access_put(gt_to_xe(q->gt));
182 	if (q->vm)
183 		xe_vm_put(q->vm);
184 
185 	kfree(q);
186 }
187 
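/**
 * xe_exec_queue_assign_name() - Assign a name based on the engine class
 * @q: The exec queue
 * @instance: Instance number appended to the class prefix
 *
 * Names follow the usual engine naming, e.g. "rcs0", "bcs1" or "ccs2".
 */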
188 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
189 {
190 	switch (q->class) {
191 	case XE_ENGINE_CLASS_RENDER:
192 		sprintf(q->name, "rcs%d", instance);
193 		break;
194 	case XE_ENGINE_CLASS_VIDEO_DECODE:
195 		sprintf(q->name, "vcs%d", instance);
196 		break;
197 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
198 		sprintf(q->name, "vecs%d", instance);
199 		break;
200 	case XE_ENGINE_CLASS_COPY:
201 		sprintf(q->name, "bcs%d", instance);
202 		break;
203 	case XE_ENGINE_CLASS_COMPUTE:
204 		sprintf(q->name, "ccs%d", instance);
205 		break;
206 	case XE_ENGINE_CLASS_OTHER:
207 		sprintf(q->name, "gsccs%d", instance);
208 		break;
209 	default:
210 		XE_WARN_ON(q->class);
211 	}
212 }
213 
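/**
 * xe_exec_queue_lookup() - Look up an exec queue by user-visible ID
 * @xef: Xe file the queue was created on
 * @id: Exec queue ID returned by the create ioctl
 *
 * Takes an extra reference on the queue if found; the caller must drop it
 * with xe_exec_queue_put(), as the ioctl handlers below do, e.g.:
 *
 *	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
 *	if (XE_IOCTL_DBG(xe, !q))
 *		return -ENOENT;
 *	...
 *	xe_exec_queue_put(q);
 *
 * Return: The exec queue on success, NULL if no queue with @id exists.
 */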
214 struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
215 {
216 	struct xe_exec_queue *q;
217 
218 	mutex_lock(&xef->exec_queue.lock);
219 	q = xa_load(&xef->exec_queue.xa, id);
220 	if (q)
221 		xe_exec_queue_get(q);
222 	mutex_unlock(&xef->exec_queue.lock);
223 
224 	return q;
225 }
226 
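/**
 * xe_exec_queue_device_get_max_priority() - Maximum priority for the caller
 * @xe: Xe device
 *
 * Return: XE_EXEC_QUEUE_PRIORITY_HIGH if the caller has CAP_SYS_NICE,
 * XE_EXEC_QUEUE_PRIORITY_NORMAL otherwise.
 */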
227 enum xe_exec_queue_priority
228 xe_exec_queue_device_get_max_priority(struct xe_device *xe)
229 {
230 	return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH :
231 				       XE_EXEC_QUEUE_PRIORITY_NORMAL;
232 }
233 
234 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
235 				   u64 value, bool create)
236 {
237 	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
238 		return -EINVAL;
239 
240 	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
241 		return -EPERM;
242 
243 	return q->ops->set_priority(q, value);
244 }
245 
246 static bool xe_exec_queue_enforce_schedule_limit(void)
247 {
248 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
249 	return true;
250 #else
251 	return !capable(CAP_SYS_NICE);
252 #endif
253 }
254 
255 static void
256 xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
257 			      enum xe_exec_queue_sched_prop prop,
258 			      u32 *min, u32 *max)
259 {
260 	switch (prop) {
261 	case XE_EXEC_QUEUE_JOB_TIMEOUT:
262 		*min = eclass->sched_props.job_timeout_min;
263 		*max = eclass->sched_props.job_timeout_max;
264 		break;
265 	case XE_EXEC_QUEUE_TIMESLICE:
266 		*min = eclass->sched_props.timeslice_min;
267 		*max = eclass->sched_props.timeslice_max;
268 		break;
269 	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
270 		*min = eclass->sched_props.preempt_timeout_min;
271 		*max = eclass->sched_props.preempt_timeout_max;
272 		break;
273 	default:
274 		break;
275 	}
276 #if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
277 	if (capable(CAP_SYS_NICE)) {
278 		switch (prop) {
279 		case XE_EXEC_QUEUE_JOB_TIMEOUT:
280 			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
281 			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
282 			break;
283 		case XE_EXEC_QUEUE_TIMESLICE:
284 			*min = XE_HW_ENGINE_TIMESLICE_MIN;
285 			*max = XE_HW_ENGINE_TIMESLICE_MAX;
286 			break;
287 		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
288 			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
289 			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
290 			break;
291 		default:
292 			break;
293 		}
294 	}
295 #endif
296 }
297 
298 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
299 				    u64 value, bool create)
300 {
301 	u32 min = 0, max = 0;
302 
303 	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
304 				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);
305 
306 	if (xe_exec_queue_enforce_schedule_limit() &&
307 	    !xe_hw_engine_timeout_in_range(value, min, max))
308 		return -EINVAL;
309 
310 	return q->ops->set_timeslice(q, value);
311 }
312 
313 static int exec_queue_set_preemption_timeout(struct xe_device *xe,
314 					     struct xe_exec_queue *q, u64 value,
315 					     bool create)
316 {
317 	u32 min = 0, max = 0;
318 
319 	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
320 				      XE_EXEC_QUEUE_PREEMPT_TIMEOUT, &min, &max);
321 
322 	if (xe_exec_queue_enforce_schedule_limit() &&
323 	    !xe_hw_engine_timeout_in_range(value, min, max))
324 		return -EINVAL;
325 
326 	return q->ops->set_preempt_timeout(q, value);
327 }
328 
329 static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
330 				      u64 value, bool create)
331 {
332 	if (XE_IOCTL_DBG(xe, !create))
333 		return -EINVAL;
334 
335 	if (XE_IOCTL_DBG(xe, xe_vm_in_preempt_fence_mode(q->vm)))
336 		return -EINVAL;
337 
338 	if (value)
339 		q->flags |= EXEC_QUEUE_FLAG_PERSISTENT;
340 	else
341 		q->flags &= ~EXEC_QUEUE_FLAG_PERSISTENT;
342 
343 	return 0;
344 }
345 
346 static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue *q,
347 				      u64 value, bool create)
348 {
349 	u32 min = 0, max = 0;
350 
351 	if (XE_IOCTL_DBG(xe, !create))
352 		return -EINVAL;
353 
354 	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
355 				      XE_EXEC_QUEUE_JOB_TIMEOUT, &min, &max);
356 
357 	if (xe_exec_queue_enforce_schedule_limit() &&
358 	    !xe_hw_engine_timeout_in_range(value, min, max))
359 		return -EINVAL;
360 
361 	return q->ops->set_job_timeout(q, value);
362 }
363 
364 static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
365 				      u64 value, bool create)
366 {
367 	if (XE_IOCTL_DBG(xe, !create))
368 		return -EINVAL;
369 
370 	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
371 		return -EINVAL;
372 
373 	q->usm.acc_trigger = value;
374 
375 	return 0;
376 }
377 
378 static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
379 				     u64 value, bool create)
380 {
381 	if (XE_IOCTL_DBG(xe, !create))
382 		return -EINVAL;
383 
384 	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
385 		return -EINVAL;
386 
387 	q->usm.acc_notify = value;
388 
389 	return 0;
390 }
391 
392 static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
393 					  u64 value, bool create)
394 {
395 	if (XE_IOCTL_DBG(xe, !create))
396 		return -EINVAL;
397 
398 	if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
399 		return -EINVAL;
400 
401 	if (value > DRM_XE_ACC_GRANULARITY_64M)
402 		return -EINVAL;
403 
404 	q->usm.acc_granularity = value;
405 
406 	return 0;
407 }
408 
409 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
410 					     struct xe_exec_queue *q,
411 					     u64 value, bool create);
412 
413 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
414 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
415 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
416 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT] = exec_queue_set_preemption_timeout,
417 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE] = exec_queue_set_persistence,
418 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT] = exec_queue_set_job_timeout,
419 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
420 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
421 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
422 };
423 
424 static int exec_queue_user_ext_set_property(struct xe_device *xe,
425 					    struct xe_exec_queue *q,
426 					    u64 extension,
427 					    bool create)
428 {
429 	u64 __user *address = u64_to_user_ptr(extension);
430 	struct drm_xe_ext_set_property ext;
431 	int err;
432 	u32 idx;
433 
434 	err = __copy_from_user(&ext, address, sizeof(ext));
435 	if (XE_IOCTL_DBG(xe, err))
436 		return -EFAULT;
437 
438 	if (XE_IOCTL_DBG(xe, ext.property >=
439 			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
440 	    XE_IOCTL_DBG(xe, ext.pad))
441 		return -EINVAL;
442 
443 	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
444 	return exec_queue_set_property_funcs[idx](xe, q, ext.value, create);
445 }
446 
447 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
448 					       struct xe_exec_queue *q,
449 					       u64 extension,
450 					       bool create);
451 
452 static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
453 	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
454 };
455 
456 #define MAX_USER_EXTENSIONS	16
457 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
458 				      u64 extensions, int ext_number, bool create)
459 {
460 	u64 __user *address = u64_to_user_ptr(extensions);
461 	struct drm_xe_user_extension ext;
462 	int err;
463 	u32 idx;
464 
465 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
466 		return -E2BIG;
467 
468 	err = __copy_from_user(&ext, address, sizeof(ext));
469 	if (XE_IOCTL_DBG(xe, err))
470 		return -EFAULT;
471 
472 	if (XE_IOCTL_DBG(xe, ext.pad) ||
473 	    XE_IOCTL_DBG(xe, ext.name >=
474 			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
475 		return -EINVAL;
476 
477 	idx = array_index_nospec(ext.name,
478 				 ARRAY_SIZE(exec_queue_user_extension_funcs));
479 	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
480 	if (XE_IOCTL_DBG(xe, err))
481 		return err;
482 
483 	if (ext.next_extension)
484 		return exec_queue_user_extensions(xe, q, ext.next_extension,
485 					      ++ext_number, create);
486 
487 	return 0;
488 }
489 
490 static const enum xe_engine_class user_to_xe_engine_class[] = {
491 	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
492 	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
493 	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
494 	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
495 	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
496 };
497 
498 static struct xe_hw_engine *
499 find_hw_engine(struct xe_device *xe,
500 	       struct drm_xe_engine_class_instance eci)
501 {
502 	u32 idx;
503 
504 	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
505 		return NULL;
506 
507 	if (eci.gt_id >= xe->info.gt_count)
508 		return NULL;
509 
510 	idx = array_index_nospec(eci.engine_class,
511 				 ARRAY_SIZE(user_to_xe_engine_class));
512 
513 	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
514 			       user_to_xe_engine_class[idx],
515 			       eci.engine_instance, true);
516 }
517 
518 static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
519 					struct drm_xe_engine_class_instance *eci,
520 					u16 width, u16 num_placements)
521 {
522 	struct xe_hw_engine *hwe;
523 	enum xe_hw_engine_id id;
524 	u32 logical_mask = 0;
525 
526 	if (XE_IOCTL_DBG(xe, width != 1))
527 		return 0;
528 	if (XE_IOCTL_DBG(xe, num_placements != 1))
529 		return 0;
530 	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
531 		return 0;
532 
533 	eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY;
534 
535 	for_each_hw_engine(hwe, gt, id) {
536 		if (xe_hw_engine_is_reserved(hwe))
537 			continue;
538 
539 		if (hwe->class ==
540 		    user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY])
541 			logical_mask |= BIT(hwe->logical_instance);
542 	}
543 
544 	return logical_mask;
545 }
546 
547 static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
548 				      struct drm_xe_engine_class_instance *eci,
549 				      u16 width, u16 num_placements)
550 {
551 	int len = width * num_placements;
552 	int i, j, n;
553 	u16 class;
554 	u16 gt_id;
555 	u32 return_mask = 0, prev_mask;
556 
557 	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
558 			 len > 1))
559 		return 0;
560 
561 	for (i = 0; i < width; ++i) {
562 		u32 current_mask = 0;
563 
564 		for (j = 0; j < num_placements; ++j) {
565 			struct xe_hw_engine *hwe;
566 
567 			n = j * width + i;
568 
569 			hwe = find_hw_engine(xe, eci[n]);
570 			if (XE_IOCTL_DBG(xe, !hwe))
571 				return 0;
572 
573 			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
574 				return 0;
575 
576 			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
577 			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
578 				return 0;
579 
580 			class = eci[n].engine_class;
581 			gt_id = eci[n].gt_id;
582 
583 			if (width == 1 || !i)
584 				return_mask |= BIT(eci[n].engine_instance);
585 			current_mask |= BIT(eci[n].engine_instance);
586 		}
587 
588 		/* Parallel submissions must be logically contiguous */
589 		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
590 			return 0;
591 
592 		prev_mask = current_mask;
593 	}
594 
595 	return return_mask;
596 }
597 
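/**
 * xe_exec_queue_create_ioctl() - DRM_IOCTL_XE_EXEC_QUEUE_CREATE handler
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_exec_queue_create
 * @file: DRM file
 *
 * Validates the requested placements, creates the exec queue (or one bind
 * queue per non-media GT for DRM_XE_ENGINE_CLASS_VM_BIND), applies any user
 * extensions and returns the new queue ID through @data.
 *
 * Return: 0 on success, negative error code on failure.
 */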
598 int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
599 			       struct drm_file *file)
600 {
601 	struct xe_device *xe = to_xe_device(dev);
602 	struct xe_file *xef = to_xe_file(file);
603 	struct drm_xe_exec_queue_create *args = data;
604 	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
605 	struct drm_xe_engine_class_instance __user *user_eci =
606 		u64_to_user_ptr(args->instances);
607 	struct xe_hw_engine *hwe;
608 	struct xe_vm *vm, *migrate_vm;
609 	struct xe_gt *gt;
610 	struct xe_exec_queue *q = NULL;
611 	u32 logical_mask;
612 	u32 id;
613 	u32 len;
614 	int err;
615 
616 	if (XE_IOCTL_DBG(xe, args->flags) ||
617 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
618 		return -EINVAL;
619 
620 	len = args->width * args->num_placements;
621 	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
622 		return -EINVAL;
623 
624 	err = __copy_from_user(eci, user_eci,
625 			       sizeof(struct drm_xe_engine_class_instance) *
626 			       len);
627 	if (XE_IOCTL_DBG(xe, err))
628 		return -EFAULT;
629 
630 	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
631 		return -EINVAL;
632 
633 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
634 		for_each_gt(gt, xe, id) {
635 			struct xe_exec_queue *new;
636 
637 			if (xe_gt_is_media_type(gt))
638 				continue;
639 
640 			eci[0].gt_id = gt->info.id;
641 			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
642 								    args->width,
643 								    args->num_placements);
644 			if (XE_IOCTL_DBG(xe, !logical_mask))
645 				return -EINVAL;
646 
647 			hwe = find_hw_engine(xe, eci[0]);
648 			if (XE_IOCTL_DBG(xe, !hwe))
649 				return -EINVAL;
650 
651 			/* The migration vm doesn't hold an rpm ref */
652 			xe_device_mem_access_get(xe);
653 
654 			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
655 			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
656 						   args->width, hwe,
657 						   EXEC_QUEUE_FLAG_PERSISTENT |
658 						   EXEC_QUEUE_FLAG_VM |
659 						   (id ?
660 						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
661 						    0));
662 
663 			xe_device_mem_access_put(xe); /* now held by engine */
664 
665 			xe_vm_put(migrate_vm);
666 			if (IS_ERR(new)) {
667 				err = PTR_ERR(new);
668 				if (q)
669 					goto put_exec_queue;
670 				return err;
671 			}
672 			if (id == 0)
673 				q = new;
674 			else
675 				list_add_tail(&new->multi_gt_list,
676 					      &q->multi_gt_link);
677 		}
678 	} else {
679 		gt = xe_device_get_gt(xe, eci[0].gt_id);
680 		logical_mask = calc_validate_logical_mask(xe, gt, eci,
681 							  args->width,
682 							  args->num_placements);
683 		if (XE_IOCTL_DBG(xe, !logical_mask))
684 			return -EINVAL;
685 
686 		hwe = find_hw_engine(xe, eci[0]);
687 		if (XE_IOCTL_DBG(xe, !hwe))
688 			return -EINVAL;
689 
690 		vm = xe_vm_lookup(xef, args->vm_id);
691 		if (XE_IOCTL_DBG(xe, !vm))
692 			return -ENOENT;
693 
694 		err = down_read_interruptible(&vm->lock);
695 		if (err) {
696 			xe_vm_put(vm);
697 			return err;
698 		}
699 
700 		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
701 			up_read(&vm->lock);
702 			xe_vm_put(vm);
703 			return -ENOENT;
704 		}
705 
706 		q = xe_exec_queue_create(xe, vm, logical_mask,
707 					 args->width, hwe,
708 					 xe_vm_in_lr_mode(vm) ? 0 :
709 					 EXEC_QUEUE_FLAG_PERSISTENT);
710 		up_read(&vm->lock);
711 		xe_vm_put(vm);
712 		if (IS_ERR(q))
713 			return PTR_ERR(q);
714 
715 		if (xe_vm_in_preempt_fence_mode(vm)) {
716 			q->compute.context = dma_fence_context_alloc(1);
717 			spin_lock_init(&q->compute.lock);
718 
719 			err = xe_vm_add_compute_exec_queue(vm, q);
720 			if (XE_IOCTL_DBG(xe, err))
721 				goto put_exec_queue;
722 		}
723 	}
724 
725 	if (args->extensions) {
726 		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
727 		if (XE_IOCTL_DBG(xe, err))
728 			goto kill_exec_queue;
729 	}
730 
731 	q->persistent.xef = xef;
732 
733 	mutex_lock(&xef->exec_queue.lock);
734 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
735 	mutex_unlock(&xef->exec_queue.lock);
736 	if (err)
737 		goto kill_exec_queue;
738 
739 	args->exec_queue_id = id;
740 
741 	return 0;
742 
743 kill_exec_queue:
744 	xe_exec_queue_kill(q);
745 put_exec_queue:
746 	xe_exec_queue_put(q);
747 	return err;
748 }
749 
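/**
 * xe_exec_queue_get_property_ioctl() - DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY handler
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_exec_queue_get_property
 * @file: DRM file
 *
 * Currently only DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN is supported, reporting
 * whether the queue has been banned.
 *
 * Return: 0 on success, negative error code on failure.
 */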
750 int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
751 				     struct drm_file *file)
752 {
753 	struct xe_device *xe = to_xe_device(dev);
754 	struct xe_file *xef = to_xe_file(file);
755 	struct drm_xe_exec_queue_get_property *args = data;
756 	struct xe_exec_queue *q;
757 	int ret;
758 
759 	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
760 		return -EINVAL;
761 
762 	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
763 	if (XE_IOCTL_DBG(xe, !q))
764 		return -ENOENT;
765 
766 	switch (args->property) {
767 	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
768 		args->value = !!(q->flags & EXEC_QUEUE_FLAG_BANNED);
769 		ret = 0;
770 		break;
771 	default:
772 		ret = -EINVAL;
773 	}
774 
775 	xe_exec_queue_put(q);
776 
777 	return ret;
778 }
779 
780 /**
781  * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
782  * @q: The exec_queue
783  *
784  * Return: True if the exec_queue is long-running, false otherwise.
785  */
786 bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
787 {
788 	return q->vm && xe_vm_in_lr_mode(q->vm) &&
789 		!(q->flags & EXEC_QUEUE_FLAG_VM);
790 }
791 
792 static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
793 {
794 	return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
795 }
796 
797 /**
798  * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
799  * @q: The exec_queue
800  *
801  * Return: True if the exec_queue's ring is full, false otherwise.
802  */
803 bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
804 {
805 	struct xe_lrc *lrc = q->lrc;
806 	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
807 
808 	return xe_exec_queue_num_job_inflight(q) >= max_job;
809 }
810 
811 /**
812  * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
813  * @q: The exec_queue
814  *
815  * FIXME: Need to determine what to use as the short-lived
816  * timeline lock for the exec_queues, so that the return value
817  * of this function becomes more than just an advisory
818  * snapshot in time. The timeline lock must protect the
819  * seqno from racing submissions on the same exec_queue.
820  * Typically this is vm->resv, but user-created timeline locks use the
821  * migrate vm and never grab the migrate vm->resv, so we have a race there.
822  *
823  * Return: True if the exec_queue is idle, false otherwise.
824  */
825 bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
826 {
827 	if (xe_exec_queue_is_parallel(q)) {
828 		int i;
829 
830 		for (i = 0; i < q->width; ++i) {
831 			if (xe_lrc_seqno(&q->lrc[i]) !=
832 			    q->lrc[i].fence_ctx.next_seqno - 1)
833 				return false;
834 		}
835 
836 		return true;
837 	}
838 
839 	return xe_lrc_seqno(&q->lrc[0]) ==
840 		q->lrc[0].fence_ctx.next_seqno - 1;
841 }
842 
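/**
 * xe_exec_queue_kill() - Kill an exec queue
 * @q: The exec queue
 *
 * Kills @q and any child queues on its multi-GT list, removing them from the
 * VM's compute exec queue list.
 */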
843 void xe_exec_queue_kill(struct xe_exec_queue *q)
844 {
845 	struct xe_exec_queue *eq = q, *next;
846 
847 	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
848 				 multi_gt_link) {
849 		q->ops->kill(eq);
850 		xe_vm_remove_compute_exec_queue(q->vm, eq);
851 	}
852 
853 	q->ops->kill(q);
854 	xe_vm_remove_compute_exec_queue(q->vm, q);
855 }
856 
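/**
 * xe_exec_queue_destroy_ioctl() - DRM_IOCTL_XE_EXEC_QUEUE_DESTROY handler
 * @dev: DRM device
 * @data: Pointer to struct drm_xe_exec_queue_destroy
 * @file: DRM file
 *
 * Removes the queue from the file's xarray, kills it (or adds it to the
 * device's persistent exec queue list when marked persistent) and drops the
 * reference that backed the ID.
 *
 * Return: 0 on success, negative error code on failure.
 */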
857 int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
858 				struct drm_file *file)
859 {
860 	struct xe_device *xe = to_xe_device(dev);
861 	struct xe_file *xef = to_xe_file(file);
862 	struct drm_xe_exec_queue_destroy *args = data;
863 	struct xe_exec_queue *q;
864 
865 	if (XE_IOCTL_DBG(xe, args->pad) ||
866 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
867 		return -EINVAL;
868 
869 	mutex_lock(&xef->exec_queue.lock);
870 	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
871 	mutex_unlock(&xef->exec_queue.lock);
872 	if (XE_IOCTL_DBG(xe, !q))
873 		return -ENOENT;
874 
875 	if (!(q->flags & EXEC_QUEUE_FLAG_PERSISTENT))
876 		xe_exec_queue_kill(q);
877 	else
878 		xe_device_add_persistent_exec_queues(xe, q);
879 
880 	trace_xe_exec_queue_close(q);
881 	xe_exec_queue_put(q);
882 
883 	return 0;
884 }
885 
886 static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
887 						    struct xe_vm *vm)
888 {
889 	if (q->flags & EXEC_QUEUE_FLAG_VM)
890 		lockdep_assert_held(&vm->lock);
891 	else
892 		xe_vm_assert_held(vm);
893 }
894 
895 /**
896  * xe_exec_queue_last_fence_put() - Drop ref to last fence
897  * @q: The exec queue
898  * @vm: The VM the engine does a bind or exec for
899  */
900 void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
901 {
902 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
903 
904 	if (q->last_fence) {
905 		dma_fence_put(q->last_fence);
906 		q->last_fence = NULL;
907 	}
908 }
909 
910 /**
911  * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
912  * @q: The exec queue
913  *
914  * Only safe to be called from xe_exec_queue_destroy().
915  */
916 void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
917 {
918 	if (q->last_fence) {
919 		dma_fence_put(q->last_fence);
920 		q->last_fence = NULL;
921 	}
922 }
923 
924 /**
925  * xe_exec_queue_last_fence_get() - Get last fence
926  * @q: The exec queue
927  * @vm: The VM the engine does a bind or exec for
928  *
929  * Get the last fence, taking an extra reference on it.
930  *
931  * Return: the last fence if not signaled, the dma-fence stub if signaled
932  */
933 struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
934 					       struct xe_vm *vm)
935 {
936 	struct dma_fence *fence;
937 
938 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
939 
940 	if (q->last_fence &&
941 	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
942 		xe_exec_queue_last_fence_put(q, vm);
943 
944 	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
945 	dma_fence_get(fence);
946 	return fence;
947 }
948 
949 /**
950  * xe_exec_queue_last_fence_set() - Set last fence
951  * @q: The exec queue
952  * @vm: The VM the engine does a bind or exec for
953  * @fence: The fence
954  *
955  * Set the last fence for the engine. Takes a reference on @fence; when
956  * closing the engine, xe_exec_queue_last_fence_put() should be called.
957  */
958 void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
959 				  struct dma_fence *fence)
960 {
961 	xe_exec_queue_last_fence_lockdep_assert(q, vm);
962 
963 	xe_exec_queue_last_fence_put(q, vm);
964 	q->last_fence = dma_fence_get(fence);
965 }
966