xref: /linux/drivers/gpu/drm/xe/xe_guc_submit.c (revision 569d7db70e5dcf13fbf072f10e9096577ac1e565)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_submit.h"
7 
8 #include <linux/bitfield.h>
9 #include <linux/bitmap.h>
10 #include <linux/circ_buf.h>
11 #include <linux/delay.h>
12 #include <linux/dma-fence-array.h>
13 
14 #include <drm/drm_managed.h>
15 
16 #include "abi/guc_actions_abi.h"
17 #include "abi/guc_klvs_abi.h"
18 #include "regs/xe_lrc_layout.h"
19 #include "xe_assert.h"
20 #include "xe_devcoredump.h"
21 #include "xe_device.h"
22 #include "xe_exec_queue.h"
23 #include "xe_force_wake.h"
24 #include "xe_gpu_scheduler.h"
25 #include "xe_gt.h"
26 #include "xe_gt_printk.h"
27 #include "xe_guc.h"
28 #include "xe_guc_ct.h"
29 #include "xe_guc_exec_queue_types.h"
30 #include "xe_guc_id_mgr.h"
31 #include "xe_guc_submit_types.h"
32 #include "xe_hw_engine.h"
33 #include "xe_hw_fence.h"
34 #include "xe_lrc.h"
35 #include "xe_macros.h"
36 #include "xe_map.h"
37 #include "xe_mocs.h"
38 #include "xe_pm.h"
39 #include "xe_ring_ops_types.h"
40 #include "xe_sched_job.h"
41 #include "xe_trace.h"
42 #include "xe_vm.h"
43 
44 static struct xe_guc *
45 exec_queue_to_guc(struct xe_exec_queue *q)
46 {
47 	return &q->gt->uc.guc;
48 }
49 
50 /*
51  * Helpers for engine state, using an atomic as some of the bits can transition
52  * at the same time (e.g. a suspend can be happening at the same time as a
53  * schedule engine done being processed).
54  */
55 #define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
56 #define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
57 #define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
58 #define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
59 #define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
60 #define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
61 #define EXEC_QUEUE_STATE_RESET			(1 << 6)
62 #define EXEC_QUEUE_STATE_KILLED			(1 << 7)
63 #define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
64 
65 static bool exec_queue_registered(struct xe_exec_queue *q)
66 {
67 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
68 }
69 
70 static void set_exec_queue_registered(struct xe_exec_queue *q)
71 {
72 	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
73 }
74 
75 static void clear_exec_queue_registered(struct xe_exec_queue *q)
76 {
77 	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
78 }
79 
80 static bool exec_queue_enabled(struct xe_exec_queue *q)
81 {
82 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
83 }
84 
85 static void set_exec_queue_enabled(struct xe_exec_queue *q)
86 {
87 	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
88 }
89 
90 static void clear_exec_queue_enabled(struct xe_exec_queue *q)
91 {
92 	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
93 }
94 
95 static bool exec_queue_pending_enable(struct xe_exec_queue *q)
96 {
97 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
98 }
99 
100 static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
101 {
102 	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
103 }
104 
105 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
106 {
107 	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
108 }
109 
110 static bool exec_queue_pending_disable(struct xe_exec_queue *q)
111 {
112 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
113 }
114 
115 static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
116 {
117 	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
118 }
119 
120 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
121 {
122 	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
123 }
124 
125 static bool exec_queue_destroyed(struct xe_exec_queue *q)
126 {
127 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
128 }
129 
130 static void set_exec_queue_destroyed(struct xe_exec_queue *q)
131 {
132 	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
133 }
134 
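/* Unlike the other states, "banned" is tracked in q->flags rather than the atomic state word */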
135 static bool exec_queue_banned(struct xe_exec_queue *q)
136 {
137 	return (q->flags & EXEC_QUEUE_FLAG_BANNED);
138 }
139 
140 static void set_exec_queue_banned(struct xe_exec_queue *q)
141 {
142 	q->flags |= EXEC_QUEUE_FLAG_BANNED;
143 }
144 
145 static bool exec_queue_suspended(struct xe_exec_queue *q)
146 {
147 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
148 }
149 
150 static void set_exec_queue_suspended(struct xe_exec_queue *q)
151 {
152 	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
153 }
154 
155 static void clear_exec_queue_suspended(struct xe_exec_queue *q)
156 {
157 	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
158 }
159 
160 static bool exec_queue_reset(struct xe_exec_queue *q)
161 {
162 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
163 }
164 
165 static void set_exec_queue_reset(struct xe_exec_queue *q)
166 {
167 	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
168 }
169 
170 static bool exec_queue_killed(struct xe_exec_queue *q)
171 {
172 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
173 }
174 
175 static void set_exec_queue_killed(struct xe_exec_queue *q)
176 {
177 	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
178 }
179 
180 static bool exec_queue_wedged(struct xe_exec_queue *q)
181 {
182 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
183 }
184 
185 static void set_exec_queue_wedged(struct xe_exec_queue *q)
186 {
187 	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
188 }
189 
190 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
191 {
192 	return exec_queue_banned(q) || (atomic_read(&q->guc->state) &
193 		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED));
194 }
195 
196 #ifdef CONFIG_PROVE_LOCKING
197 static int alloc_submit_wq(struct xe_guc *guc)
198 {
199 	int i;
200 
201 	for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
202 		guc->submission_state.submit_wq_pool[i] =
203 			alloc_ordered_workqueue("submit_wq", 0);
204 		if (!guc->submission_state.submit_wq_pool[i])
205 			goto err_free;
206 	}
207 
208 	return 0;
209 
210 err_free:
211 	while (i)
212 		destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
213 
214 	return -ENOMEM;
215 }
216 
217 static void free_submit_wq(struct xe_guc *guc)
218 {
219 	int i;
220 
221 	for (i = 0; i < NUM_SUBMIT_WQ; ++i)
222 		destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
223 }
224 
225 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
226 {
227 	int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
228 
229 	return guc->submission_state.submit_wq_pool[idx];
230 }
231 #else
232 static int alloc_submit_wq(struct xe_guc *guc)
233 {
234 	return 0;
235 }
236 
237 static void free_submit_wq(struct xe_guc *guc)
238 {
239 
240 }
241 
242 static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
243 {
244 	return NULL;
245 }
246 #endif
247 
248 static void guc_submit_fini(struct drm_device *drm, void *arg)
249 {
250 	struct xe_guc *guc = arg;
251 
252 	xa_destroy(&guc->submission_state.exec_queue_lookup);
253 	free_submit_wq(guc);
254 }
255 
256 static void guc_submit_wedged_fini(struct drm_device *drm, void *arg)
257 {
258 	struct xe_guc *guc = arg;
259 	struct xe_exec_queue *q;
260 	unsigned long index;
261 
262 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
263 		if (exec_queue_wedged(q))
264 			xe_exec_queue_put(q);
265 }
266 
267 static const struct xe_exec_queue_ops guc_exec_queue_ops;
268 
269 static void primelockdep(struct xe_guc *guc)
270 {
271 	if (!IS_ENABLED(CONFIG_LOCKDEP))
272 		return;
273 
274 	fs_reclaim_acquire(GFP_KERNEL);
275 
276 	mutex_lock(&guc->submission_state.lock);
277 	mutex_unlock(&guc->submission_state.lock);
278 
279 	fs_reclaim_release(GFP_KERNEL);
280 }
281 
282 /**
283  * xe_guc_submit_init() - Initialize GuC submission.
284  * @guc: the &xe_guc to initialize
285  * @num_ids: number of GuC context IDs to use
286  *
287  * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
288  * GuC context IDs supported by the GuC firmware should be used for submission.
289  *
290  * Only VF drivers have to provide an explicit number of GuC context IDs
291  * that they can use for submission.
292  *
293  * Return: 0 on success or a negative error code on failure.
294  */
295 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
296 {
297 	struct xe_device *xe = guc_to_xe(guc);
298 	struct xe_gt *gt = guc_to_gt(guc);
299 	int err;
300 
301 	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
302 	if (err)
303 		return err;
304 
305 	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
306 	if (err)
307 		return err;
308 
309 	err = alloc_submit_wq(guc);
310 	if (err)
311 		return err;
312 
313 	gt->exec_queue_ops = &guc_exec_queue_ops;
314 
315 	xa_init(&guc->submission_state.exec_queue_lookup);
316 
317 	primelockdep(guc);
318 
319 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
320 }
321 
322 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
323 {
324 	int i;
325 
326 	lockdep_assert_held(&guc->submission_state.lock);
327 
328 	for (i = 0; i < xa_count; ++i)
329 		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
330 
331 	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
332 				     q->guc->id, q->width);
333 }
334 
335 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
336 {
337 	int ret;
338 	void *ptr;
339 	int i;
340 
341 	/*
342 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
343 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
344 	 * worst case the user gets -ENOMEM on engine create and has to try again.
345 	 *
346 	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
346 	 * failure.
347 	 */
348 	lockdep_assert_held(&guc->submission_state.lock);
349 
350 	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
351 					   q->width);
352 	if (ret < 0)
353 		return ret;
354 
355 	q->guc->id = ret;
356 
357 	for (i = 0; i < q->width; ++i) {
358 		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
359 			       q->guc->id + i, q, GFP_NOWAIT);
360 		if (IS_ERR(ptr)) {
361 			ret = PTR_ERR(ptr);
362 			goto err_release;
363 		}
364 	}
365 
366 	return 0;
367 
368 err_release:
369 	__release_guc_id(guc, q, i);
370 
371 	return ret;
372 }
373 
374 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
375 {
376 	mutex_lock(&guc->submission_state.lock);
377 	__release_guc_id(guc, q, q->width);
378 	mutex_unlock(&guc->submission_state.lock);
379 }
380 
381 struct exec_queue_policy {
382 	u32 count;
383 	struct guc_update_exec_queue_policy h2g;
384 };
385 
386 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
387 {
388 	size_t bytes = sizeof(policy->h2g.header) +
389 		       (sizeof(policy->h2g.klv[0]) * policy->count);
390 
391 	return bytes / sizeof(u32);
392 }
393 
394 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
395 					      u16 guc_id)
396 {
397 	policy->h2g.header.action =
398 		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
399 	policy->h2g.header.guc_id = guc_id;
400 	policy->count = 0;
401 }
402 
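/*
 * Generate __guc_exec_queue_policy_add_<field>() helpers, each appending one
 * KLV (key/length/value) entry to the policy H2G message being built.
 */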
403 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
404 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
405 					   u32 data) \
406 { \
407 	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
408 \
409 	policy->h2g.klv[policy->count].kl = \
410 		FIELD_PREP(GUC_KLV_0_KEY, \
411 			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
412 		FIELD_PREP(GUC_KLV_0_LEN, 1); \
413 	policy->h2g.klv[policy->count].value = data; \
414 	policy->count++; \
415 }
416 
417 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
418 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
419 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
420 #undef MAKE_EXEC_QUEUE_POLICY_ADD
421 
422 static const int xe_exec_queue_prio_to_guc[] = {
423 	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
424 	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
425 	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
426 	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
427 };
428 
429 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
430 {
431 	struct exec_queue_policy policy;
432 	struct xe_device *xe = guc_to_xe(guc);
433 	enum xe_exec_queue_priority prio = q->sched_props.priority;
434 	u32 timeslice_us = q->sched_props.timeslice_us;
435 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
436 
437 	xe_assert(xe, exec_queue_registered(q));
438 
439 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
440 	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
441 	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
442 	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
443 
444 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
445 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
446 }
447 
448 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
449 {
450 	struct exec_queue_policy policy;
451 
452 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
453 	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
454 
455 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
456 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
457 }
458 
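/*
 * Accessors for fields of the guc_submit_parallel_scratch page backing a
 * parallel (multi-LRC) exec queue, addressed through an iosys_map.
 */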
459 #define parallel_read(xe_, map_, field_) \
460 	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
461 			field_)
462 #define parallel_write(xe_, map_, field_, val_) \
463 	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
464 			field_, val_)
465 
466 static void __register_mlrc_exec_queue(struct xe_guc *guc,
467 				       struct xe_exec_queue *q,
468 				       struct guc_ctxt_registration_info *info)
469 {
470 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
471 	struct xe_device *xe = guc_to_xe(guc);
472 	u32 action[MAX_MLRC_REG_SIZE];
473 	int len = 0;
474 	int i;
475 
476 	xe_assert(xe, xe_exec_queue_is_parallel(q));
477 
478 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
479 	action[len++] = info->flags;
480 	action[len++] = info->context_idx;
481 	action[len++] = info->engine_class;
482 	action[len++] = info->engine_submit_mask;
483 	action[len++] = info->wq_desc_lo;
484 	action[len++] = info->wq_desc_hi;
485 	action[len++] = info->wq_base_lo;
486 	action[len++] = info->wq_base_hi;
487 	action[len++] = info->wq_size;
488 	action[len++] = q->width;
489 	action[len++] = info->hwlrca_lo;
490 	action[len++] = info->hwlrca_hi;
491 
492 	for (i = 1; i < q->width; ++i) {
493 		struct xe_lrc *lrc = q->lrc[i];
494 
495 		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
496 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
497 	}
498 
499 	xe_assert(xe, len <= MAX_MLRC_REG_SIZE);
500 #undef MAX_MLRC_REG_SIZE
501 
502 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
503 }
504 
505 static void __register_exec_queue(struct xe_guc *guc,
506 				  struct guc_ctxt_registration_info *info)
507 {
508 	u32 action[] = {
509 		XE_GUC_ACTION_REGISTER_CONTEXT,
510 		info->flags,
511 		info->context_idx,
512 		info->engine_class,
513 		info->engine_submit_mask,
514 		info->wq_desc_lo,
515 		info->wq_desc_hi,
516 		info->wq_base_lo,
517 		info->wq_base_hi,
518 		info->wq_size,
519 		info->hwlrca_lo,
520 		info->hwlrca_hi,
521 	};
522 
523 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
524 }
525 
526 static void register_exec_queue(struct xe_exec_queue *q)
527 {
528 	struct xe_guc *guc = exec_queue_to_guc(q);
529 	struct xe_device *xe = guc_to_xe(guc);
530 	struct xe_lrc *lrc = q->lrc[0];
531 	struct guc_ctxt_registration_info info;
532 
533 	xe_assert(xe, !exec_queue_registered(q));
534 
535 	memset(&info, 0, sizeof(info));
536 	info.context_idx = q->guc->id;
537 	info.engine_class = xe_engine_class_to_guc_class(q->class);
538 	info.engine_submit_mask = q->logical_mask;
539 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
540 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
541 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
542 
543 	if (xe_exec_queue_is_parallel(q)) {
544 		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
545 		struct iosys_map map = xe_lrc_parallel_map(lrc);
546 
547 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
548 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
549 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
550 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
551 		info.wq_base_lo = lower_32_bits(ggtt_addr +
552 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
553 		info.wq_base_hi = upper_32_bits(ggtt_addr +
554 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
555 		info.wq_size = WQ_SIZE;
556 
557 		q->guc->wqi_head = 0;
558 		q->guc->wqi_tail = 0;
559 		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
560 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
561 	}
562 
563 	/*
564 	 * We must keep a reference for LR engines while they are registered with
565 	 * the GuC, as their jobs signal immediately and an engine can't be
566 	 * destroyed while the GuC still holds a reference to it.
567 	 */
568 	if (xe_exec_queue_is_lr(q))
569 		xe_exec_queue_get(q);
570 
571 	set_exec_queue_registered(q);
572 	trace_xe_exec_queue_register(q);
573 	if (xe_exec_queue_is_parallel(q))
574 		__register_mlrc_exec_queue(guc, q, &info);
575 	else
576 		__register_exec_queue(guc, &info);
577 	init_policies(guc, q);
578 }
579 
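/*
 * Work queue (WQ) helpers for parallel exec queues: wqi_head/wqi_tail track
 * the software view of the ring in the parallel scratch page, while the GuC
 * updates wq_desc.head as it consumes items.
 */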
580 static u32 wq_space_until_wrap(struct xe_exec_queue *q)
581 {
582 	return (WQ_SIZE - q->guc->wqi_tail);
583 }
584 
585 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
586 {
587 	struct xe_guc *guc = exec_queue_to_guc(q);
588 	struct xe_device *xe = guc_to_xe(guc);
589 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
590 	unsigned int sleep_period_ms = 1;
591 
592 #define AVAILABLE_SPACE \
593 	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
594 	if (wqi_size > AVAILABLE_SPACE) {
595 try_again:
596 		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
597 		if (wqi_size > AVAILABLE_SPACE) {
598 			if (sleep_period_ms == 1024) {
599 				xe_gt_reset_async(q->gt);
600 				return -ENODEV;
601 			}
602 
603 			msleep(sleep_period_ms);
604 			sleep_period_ms <<= 1;
605 			goto try_again;
606 		}
607 	}
608 #undef AVAILABLE_SPACE
609 
610 	return 0;
611 }
612 
613 static int wq_noop_append(struct xe_exec_queue *q)
614 {
615 	struct xe_guc *guc = exec_queue_to_guc(q);
616 	struct xe_device *xe = guc_to_xe(guc);
617 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
618 	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
619 
620 	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
621 		return -ENODEV;
622 
623 	xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw));
624 
625 	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
626 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
627 		       FIELD_PREP(WQ_LEN_MASK, len_dw));
628 	q->guc->wqi_tail = 0;
629 
630 	return 0;
631 }
632 
633 static void wq_item_append(struct xe_exec_queue *q)
634 {
635 	struct xe_guc *guc = exec_queue_to_guc(q);
636 	struct xe_device *xe = guc_to_xe(guc);
637 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
638 #define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
639 	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
640 	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
641 	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
642 	int i = 0, j;
643 
644 	if (wqi_size > wq_space_until_wrap(q)) {
645 		if (wq_noop_append(q))
646 			return;
647 	}
648 	if (wq_wait_for_space(q, wqi_size))
649 		return;
650 
651 	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
652 		FIELD_PREP(WQ_LEN_MASK, len_dw);
653 	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
654 	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
655 		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
656 	wqi[i++] = 0;
657 	for (j = 1; j < q->width; ++j) {
658 		struct xe_lrc *lrc = q->lrc[j];
659 
660 		wqi[i++] = lrc->ring.tail / sizeof(u64);
661 	}
662 
663 	xe_assert(xe, i == wqi_size / sizeof(u32));
664 
665 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
666 				      wq[q->guc->wqi_tail / sizeof(u32)]));
667 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
668 	q->guc->wqi_tail += wqi_size;
669 	xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE);
670 
671 	xe_device_wmb(xe);
672 
673 	map = xe_lrc_parallel_map(q->lrc[0]);
674 	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
675 }
676 
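/* Sentinel stored in q->guc->resume_time while a scheduling enable is in flight */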
677 #define RESUME_PENDING	~0x0ull
678 static void submit_exec_queue(struct xe_exec_queue *q)
679 {
680 	struct xe_guc *guc = exec_queue_to_guc(q);
681 	struct xe_device *xe = guc_to_xe(guc);
682 	struct xe_lrc *lrc = q->lrc[0];
683 	u32 action[3];
684 	u32 g2h_len = 0;
685 	u32 num_g2h = 0;
686 	int len = 0;
687 	bool extra_submit = false;
688 
689 	xe_assert(xe, exec_queue_registered(q));
690 
691 	if (xe_exec_queue_is_parallel(q))
692 		wq_item_append(q);
693 	else
694 		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
695 
696 	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
697 		return;
698 
699 	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
700 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
701 		action[len++] = q->guc->id;
702 		action[len++] = GUC_CONTEXT_ENABLE;
703 		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
704 		num_g2h = 1;
705 		if (xe_exec_queue_is_parallel(q))
706 			extra_submit = true;
707 
708 		q->guc->resume_time = RESUME_PENDING;
709 		set_exec_queue_pending_enable(q);
710 		set_exec_queue_enabled(q);
711 		trace_xe_exec_queue_scheduling_enable(q);
712 	} else {
713 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
714 		action[len++] = q->guc->id;
715 		trace_xe_exec_queue_submit(q);
716 	}
717 
718 	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
719 
720 	if (extra_submit) {
721 		len = 0;
722 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
723 		action[len++] = q->guc->id;
724 		trace_xe_exec_queue_submit(q);
725 
726 		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
727 	}
728 }
729 
730 static struct dma_fence *
731 guc_exec_queue_run_job(struct drm_sched_job *drm_job)
732 {
733 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
734 	struct xe_exec_queue *q = job->q;
735 	struct xe_guc *guc = exec_queue_to_guc(q);
736 	struct xe_device *xe = guc_to_xe(guc);
737 	bool lr = xe_exec_queue_is_lr(q);
738 
739 	xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
740 		  exec_queue_banned(q) || exec_queue_suspended(q));
741 
742 	trace_xe_sched_job_run(job);
743 
744 	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
745 		if (!exec_queue_registered(q))
746 			register_exec_queue(q);
747 		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
748 			q->ring_ops->emit_job(job);
749 		submit_exec_queue(q);
750 	}
751 
752 	if (lr) {
753 		xe_sched_job_set_error(job, -EOPNOTSUPP);
754 		return NULL;
755 	} else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) {
756 		return job->fence;
757 	} else {
758 		return dma_fence_get(job->fence);
759 	}
760 }
761 
762 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
763 {
764 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
765 
766 	xe_exec_queue_update_run_ticks(job->q);
767 
768 	trace_xe_sched_job_free(job);
769 	xe_sched_job_put(job);
770 }
771 
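/* Non-zero while submission is stopped for a GuC/GT reset, see xe_guc_submit_reset_prepare() */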
772 static int guc_read_stopped(struct xe_guc *guc)
773 {
774 	return atomic_read(&guc->submission_state.stopped);
775 }
776 
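/* Declare a 3-dword H2G action toggling GuC scheduling for an exec queue */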
777 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
778 	u32 action[] = {						\
779 		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
780 		q->guc->id,						\
781 		GUC_CONTEXT_##enable_disable,				\
782 	}
783 
784 static void disable_scheduling_deregister(struct xe_guc *guc,
785 					  struct xe_exec_queue *q)
786 {
787 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
788 	struct xe_device *xe = guc_to_xe(guc);
789 	int ret;
790 
791 	set_min_preemption_timeout(guc, q);
792 	smp_rmb();
793 	ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) ||
794 				 guc_read_stopped(guc), HZ * 5);
795 	if (!ret) {
796 		struct xe_gpu_scheduler *sched = &q->guc->sched;
797 
798 		drm_warn(&xe->drm, "Pending enable failed to respond");
799 		xe_sched_submission_start(sched);
800 		xe_gt_reset_async(q->gt);
801 		xe_sched_tdr_queue_imm(sched);
802 		return;
803 	}
804 
805 	clear_exec_queue_enabled(q);
806 	set_exec_queue_pending_disable(q);
807 	set_exec_queue_destroyed(q);
808 	trace_xe_exec_queue_scheduling_disable(q);
809 
810 	/*
811 	 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
812 	 * handler and we are not allowed to reserve G2H space in handlers.
813 	 */
814 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
815 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
816 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
817 }
818 
819 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
820 {
821 	struct xe_guc *guc = exec_queue_to_guc(q);
822 	struct xe_device *xe = guc_to_xe(guc);
823 
824 	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
825 	wake_up_all(&xe->ufence_wq);
826 
827 	if (xe_exec_queue_is_lr(q))
828 		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
829 	else
830 		xe_sched_tdr_queue_imm(&q->guc->sched);
831 }
832 
833 static bool guc_submit_hint_wedged(struct xe_guc *guc)
834 {
835 	struct xe_device *xe = guc_to_xe(guc);
836 	struct xe_exec_queue *q;
837 	unsigned long index;
838 	int err;
839 
840 	if (xe->wedged.mode != 2)
841 		return false;
842 
843 	if (xe_device_wedged(xe))
844 		return true;
845 
846 	xe_device_declare_wedged(xe);
847 
848 	xe_guc_submit_reset_prepare(guc);
849 	xe_guc_ct_stop(&guc->ct);
850 
851 	err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm,
852 				       guc_submit_wedged_fini, guc);
853 	if (err) {
854 		drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2; device is wedged anyway.\n");
855 		return true; /* Device is wedged anyway */
856 	}
857 
858 	mutex_lock(&guc->submission_state.lock);
859 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
860 		if (xe_exec_queue_get_unless_zero(q))
861 			set_exec_queue_wedged(q);
862 	mutex_unlock(&guc->submission_state.lock);
863 
864 	return true;
865 }
866 
867 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
868 {
869 	struct xe_guc_exec_queue *ge =
870 		container_of(w, struct xe_guc_exec_queue, lr_tdr);
871 	struct xe_exec_queue *q = ge->q;
872 	struct xe_guc *guc = exec_queue_to_guc(q);
873 	struct xe_device *xe = guc_to_xe(guc);
874 	struct xe_gpu_scheduler *sched = &ge->sched;
875 	bool wedged;
876 
877 	xe_assert(xe, xe_exec_queue_is_lr(q));
878 	trace_xe_exec_queue_lr_cleanup(q);
879 
880 	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
881 
882 	/* Kill the run_job / process_msg entry points */
883 	xe_sched_submission_stop(sched);
884 
885 	/*
886 	 * Engine state now mostly stable, disable scheduling / deregister if
887 	 * needed. This cleanup routine might be called multiple times, where
888 	 * the actual async engine deregister drops the final engine ref.
889 	 * Calling disable_scheduling_deregister will mark the engine as
890 	 * destroyed and fire off the CT requests to disable scheduling /
891 	 * deregister, which we only want to do once. We also don't want to mark
892 	 * the engine as pending_disable again as this may race with the
893 	 * xe_guc_deregister_done_handler() which treats it as an unexpected
894 	 * state.
895 	 */
896 	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
897 		struct xe_guc *guc = exec_queue_to_guc(q);
898 		int ret;
899 
900 		set_exec_queue_banned(q);
901 		disable_scheduling_deregister(guc, q);
902 
903 		/*
904 		 * Must wait for scheduling to be disabled before signalling
905 		 * any fences; if the GT is broken, the GT reset code should signal us.
906 		 */
907 		ret = wait_event_timeout(guc->ct.wq,
908 					 !exec_queue_pending_disable(q) ||
909 					 guc_read_stopped(guc), HZ * 5);
910 		if (!ret) {
911 			drm_warn(&xe->drm, "Schedule disable failed to respond");
912 			xe_sched_submission_start(sched);
913 			xe_gt_reset_async(q->gt);
914 			return;
915 		}
916 	}
917 
918 	xe_sched_submission_start(sched);
919 }
920 
921 static enum drm_gpu_sched_stat
922 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
923 {
924 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
925 	struct xe_sched_job *tmp_job;
926 	struct xe_exec_queue *q = job->q;
927 	struct xe_gpu_scheduler *sched = &q->guc->sched;
928 	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(q));
929 	int err = -ETIME;
930 	int i = 0;
931 	bool wedged;
932 
933 	/*
934 	 * The TDR has fired before the free job worker. Common if the exec queue
935 	 * is closed immediately after the last fence signaled.
936 	 */
937 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
938 		guc_exec_queue_free_job(drm_job);
939 
940 		return DRM_GPU_SCHED_STAT_NOMINAL;
941 	}
942 
943 	drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
944 		   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
945 		   q->guc->id, q->flags);
946 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
947 		   "Kernel-submitted job timed out\n");
948 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
949 		   "VM job timed out on non-killed execqueue\n");
950 
951 	if (!exec_queue_killed(q))
952 		xe_devcoredump(job);
953 
954 	trace_xe_sched_job_timedout(job);
955 
956 	wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
957 
958 	/* Kill the run_job entry point */
959 	xe_sched_submission_stop(sched);
960 
961 	/*
962 	 * Kernel jobs should never fail, nor should VM jobs; if they do,
963 	 * something has gone wrong and the GT needs a reset
964 	 */
965 	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
966 			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
967 		if (!xe_sched_invalidate_job(job, 2)) {
968 			xe_sched_add_pending_job(sched, job);
969 			xe_sched_submission_start(sched);
970 			xe_gt_reset_async(q->gt);
971 			goto out;
972 		}
973 	}
974 
975 	/* Engine state now stable, disable scheduling if needed */
976 	if (!wedged && exec_queue_registered(q)) {
977 		struct xe_guc *guc = exec_queue_to_guc(q);
978 		int ret;
979 
980 		if (exec_queue_reset(q))
981 			err = -EIO;
982 		set_exec_queue_banned(q);
983 		if (!exec_queue_destroyed(q)) {
984 			xe_exec_queue_get(q);
985 			disable_scheduling_deregister(guc, q);
986 		}
987 
988 		/*
989 		 * Must wait for scheduling to be disabled before signalling
990 		 * any fences; if the GT is broken, the GT reset code should signal us.
991 		 *
992 		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
993 		 * error) messages which can cause the schedule disable to get
994 		 * lost. If this occurs, trigger a GT reset to recover.
995 		 */
996 		smp_rmb();
997 		ret = wait_event_timeout(guc->ct.wq,
998 					 !exec_queue_pending_disable(q) ||
999 					 guc_read_stopped(guc), HZ * 5);
1000 		if (!ret || guc_read_stopped(guc)) {
1001 			drm_warn(&xe->drm, "Schedule disable failed to respond");
1002 			xe_sched_add_pending_job(sched, job);
1003 			xe_sched_submission_start(sched);
1004 			xe_gt_reset_async(q->gt);
1005 			xe_sched_tdr_queue_imm(sched);
1006 			goto out;
1007 		}
1008 	}
1009 
1010 	/* Stop fence signaling */
1011 	xe_hw_fence_irq_stop(q->fence_irq);
1012 
1013 	/*
1014 	 * Fence state now stable, stop / start scheduler which cleans up any
1015 	 * fences that are complete
1016 	 */
1017 	xe_sched_add_pending_job(sched, job);
1018 	xe_sched_submission_start(sched);
1019 
1020 	xe_guc_exec_queue_trigger_cleanup(q);
1021 
1022 	/* Mark all outstanding jobs as bad, thus completing them */
1023 	spin_lock(&sched->base.job_list_lock);
1024 	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
1025 		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
1026 	spin_unlock(&sched->base.job_list_lock);
1027 
1028 	/* Start fence signaling */
1029 	xe_hw_fence_irq_start(q->fence_irq);
1030 
1031 out:
1032 	return DRM_GPU_SCHED_STAT_NOMINAL;
1033 }
1034 
1035 static void __guc_exec_queue_fini_async(struct work_struct *w)
1036 {
1037 	struct xe_guc_exec_queue *ge =
1038 		container_of(w, struct xe_guc_exec_queue, fini_async);
1039 	struct xe_exec_queue *q = ge->q;
1040 	struct xe_guc *guc = exec_queue_to_guc(q);
1041 
1042 	xe_pm_runtime_get(guc_to_xe(guc));
1043 	trace_xe_exec_queue_destroy(q);
1044 
1045 	if (xe_exec_queue_is_lr(q))
1046 		cancel_work_sync(&ge->lr_tdr);
1047 	release_guc_id(guc, q);
1048 	xe_sched_entity_fini(&ge->entity);
1049 	xe_sched_fini(&ge->sched);
1050 
1051 	kfree(ge);
1052 	xe_exec_queue_fini(q);
1053 	xe_pm_runtime_put(guc_to_xe(guc));
1054 }
1055 
1056 static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
1057 {
1058 	INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
1059 
1060 	/* We must block on kernel engines so slabs are empty on driver unload */
1061 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1062 		__guc_exec_queue_fini_async(&q->guc->fini_async);
1063 	else
1064 		queue_work(system_wq, &q->guc->fini_async);
1065 }
1066 
1067 static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
1068 {
1069 	/*
1070 	 * Might be done from within the GPU scheduler; this needs to be async as
1071 	 * we fini the scheduler when the engine is fini'd and the scheduler can't
1072 	 * complete fini within itself (circular dependency). Async resolves this
1073 	 * and we don't really care when everything is fini'd, just that it
1074 	 * is.
1075 	 */
1076 	guc_exec_queue_fini_async(q);
1077 }
1078 
1079 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1080 {
1081 	struct xe_exec_queue *q = msg->private_data;
1082 	struct xe_guc *guc = exec_queue_to_guc(q);
1083 	struct xe_device *xe = guc_to_xe(guc);
1084 
1085 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1086 	trace_xe_exec_queue_cleanup_entity(q);
1087 
1088 	if (exec_queue_registered(q))
1089 		disable_scheduling_deregister(guc, q);
1090 	else
1091 		__guc_exec_queue_fini(guc, q);
1092 }
1093 
1094 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1095 {
1096 	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1097 }
1098 
1099 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1100 {
1101 	struct xe_exec_queue *q = msg->private_data;
1102 	struct xe_guc *guc = exec_queue_to_guc(q);
1103 
1104 	if (guc_exec_queue_allowed_to_change_state(q))
1105 		init_policies(guc, q);
1106 	kfree(msg);
1107 }
1108 
1109 static void suspend_fence_signal(struct xe_exec_queue *q)
1110 {
1111 	struct xe_guc *guc = exec_queue_to_guc(q);
1112 	struct xe_device *xe = guc_to_xe(guc);
1113 
1114 	xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) ||
1115 		  guc_read_stopped(guc));
1116 	xe_assert(xe, q->guc->suspend_pending);
1117 
1118 	q->guc->suspend_pending = false;
1119 	smp_wmb();
1120 	wake_up(&q->guc->suspend_wait);
1121 }
1122 
1123 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1124 {
1125 	struct xe_exec_queue *q = msg->private_data;
1126 	struct xe_guc *guc = exec_queue_to_guc(q);
1127 
1128 	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1129 	    exec_queue_enabled(q)) {
1130 		wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING ||
1131 			   guc_read_stopped(guc));
1132 
1133 		if (!guc_read_stopped(guc)) {
1134 			MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1135 			s64 since_resume_ms =
1136 				ktime_ms_delta(ktime_get(),
1137 					       q->guc->resume_time);
1138 			s64 wait_ms = q->vm->preempt.min_run_period_ms -
1139 				since_resume_ms;
1140 
1141 			if (wait_ms > 0 && q->guc->resume_time)
1142 				msleep(wait_ms);
1143 
1144 			set_exec_queue_suspended(q);
1145 			clear_exec_queue_enabled(q);
1146 			set_exec_queue_pending_disable(q);
1147 			trace_xe_exec_queue_scheduling_disable(q);
1148 
1149 			xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1150 				       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1151 		}
1152 	} else if (q->guc->suspend_pending) {
1153 		set_exec_queue_suspended(q);
1154 		suspend_fence_signal(q);
1155 	}
1156 }
1157 
1158 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1159 {
1160 	struct xe_exec_queue *q = msg->private_data;
1161 	struct xe_guc *guc = exec_queue_to_guc(q);
1162 
1163 	if (guc_exec_queue_allowed_to_change_state(q)) {
1164 		MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
1165 
1166 		q->guc->resume_time = RESUME_PENDING;
1167 		clear_exec_queue_suspended(q);
1168 		set_exec_queue_pending_enable(q);
1169 		set_exec_queue_enabled(q);
1170 		trace_xe_exec_queue_scheduling_enable(q);
1171 
1172 		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1173 			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1174 	} else {
1175 		clear_exec_queue_suspended(q);
1176 	}
1177 }
1178 
1179 #define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
1180 #define SET_SCHED_PROPS	2
1181 #define SUSPEND		3
1182 #define RESUME		4
1183 
1184 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1185 {
1186 	trace_xe_sched_msg_recv(msg);
1187 
1188 	switch (msg->opcode) {
1189 	case CLEANUP:
1190 		__guc_exec_queue_process_msg_cleanup(msg);
1191 		break;
1192 	case SET_SCHED_PROPS:
1193 		__guc_exec_queue_process_msg_set_sched_props(msg);
1194 		break;
1195 	case SUSPEND:
1196 		__guc_exec_queue_process_msg_suspend(msg);
1197 		break;
1198 	case RESUME:
1199 		__guc_exec_queue_process_msg_resume(msg);
1200 		break;
1201 	default:
1202 		XE_WARN_ON("Unknown message type");
1203 	}
1204 }
1205 
1206 static const struct drm_sched_backend_ops drm_sched_ops = {
1207 	.run_job = guc_exec_queue_run_job,
1208 	.free_job = guc_exec_queue_free_job,
1209 	.timedout_job = guc_exec_queue_timedout_job,
1210 };
1211 
1212 static const struct xe_sched_backend_ops xe_sched_ops = {
1213 	.process_msg = guc_exec_queue_process_msg,
1214 };
1215 
1216 static int guc_exec_queue_init(struct xe_exec_queue *q)
1217 {
1218 	struct xe_gpu_scheduler *sched;
1219 	struct xe_guc *guc = exec_queue_to_guc(q);
1220 	struct xe_device *xe = guc_to_xe(guc);
1221 	struct xe_guc_exec_queue *ge;
1222 	long timeout;
1223 	int err;
1224 
1225 	xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc)));
1226 
1227 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1228 	if (!ge)
1229 		return -ENOMEM;
1230 
1231 	q->guc = ge;
1232 	ge->q = q;
1233 	init_waitqueue_head(&ge->suspend_wait);
1234 
1235 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1236 		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
1237 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1238 			    get_submit_wq(guc),
1239 			    q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
1240 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
1241 			    q->name, gt_to_xe(q->gt)->drm.dev);
1242 	if (err)
1243 		goto err_free;
1244 
1245 	sched = &ge->sched;
1246 	err = xe_sched_entity_init(&ge->entity, sched);
1247 	if (err)
1248 		goto err_sched;
1249 
1250 	if (xe_exec_queue_is_lr(q))
1251 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
1252 
1253 	mutex_lock(&guc->submission_state.lock);
1254 
1255 	err = alloc_guc_id(guc, q);
1256 	if (err)
1257 		goto err_entity;
1258 
1259 	q->entity = &ge->entity;
1260 
1261 	if (guc_read_stopped(guc))
1262 		xe_sched_stop(sched);
1263 
1264 	mutex_unlock(&guc->submission_state.lock);
1265 
1266 	xe_exec_queue_assign_name(q, q->guc->id);
1267 
1268 	trace_xe_exec_queue_create(q);
1269 
1270 	return 0;
1271 
1272 err_entity:
1273 	mutex_unlock(&guc->submission_state.lock);
1274 	xe_sched_entity_fini(&ge->entity);
1275 err_sched:
1276 	xe_sched_fini(&ge->sched);
1277 err_free:
1278 	kfree(ge);
1279 
1280 	return err;
1281 }
1282 
1283 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1284 {
1285 	trace_xe_exec_queue_kill(q);
1286 	set_exec_queue_killed(q);
1287 	xe_guc_exec_queue_trigger_cleanup(q);
1288 }
1289 
1290 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1291 				   u32 opcode)
1292 {
1293 	INIT_LIST_HEAD(&msg->link);
1294 	msg->opcode = opcode;
1295 	msg->private_data = q;
1296 
1297 	trace_xe_sched_msg_add(msg);
1298 	xe_sched_add_msg(&q->guc->sched, msg);
1299 }
1300 
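/* Indices into the preallocated q->guc->static_msgs array */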
1301 #define STATIC_MSG_CLEANUP	0
1302 #define STATIC_MSG_SUSPEND	1
1303 #define STATIC_MSG_RESUME	2
1304 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1305 {
1306 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1307 
1308 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1309 		guc_exec_queue_add_msg(q, msg, CLEANUP);
1310 	else
1311 		__guc_exec_queue_fini(exec_queue_to_guc(q), q);
1312 }
1313 
1314 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1315 				       enum xe_exec_queue_priority priority)
1316 {
1317 	struct xe_sched_msg *msg;
1318 
1319 	if (q->sched_props.priority == priority ||
1320 	    exec_queue_killed_or_banned_or_wedged(q))
1321 		return 0;
1322 
1323 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1324 	if (!msg)
1325 		return -ENOMEM;
1326 
1327 	q->sched_props.priority = priority;
1328 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1329 
1330 	return 0;
1331 }
1332 
1333 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1334 {
1335 	struct xe_sched_msg *msg;
1336 
1337 	if (q->sched_props.timeslice_us == timeslice_us ||
1338 	    exec_queue_killed_or_banned_or_wedged(q))
1339 		return 0;
1340 
1341 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1342 	if (!msg)
1343 		return -ENOMEM;
1344 
1345 	q->sched_props.timeslice_us = timeslice_us;
1346 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1347 
1348 	return 0;
1349 }
1350 
1351 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1352 					      u32 preempt_timeout_us)
1353 {
1354 	struct xe_sched_msg *msg;
1355 
1356 	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1357 	    exec_queue_killed_or_banned_or_wedged(q))
1358 		return 0;
1359 
1360 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1361 	if (!msg)
1362 		return -ENOMEM;
1363 
1364 	q->sched_props.preempt_timeout_us = preempt_timeout_us;
1365 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1366 
1367 	return 0;
1368 }
1369 
1370 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1371 {
1372 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1373 
1374 	if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending)
1375 		return -EINVAL;
1376 
1377 	q->guc->suspend_pending = true;
1378 	guc_exec_queue_add_msg(q, msg, SUSPEND);
1379 
1380 	return 0;
1381 }
1382 
1383 static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1384 {
1385 	struct xe_guc *guc = exec_queue_to_guc(q);
1386 
1387 	wait_event(q->guc->suspend_wait, !q->guc->suspend_pending ||
1388 		   guc_read_stopped(guc));
1389 }
1390 
1391 static void guc_exec_queue_resume(struct xe_exec_queue *q)
1392 {
1393 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1394 	struct xe_guc *guc = exec_queue_to_guc(q);
1395 	struct xe_device *xe = guc_to_xe(guc);
1396 
1397 	xe_assert(xe, !q->guc->suspend_pending);
1398 
1399 	guc_exec_queue_add_msg(q, msg, RESUME);
1400 }
1401 
1402 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1403 {
1404 	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1405 }
1406 
1407 /*
1408  * All of these functions are an abstraction layer which other parts of XE can
1409  * use to trap into the GuC backend. All of these functions, aside from init,
1410  * really shouldn't do much other than trap into the DRM scheduler which
1411  * synchronizes these operations.
1412  */
1413 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1414 	.init = guc_exec_queue_init,
1415 	.kill = guc_exec_queue_kill,
1416 	.fini = guc_exec_queue_fini,
1417 	.set_priority = guc_exec_queue_set_priority,
1418 	.set_timeslice = guc_exec_queue_set_timeslice,
1419 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1420 	.suspend = guc_exec_queue_suspend,
1421 	.suspend_wait = guc_exec_queue_suspend_wait,
1422 	.resume = guc_exec_queue_resume,
1423 	.reset_status = guc_exec_queue_reset_status,
1424 };
1425 
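/*
 * Per-queue part of xe_guc_submit_stop(): stop the scheduler, clean up state
 * for any lost G2H, and ban non-kernel/VM queues left with unfinished work.
 */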
1426 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1427 {
1428 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1429 
1430 	/* Stop scheduling + flush any DRM scheduler operations */
1431 	xe_sched_submission_stop(sched);
1432 
1433 	/* Clean up lost G2H + reset engine state */
1434 	if (exec_queue_registered(q)) {
1435 		if ((exec_queue_banned(q) && exec_queue_destroyed(q)) ||
1436 		    xe_exec_queue_is_lr(q))
1437 			xe_exec_queue_put(q);
1438 		else if (exec_queue_destroyed(q))
1439 			__guc_exec_queue_fini(guc, q);
1440 	}
1441 	if (q->guc->suspend_pending) {
1442 		set_exec_queue_suspended(q);
1443 		suspend_fence_signal(q);
1444 	}
1445 	atomic_and(EXEC_QUEUE_STATE_DESTROYED | EXEC_QUEUE_STATE_SUSPENDED,
1446 		   &q->guc->state);
1447 	q->guc->resume_time = 0;
1448 	trace_xe_exec_queue_stop(q);
1449 
1450 	/*
1451 	 * Ban any engine (aside from kernel and engines used for VM ops) with a
1452 	 * started but not complete job or if a job has gone through a GT reset
1453 	 * more than twice.
1454 	 */
1455 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1456 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1457 		bool ban = false;
1458 
1459 		if (job) {
1460 			if ((xe_sched_job_started(job) &&
1461 			    !xe_sched_job_completed(job)) ||
1462 			    xe_sched_invalidate_job(job, 2)) {
1463 				trace_xe_sched_job_ban(job);
1464 				ban = true;
1465 			}
1466 		} else if (xe_exec_queue_is_lr(q) &&
1467 			   (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
1468 			ban = true;
1469 		}
1470 
1471 		if (ban) {
1472 			set_exec_queue_banned(q);
1473 			xe_guc_exec_queue_trigger_cleanup(q);
1474 		}
1475 	}
1476 }
1477 
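/**
 * xe_guc_submit_reset_prepare() - Mark GuC submission as stopped ahead of a reset.
 * @guc: the GuC object
 *
 * Return: the previous value of the stopped flag (non-zero if submission was
 * already stopped).
 */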
1478 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1479 {
1480 	int ret;
1481 
1482 	/*
1483 	 * Using an atomic here rather than submission_state.lock as this
1484 	 * function can be called while holding the CT lock (engine reset
1485 	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
1486 	 * An atomic is not ideal, but it works to guard against a concurrent reset
1487 	 * and to release any TDRs waiting on guc->submission_state.stopped.
1488 	 */
1489 	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1490 	smp_wmb();
1491 	wake_up_all(&guc->ct.wq);
1492 
1493 	return ret;
1494 }
1495 
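/**
 * xe_guc_submit_reset_wait() - Wait until a GuC submission reset has completed.
 * @guc: the GuC object
 */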
1496 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1497 {
1498 	wait_event(guc->ct.wq, !guc_read_stopped(guc));
1499 }
1500 
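/**
 * xe_guc_submit_stop() - Stop further submission on all exec queues.
 * @guc: the GuC object
 *
 * Expects submission to already be marked as stopped via
 * xe_guc_submit_reset_prepare().
 */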
1501 void xe_guc_submit_stop(struct xe_guc *guc)
1502 {
1503 	struct xe_exec_queue *q;
1504 	unsigned long index;
1505 	struct xe_device *xe = guc_to_xe(guc);
1506 
1507 	xe_assert(xe, guc_read_stopped(guc) == 1);
1508 
1509 	mutex_lock(&guc->submission_state.lock);
1510 
1511 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1512 		guc_exec_queue_stop(guc, q);
1513 
1514 	mutex_unlock(&guc->submission_state.lock);
1515 
1516 	/*
1517 	 * No one can enter the backend at this point, aside from new engine
1518 	 * creation which is protected by guc->submission_state.lock.
1519 	 */
1520 
1521 }
1522 
1523 static void guc_exec_queue_start(struct xe_exec_queue *q)
1524 {
1525 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1526 
1527 	if (!exec_queue_killed_or_banned_or_wedged(q)) {
1528 		int i;
1529 
1530 		trace_xe_exec_queue_resubmit(q);
1531 		for (i = 0; i < q->width; ++i)
1532 			xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
1533 		xe_sched_resubmit_jobs(sched);
1534 	}
1535 
1536 	xe_sched_submission_start(sched);
1537 }
1538 
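/**
 * xe_guc_submit_start() - Restart submission on all exec queues after a reset.
 * @guc: the GuC object
 *
 * Return: 0 (currently cannot fail).
 */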
1539 int xe_guc_submit_start(struct xe_guc *guc)
1540 {
1541 	struct xe_exec_queue *q;
1542 	unsigned long index;
1543 	struct xe_device *xe = guc_to_xe(guc);
1544 
1545 	xe_assert(xe, guc_read_stopped(guc) == 1);
1546 
1547 	mutex_lock(&guc->submission_state.lock);
1548 	atomic_dec(&guc->submission_state.stopped);
1549 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1550 		guc_exec_queue_start(q);
1551 	mutex_unlock(&guc->submission_state.lock);
1552 
1553 	wake_up_all(&guc->ct.wq);
1554 
1555 	return 0;
1556 }
1557 
1558 static struct xe_exec_queue *
1559 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
1560 {
1561 	struct xe_device *xe = guc_to_xe(guc);
1562 	struct xe_exec_queue *q;
1563 
1564 	if (unlikely(guc_id >= GUC_ID_MAX)) {
1565 		drm_err(&xe->drm, "Invalid guc_id %u", guc_id);
1566 		return NULL;
1567 	}
1568 
1569 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
1570 	if (unlikely(!q)) {
1571 		drm_err(&xe->drm, "No engine present for guc_id %u", guc_id);
1572 		return NULL;
1573 	}
1574 
1575 	xe_assert(xe, guc_id >= q->guc->id);
1576 	xe_assert(xe, guc_id < (q->guc->id + q->width));
1577 
1578 	return q;
1579 }
1580 
1581 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1582 {
1583 	u32 action[] = {
1584 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
1585 		q->guc->id,
1586 	};
1587 
1588 	trace_xe_exec_queue_deregister(q);
1589 
1590 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
1591 }
1592 
1593 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
1594 {
1595 	trace_xe_exec_queue_scheduling_done(q);
1596 
1597 	if (exec_queue_pending_enable(q)) {
1598 		q->guc->resume_time = ktime_get();
1599 		clear_exec_queue_pending_enable(q);
1600 		smp_wmb();
1601 		wake_up_all(&guc->ct.wq);
1602 	} else {
1603 		clear_exec_queue_pending_disable(q);
1604 		if (q->guc->suspend_pending) {
1605 			suspend_fence_signal(q);
1606 		} else {
1607 			if (exec_queue_banned(q)) {
1608 				smp_wmb();
1609 				wake_up_all(&guc->ct.wq);
1610 			}
1611 			deregister_exec_queue(guc, q);
1612 		}
1613 	}
1614 }
1615 
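/**
 * xe_guc_sched_done_handler() - Handle a scheduling-done G2H message.
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: payload length in dwords
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */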
1616 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1617 {
1618 	struct xe_device *xe = guc_to_xe(guc);
1619 	struct xe_exec_queue *q;
1620 	u32 guc_id = msg[0];
1621 
1622 	if (unlikely(len < 2)) {
1623 		drm_err(&xe->drm, "Invalid length %u", len);
1624 		return -EPROTO;
1625 	}
1626 
1627 	q = g2h_exec_queue_lookup(guc, guc_id);
1628 	if (unlikely(!q))
1629 		return -EPROTO;
1630 
1631 	if (unlikely(!exec_queue_pending_enable(q) &&
1632 		     !exec_queue_pending_disable(q))) {
1633 		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
1634 			atomic_read(&q->guc->state));
1635 		return -EPROTO;
1636 	}
1637 
1638 	handle_sched_done(guc, q);
1639 
1640 	return 0;
1641 }
1642 
1643 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
1644 {
1645 	trace_xe_exec_queue_deregister_done(q);
1646 
1647 	clear_exec_queue_registered(q);
1648 
1649 	if (exec_queue_banned(q) || xe_exec_queue_is_lr(q))
1650 		xe_exec_queue_put(q);
1651 	else
1652 		__guc_exec_queue_fini(guc, q);
1653 }
1654 
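/**
 * xe_guc_deregister_done_handler() - Handle a context-deregister-done G2H message.
 * @guc: the GuC object
 * @msg: G2H message payload, msg[0] is the guc_id
 * @len: payload length in dwords
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */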
1655 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
1656 {
1657 	struct xe_device *xe = guc_to_xe(guc);
1658 	struct xe_exec_queue *q;
1659 	u32 guc_id = msg[0];
1660 
1661 	if (unlikely(len < 1)) {
1662 		drm_err(&xe->drm, "Invalid length %u", len);
1663 		return -EPROTO;
1664 	}
1665 
1666 	q = g2h_exec_queue_lookup(guc, guc_id);
1667 	if (unlikely(!q))
1668 		return -EPROTO;
1669 
1670 	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
1671 	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
1672 		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
1673 			atomic_read(&q->guc->state));
1674 		return -EPROTO;
1675 	}
1676 
1677 	handle_deregister_done(guc, q);
1678 
1679 	return 0;
1680 }
1681 
1682 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
1683 {
1684 	struct xe_gt *gt = guc_to_gt(guc);
1685 	struct xe_device *xe = guc_to_xe(guc);
1686 	struct xe_exec_queue *q;
1687 	u32 guc_id = msg[0];
1688 
1689 	if (unlikely(len < 1)) {
1690 		drm_err(&xe->drm, "Invalid length %u", len);
1691 		return -EPROTO;
1692 	}
1693 
1694 	q = g2h_exec_queue_lookup(guc, guc_id);
1695 	if (unlikely(!q))
1696 		return -EPROTO;
1697 
1698 	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
1699 		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
1700 
1701 	/* FIXME: Do error capture, most likely async */
1702 
1703 	trace_xe_exec_queue_reset(q);
1704 
1705 	/*
1706 	 * A banned engine is a NOP at this point (came from
1707 	 * guc_exec_queue_timedout_job). Otherwise, kick the drm scheduler to cancel
1708 	 * the jobs by setting the job's timeout to the minimum value, which kicks
1709 	 * guc_exec_queue_timedout_job().
1710 	 */
1711 	set_exec_queue_reset(q);
1712 	if (!exec_queue_banned(q))
1713 		xe_guc_exec_queue_trigger_cleanup(q);
1714 
1715 	return 0;
1716 }
1717 
1718 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
1719 					       u32 len)
1720 {
1721 	struct xe_gt *gt = guc_to_gt(guc);
1722 	struct xe_device *xe = guc_to_xe(guc);
1723 	struct xe_exec_queue *q;
1724 	u32 guc_id = msg[0];
1725 
1726 	if (unlikely(len < 1)) {
1727 		drm_err(&xe->drm, "Invalid length %u", len);
1728 		return -EPROTO;
1729 	}
1730 
1731 	q = g2h_exec_queue_lookup(guc, guc_id);
1732 	if (unlikely(!q))
1733 		return -EPROTO;
1734 
1735 	xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
1736 		  xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
1737 
1738 	trace_xe_exec_queue_memory_cat_error(q);
1739 
1740 	/* Treat the same as engine reset */
1741 	set_exec_queue_reset(q);
1742 	if (!exec_queue_banned(q))
1743 		xe_guc_exec_queue_trigger_cleanup(q);
1744 
1745 	return 0;
1746 }
1747 
1748 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
1749 {
1750 	struct xe_device *xe = guc_to_xe(guc);
1751 	u8 guc_class, instance;
1752 	u32 reason;
1753 
1754 	if (unlikely(len != 3)) {
1755 		drm_err(&xe->drm, "Invalid length %u", len);
1756 		return -EPROTO;
1757 	}
1758 
1759 	guc_class = msg[0];
1760 	instance = msg[1];
1761 	reason = msg[2];
1762 
1763 	/* Unexpected failure of a hardware feature, log an actual error */
1764 	drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X",
1765 		guc_class, instance, reason);
1766 
1767 	xe_gt_reset_async(guc_to_gt(guc));
1768 
1769 	return 0;
1770 }
1771 
1772 static void
1773 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
1774 				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
1775 {
1776 	struct xe_guc *guc = exec_queue_to_guc(q);
1777 	struct xe_device *xe = guc_to_xe(guc);
1778 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
1779 	int i;
1780 
1781 	snapshot->guc.wqi_head = q->guc->wqi_head;
1782 	snapshot->guc.wqi_tail = q->guc->wqi_tail;
1783 	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
1784 	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
1785 	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
1786 							  wq_desc.wq_status);
1787 
1788 	if (snapshot->parallel.wq_desc.head !=
1789 	    snapshot->parallel.wq_desc.tail) {
1790 		for (i = snapshot->parallel.wq_desc.head;
1791 		     i != snapshot->parallel.wq_desc.tail;
1792 		     i = (i + sizeof(u32)) % WQ_SIZE)
1793 			snapshot->parallel.wq[i / sizeof(u32)] =
1794 				parallel_read(xe, map, wq[i / sizeof(u32)]);
1795 	}
1796 }
1797 
1798 static void
1799 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
1800 				 struct drm_printer *p)
1801 {
1802 	int i;
1803 
1804 	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
1805 		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
1806 	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
1807 		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
1808 	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
1809 
1810 	if (snapshot->parallel.wq_desc.head !=
1811 	    snapshot->parallel.wq_desc.tail) {
1812 		for (i = snapshot->parallel.wq_desc.head;
1813 		     i != snapshot->parallel.wq_desc.tail;
1814 		     i = (i + sizeof(u32)) % WQ_SIZE)
1815 			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
1816 				   snapshot->parallel.wq[i / sizeof(u32)]);
1817 	}
1818 }
1819 
1820 /**
1821  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
1822  * @q: faulty exec queue
1823  *
1824  * This can be printed out in a later stage like during dev_coredump
1825  * analysis.
1826  *
1827  * Return: a GuC Submit Engine snapshot object that must be freed by the
1828  * caller, using xe_guc_exec_queue_snapshot_free().
1829  */
1830 struct xe_guc_submit_exec_queue_snapshot *
1831 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
1832 {
1833 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1834 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
1835 	int i;
1836 
1837 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
1838 
1839 	if (!snapshot)
1840 		return NULL;
1841 
1842 	snapshot->guc.id = q->guc->id;
1843 	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
1844 	snapshot->class = q->class;
1845 	snapshot->logical_mask = q->logical_mask;
1846 	snapshot->width = q->width;
1847 	snapshot->refcount = kref_read(&q->refcount);
1848 	snapshot->sched_timeout = sched->base.timeout;
1849 	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
1850 	snapshot->sched_props.preempt_timeout_us =
1851 		q->sched_props.preempt_timeout_us;
1852 
1853 	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
1854 				      GFP_ATOMIC);
1855 
1856 	if (snapshot->lrc) {
1857 		for (i = 0; i < q->width; ++i) {
1858 			struct xe_lrc *lrc = q->lrc[i];
1859 
1860 			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
1861 		}
1862 	}
1863 
1864 	snapshot->schedule_state = atomic_read(&q->guc->state);
1865 	snapshot->exec_queue_flags = q->flags;
1866 
1867 	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
1868 	if (snapshot->parallel_execution)
1869 		guc_exec_queue_wq_snapshot_capture(q, snapshot);
1870 
1871 	spin_lock(&sched->base.job_list_lock);
1872 	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
1873 	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
1874 					       sizeof(struct pending_list_snapshot),
1875 					       GFP_ATOMIC);
1876 
1877 	if (snapshot->pending_list) {
1878 		struct xe_sched_job *job_iter;
1879 
1880 		i = 0;
1881 		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
1882 			snapshot->pending_list[i].seqno =
1883 				xe_sched_job_seqno(job_iter);
1884 			snapshot->pending_list[i].fence =
1885 				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
1886 			snapshot->pending_list[i].finished =
1887 				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
1888 				? 1 : 0;
1889 			i++;
1890 		}
1891 	}
1892 
1893 	spin_unlock(&sched->base.job_list_lock);
1894 
1895 	return snapshot;
1896 }
1897 
1898 /**
1899  * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
1900  * @snapshot: Previously captured snapshot of the exec queue.
1901  *
1902  * This captures some data that requires taking some locks, so it cannot be done in the signaling path.
1903  */
1904 void
1905 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
1906 {
1907 	int i;
1908 
1909 	if (!snapshot || !snapshot->lrc)
1910 		return;
1911 
1912 	for (i = 0; i < snapshot->width; ++i)
1913 		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
1914 }
1915 
1916 /**
1917  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
1918  * @snapshot: GuC Submit Engine snapshot object.
1919  * @p: drm_printer where it will be printed out.
1920  *
1921  * This function prints out a given GuC Submit Engine snapshot object.
1922  */
1923 void
1924 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
1925 				 struct drm_printer *p)
1926 {
1927 	int i;
1928 
1929 	if (!snapshot)
1930 		return;
1931 
1932 	drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id);
1933 	drm_printf(p, "\tName: %s\n", snapshot->name);
1934 	drm_printf(p, "\tClass: %d\n", snapshot->class);
1935 	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
1936 	drm_printf(p, "\tWidth: %d\n", snapshot->width);
1937 	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
1938 	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
1939 	drm_printf(p, "\tTimeslice: %u (us)\n",
1940 		   snapshot->sched_props.timeslice_us);
1941 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
1942 		   snapshot->sched_props.preempt_timeout_us);
1943 
1944 	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
1945 		xe_lrc_snapshot_print(snapshot->lrc[i], p);
1946 
1947 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
1948 	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
1949 
1950 	if (snapshot->parallel_execution)
1951 		guc_exec_queue_wq_snapshot_print(snapshot, p);
1952 
1953 	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
1954 	     i++)
1955 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
1956 			   snapshot->pending_list[i].seqno,
1957 			   snapshot->pending_list[i].fence,
1958 			   snapshot->pending_list[i].finished);
1959 }
1960 
1961 /**
1962  * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
1963  * snapshot.
1964  * @snapshot: GuC Submit Engine snapshot object.
1965  *
1966  * This function free all the memory that needed to be allocated at capture
1967  * This function frees all the memory that was allocated at capture
1968  */
1969 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
1970 {
1971 	int i;
1972 
1973 	if (!snapshot)
1974 		return;
1975 
1976 	if (snapshot->lrc) {
1977 		for (i = 0; i < snapshot->width; i++)
1978 			xe_lrc_snapshot_free(snapshot->lrc[i]);
1979 		kfree(snapshot->lrc);
1980 	}
1981 	kfree(snapshot->pending_list);
1982 	kfree(snapshot);
1983 }
1984 
1985 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
1986 {
1987 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
1988 
1989 	snapshot = xe_guc_exec_queue_snapshot_capture(q);
1990 	xe_guc_exec_queue_snapshot_print(snapshot, p);
1991 	xe_guc_exec_queue_snapshot_free(snapshot);
1992 }
1993 
1994 /**
1995  * xe_guc_submit_print - GuC Submit Print.
1996  * @guc: GuC.
1997  * @p: drm_printer where it will be printed out.
1998  *
1999  * This function captures and prints snapshots of **all** GuC Engines.
2000  */
2001 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2002 {
2003 	struct xe_exec_queue *q;
2004 	unsigned long index;
2005 
2006 	if (!xe_device_uc_enabled(guc_to_xe(guc)))
2007 		return;
2008 
2009 	mutex_lock(&guc->submission_state.lock);
2010 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2011 		guc_exec_queue_print(q, p);
2012 	mutex_unlock(&guc->submission_state.lock);
2013 }
2014