xref: /linux/drivers/gpu/drm/xe/xe_guc_submit.c (revision 8e65320d91cdc3b241d4b94855c88459b91abf66)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_submit.h"
7 
8 #include <linux/bitfield.h>
9 #include <linux/bitmap.h>
10 #include <linux/circ_buf.h>
11 #include <linux/dma-fence-array.h>
12 
13 #include <drm/drm_managed.h>
14 
15 #include "abi/guc_actions_abi.h"
16 #include "abi/guc_actions_slpc_abi.h"
17 #include "abi/guc_klvs_abi.h"
18 #include "xe_assert.h"
19 #include "xe_bo.h"
20 #include "xe_devcoredump.h"
21 #include "xe_device.h"
22 #include "xe_exec_queue.h"
23 #include "xe_force_wake.h"
24 #include "xe_gpu_scheduler.h"
25 #include "xe_gt.h"
26 #include "xe_gt_clock.h"
27 #include "xe_gt_printk.h"
28 #include "xe_guc.h"
29 #include "xe_guc_capture.h"
30 #include "xe_guc_ct.h"
31 #include "xe_guc_exec_queue_types.h"
32 #include "xe_guc_id_mgr.h"
33 #include "xe_guc_klv_helpers.h"
34 #include "xe_guc_submit_types.h"
35 #include "xe_hw_engine.h"
36 #include "xe_lrc.h"
37 #include "xe_macros.h"
38 #include "xe_map.h"
39 #include "xe_mocs.h"
40 #include "xe_pm.h"
41 #include "xe_ring_ops_types.h"
42 #include "xe_sched_job.h"
43 #include "xe_sleep.h"
44 #include "xe_trace.h"
45 #include "xe_uc_fw.h"
46 #include "xe_vm.h"
47 
48 #define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN		6
49 
50 static int guc_submit_reset_prepare(struct xe_guc *guc);
51 
52 static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue * q)53 exec_queue_to_guc(struct xe_exec_queue *q)
54 {
55 	return &q->gt->uc.guc;
56 }
57 
/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done is being processed).
 */
63 #define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
64 #define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
65 #define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
66 #define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
67 #define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
68 #define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
69 #define EXEC_QUEUE_STATE_RESET			(1 << 6)
70 #define EXEC_QUEUE_STATE_KILLED			(1 << 7)
71 #define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
72 #define EXEC_QUEUE_STATE_BANNED			(1 << 9)
73 #define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
74 
exec_queue_registered(struct xe_exec_queue * q)75 static bool exec_queue_registered(struct xe_exec_queue *q)
76 {
77 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
78 }
79 
set_exec_queue_registered(struct xe_exec_queue * q)80 static void set_exec_queue_registered(struct xe_exec_queue *q)
81 {
82 	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
83 }
84 
clear_exec_queue_registered(struct xe_exec_queue * q)85 static void clear_exec_queue_registered(struct xe_exec_queue *q)
86 {
87 	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
88 }
89 
exec_queue_enabled(struct xe_exec_queue * q)90 static bool exec_queue_enabled(struct xe_exec_queue *q)
91 {
92 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
93 }
94 
set_exec_queue_enabled(struct xe_exec_queue * q)95 static void set_exec_queue_enabled(struct xe_exec_queue *q)
96 {
97 	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
98 }
99 
clear_exec_queue_enabled(struct xe_exec_queue * q)100 static void clear_exec_queue_enabled(struct xe_exec_queue *q)
101 {
102 	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
103 }
104 
exec_queue_pending_enable(struct xe_exec_queue * q)105 static bool exec_queue_pending_enable(struct xe_exec_queue *q)
106 {
107 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
108 }
109 
set_exec_queue_pending_enable(struct xe_exec_queue * q)110 static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
111 {
112 	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
113 }
114 
clear_exec_queue_pending_enable(struct xe_exec_queue * q)115 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
116 {
117 	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
118 }
119 
exec_queue_pending_disable(struct xe_exec_queue * q)120 static bool exec_queue_pending_disable(struct xe_exec_queue *q)
121 {
122 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
123 }
124 
set_exec_queue_pending_disable(struct xe_exec_queue * q)125 static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
126 {
127 	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
128 }
129 
clear_exec_queue_pending_disable(struct xe_exec_queue * q)130 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
131 {
132 	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
133 }
134 
exec_queue_destroyed(struct xe_exec_queue * q)135 static bool exec_queue_destroyed(struct xe_exec_queue *q)
136 {
137 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
138 }
139 
set_exec_queue_destroyed(struct xe_exec_queue * q)140 static void set_exec_queue_destroyed(struct xe_exec_queue *q)
141 {
142 	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
143 }
144 
clear_exec_queue_destroyed(struct xe_exec_queue * q)145 static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
146 {
147 	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
148 }
149 
exec_queue_banned(struct xe_exec_queue * q)150 static bool exec_queue_banned(struct xe_exec_queue *q)
151 {
152 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
153 }
154 
set_exec_queue_banned(struct xe_exec_queue * q)155 static void set_exec_queue_banned(struct xe_exec_queue *q)
156 {
157 	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
158 }
159 
exec_queue_suspended(struct xe_exec_queue * q)160 static bool exec_queue_suspended(struct xe_exec_queue *q)
161 {
162 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
163 }
164 
set_exec_queue_suspended(struct xe_exec_queue * q)165 static void set_exec_queue_suspended(struct xe_exec_queue *q)
166 {
167 	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
168 }
169 
clear_exec_queue_suspended(struct xe_exec_queue * q)170 static void clear_exec_queue_suspended(struct xe_exec_queue *q)
171 {
172 	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
173 }
174 
exec_queue_reset(struct xe_exec_queue * q)175 static bool exec_queue_reset(struct xe_exec_queue *q)
176 {
177 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
178 }
179 
set_exec_queue_reset(struct xe_exec_queue * q)180 static void set_exec_queue_reset(struct xe_exec_queue *q)
181 {
182 	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
183 }
184 
exec_queue_killed(struct xe_exec_queue * q)185 static bool exec_queue_killed(struct xe_exec_queue *q)
186 {
187 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
188 }
189 
set_exec_queue_killed(struct xe_exec_queue * q)190 static void set_exec_queue_killed(struct xe_exec_queue *q)
191 {
192 	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
193 }
194 
exec_queue_wedged(struct xe_exec_queue * q)195 static bool exec_queue_wedged(struct xe_exec_queue *q)
196 {
197 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
198 }
199 
set_exec_queue_wedged(struct xe_exec_queue * q)200 static void set_exec_queue_wedged(struct xe_exec_queue *q)
201 {
202 	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
203 }
204 
exec_queue_pending_resume(struct xe_exec_queue * q)205 static bool exec_queue_pending_resume(struct xe_exec_queue *q)
206 {
207 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
208 }
209 
set_exec_queue_pending_resume(struct xe_exec_queue * q)210 static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
211 {
212 	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
213 }
214 
clear_exec_queue_pending_resume(struct xe_exec_queue * q)215 static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
216 {
217 	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
218 }
219 
exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue * q)220 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
221 {
222 	return (atomic_read(&q->guc->state) &
223 		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
224 		 EXEC_QUEUE_STATE_BANNED));
225 }
226 
guc_submit_sw_fini(struct drm_device * drm,void * arg)227 static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
228 {
229 	struct xe_guc *guc = arg;
230 	struct xe_device *xe = guc_to_xe(guc);
231 	struct xe_gt *gt = guc_to_gt(guc);
232 	int ret;
233 
234 	ret = wait_event_timeout(guc->submission_state.fini_wq,
235 				 xa_empty(&guc->submission_state.exec_queue_lookup),
236 				 HZ * 5);
237 
238 	drain_workqueue(xe->destroy_wq);
239 
240 	xe_gt_assert(gt, ret);
241 
242 	xa_destroy(&guc->submission_state.exec_queue_lookup);
243 }
244 
guc_submit_fini(void * arg)245 static void guc_submit_fini(void *arg)
246 {
247 	struct xe_guc *guc = arg;
248 	struct xe_exec_queue *q;
249 	unsigned long index;
250 
251 	/* Drop any wedged queue refs */
252 	mutex_lock(&guc->submission_state.lock);
253 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
254 		if (exec_queue_wedged(q)) {
255 			mutex_unlock(&guc->submission_state.lock);
256 			xe_exec_queue_put(q);
257 			mutex_lock(&guc->submission_state.lock);
258 		}
259 	}
260 	mutex_unlock(&guc->submission_state.lock);
261 
262 	/* Forcefully kill any remaining exec queues */
263 	xe_guc_ct_stop(&guc->ct);
264 	guc_submit_reset_prepare(guc);
265 	xe_guc_softreset(guc);
266 	xe_guc_submit_stop(guc);
267 	xe_uc_fw_sanitize(&guc->fw);
268 	xe_guc_submit_pause_abort(guc);
269 }
270 
271 static const struct xe_exec_queue_ops guc_exec_queue_ops;
272 
primelockdep(struct xe_guc * guc)273 static void primelockdep(struct xe_guc *guc)
274 {
275 	if (!IS_ENABLED(CONFIG_LOCKDEP))
276 		return;
277 
278 	fs_reclaim_acquire(GFP_KERNEL);
279 
280 	mutex_lock(&guc->submission_state.lock);
281 	mutex_unlock(&guc->submission_state.lock);
282 
283 	fs_reclaim_release(GFP_KERNEL);
284 }
285 
286 /**
287  * xe_guc_submit_init() - Initialize GuC submission.
288  * @guc: the &xe_guc to initialize
289  * @num_ids: number of GuC context IDs to use
290  *
291  * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
292  * GuC context IDs supported by the GuC firmware should be used for submission.
293  *
294  * Only VF drivers will have to provide explicit number of GuC context IDs
295  * that they can use for submission.
296  *
297  * Return: 0 on success or a negative error code on failure.
298  */
xe_guc_submit_init(struct xe_guc * guc,unsigned int num_ids)299 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
300 {
301 	struct xe_device *xe = guc_to_xe(guc);
302 	struct xe_gt *gt = guc_to_gt(guc);
303 	int err;
304 
305 	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
306 	if (err)
307 		return err;
308 
309 	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
310 	if (err)
311 		return err;
312 
313 	gt->exec_queue_ops = &guc_exec_queue_ops;
314 
315 	xa_init(&guc->submission_state.exec_queue_lookup);
316 
317 	init_waitqueue_head(&guc->submission_state.fini_wq);
318 
319 	primelockdep(guc);
320 
321 	guc->submission_state.initialized = true;
322 
323 	err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
324 	if (err)
325 		return err;
326 
327 	return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
328 }
329 
330 /*
331  * Given that we want to guarantee enough RCS throughput to avoid missing
332  * frames, we set the yield policy to 20% of each 80ms interval.
333  */
334 #define RC_YIELD_DURATION	80	/* in ms */
335 #define RC_YIELD_RATIO		20	/* in percent */
/*
 * Write the render/compute yield scheduling-policy KLV (tag dword followed by
 * RC_YIELD_DURATION and RC_YIELD_RATIO) at @emit.
 *
 * Return: pointer to the dword just past the three dwords written.
 */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	emit[0] = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	emit[1] = RC_YIELD_DURATION;
	emit[2] = RC_YIELD_RATIO;

	return emit + 3;
}
344 
345 #define SCHEDULING_POLICY_MAX_DWORDS 16
guc_init_global_schedule_policy(struct xe_guc * guc)346 static int guc_init_global_schedule_policy(struct xe_guc *guc)
347 {
348 	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
349 	u32 *emit = data;
350 	u32 count = 0;
351 	int ret;
352 
353 	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
354 		return 0;
355 
356 	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
357 
358 	if (CCS_INSTANCES(guc_to_gt(guc)))
359 		emit = emit_render_compute_yield_klv(emit);
360 
361 	count = emit - data;
362 	if (count > 1) {
363 		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
364 
365 		ret = xe_guc_ct_send_block(&guc->ct, data, count);
366 		if (ret < 0) {
367 			xe_gt_err(guc_to_gt(guc),
368 				  "failed to enable GuC scheduling policies: %pe\n",
369 				  ERR_PTR(ret));
370 			return ret;
371 		}
372 	}
373 
374 	return 0;
375 }
376 
xe_guc_submit_enable(struct xe_guc * guc)377 int xe_guc_submit_enable(struct xe_guc *guc)
378 {
379 	int ret;
380 
381 	ret = guc_init_global_schedule_policy(guc);
382 	if (ret)
383 		return ret;
384 
385 	guc->submission_state.enabled = true;
386 
387 	return 0;
388 }
389 
xe_guc_submit_disable(struct xe_guc * guc)390 void xe_guc_submit_disable(struct xe_guc *guc)
391 {
392 	guc->submission_state.enabled = false;
393 }
394 
/*
 * Undo alloc_guc_id(): erase the first @xa_count lookup entries starting at
 * q->guc->id, hand all q->width IDs back to the ID manager, and wake up
 * fini_wq waiters once the lookup xarray is empty.
 *
 * @xa_count may be smaller than q->width when called from a partially failed
 * alloc_guc_id(). Caller must hold submission_state.lock.
 */
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int n = 0;

	lockdep_assert_held(&guc->submission_state.lock);

	while (n < xa_count) {
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + n);
		++n;
	}

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}
410 
alloc_guc_id(struct xe_guc * guc,struct xe_exec_queue * q)411 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
412 {
413 	int ret;
414 	int i;
415 
416 	/*
417 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
418 	 * worse case user gets -ENOMEM on engine create and has to try again.
419 	 *
420 	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
421 	 * failure.
422 	 */
423 	lockdep_assert_held(&guc->submission_state.lock);
424 
425 	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
426 					   q->width);
427 	if (ret < 0)
428 		return ret;
429 
430 	q->guc->id = ret;
431 
432 	for (i = 0; i < q->width; ++i) {
433 		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
434 				      q->guc->id + i, q, GFP_NOWAIT));
435 		if (ret)
436 			goto err_release;
437 	}
438 
439 	return 0;
440 
441 err_release:
442 	__release_guc_id(guc, q, i);
443 
444 	return ret;
445 }
446 
release_guc_id(struct xe_guc * guc,struct xe_exec_queue * q)447 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
448 {
449 	mutex_lock(&guc->submission_state.lock);
450 	__release_guc_id(guc, q, q->width);
451 	mutex_unlock(&guc->submission_state.lock);
452 }
453 
454 struct exec_queue_policy {
455 	u32 count;
456 	struct guc_update_exec_queue_policy h2g;
457 };
458 
__guc_exec_queue_policy_action_size(struct exec_queue_policy * policy)459 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
460 {
461 	size_t bytes = sizeof(policy->h2g.header) +
462 		       (sizeof(policy->h2g.klv[0]) * policy->count);
463 
464 	return bytes / sizeof(u32);
465 }
466 
/*
 * Begin a new policy update message for context @guc_id: fill in the header
 * action and ID and reset the KLV count to zero.
 */
static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->count = 0;
	policy->h2g.header.guc_id = guc_id;
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
}
475 
476 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
477 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
478 					   u32 data) \
479 { \
480 	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
481 \
482 	policy->h2g.klv[policy->count].kl = \
483 		FIELD_PREP(GUC_KLV_0_KEY, \
484 			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
485 		FIELD_PREP(GUC_KLV_0_LEN, 1); \
486 	policy->h2g.klv[policy->count].value = data; \
487 	policy->count++; \
488 }
489 
490 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
491 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
492 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
493 MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
494 #undef MAKE_EXEC_QUEUE_POLICY_ADD
495 
496 static const int xe_exec_queue_prio_to_guc[] = {
497 	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
498 	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
499 	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
500 	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
501 };
502 
init_policies(struct xe_guc * guc,struct xe_exec_queue * q)503 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
504 {
505 	struct exec_queue_policy policy;
506 	enum xe_exec_queue_priority prio = q->sched_props.priority;
507 	u32 timeslice_us = q->sched_props.timeslice_us;
508 	u32 slpc_exec_queue_freq_req = 0;
509 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
510 
511 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
512 		     !xe_exec_queue_is_multi_queue_secondary(q));
513 
514 	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
515 		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
516 
517 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
518 	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
519 	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
520 	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
521 	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
522 							     slpc_exec_queue_freq_req);
523 
524 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
525 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
526 }
527 
set_min_preemption_timeout(struct xe_guc * guc,struct xe_exec_queue * q)528 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
529 {
530 	struct exec_queue_policy policy;
531 
532 	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));
533 
534 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
535 	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
536 
537 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
538 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
539 }
540 
/* Report whether xe_gt_recovery_pending() is true for the GT owning @guc. */
static bool vf_recovery(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);

	return xe_gt_recovery_pending(gt);
}
545 
xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue * q)546 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
547 {
548 	struct xe_guc *guc = exec_queue_to_guc(q);
549 	struct xe_device *xe = guc_to_xe(guc);
550 
551 	/** to wakeup xe_wait_user_fence ioctl if exec queue is reset */
552 	wake_up_all(&xe->ufence_wq);
553 
554 	xe_sched_tdr_queue_imm(&q->guc->sched);
555 }
556 
xe_guc_exec_queue_group_stop(struct xe_exec_queue * q)557 static void xe_guc_exec_queue_group_stop(struct xe_exec_queue *q)
558 {
559 	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
560 	struct xe_exec_queue_group *group = q->multi_queue.group;
561 	struct xe_exec_queue *eq, *next;
562 	LIST_HEAD(tmp);
563 
564 	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
565 		     xe_exec_queue_is_multi_queue(q));
566 
567 	mutex_lock(&group->list_lock);
568 
569 	/*
570 	 * Stop all future queues being from executing while group is stopped.
571 	 */
572 	group->stopped = true;
573 
574 	list_for_each_entry_safe(eq, next, &group->list, multi_queue.link)
575 		/*
576 		 * Refcount prevents an attempted removal from &group->list,
577 		 * temporary list allows safe iteration after dropping
578 		 * &group->list_lock.
579 		 */
580 		if (xe_exec_queue_get_unless_zero(eq))
581 			list_move_tail(&eq->multi_queue.link, &tmp);
582 
583 	mutex_unlock(&group->list_lock);
584 
585 	/* We cannot stop under list lock without getting inversions */
586 	xe_sched_submission_stop(&primary->guc->sched);
587 	list_for_each_entry(eq, &tmp, multi_queue.link)
588 		xe_sched_submission_stop(&eq->guc->sched);
589 
590 	mutex_lock(&group->list_lock);
591 	list_for_each_entry_safe(eq, next, &tmp, multi_queue.link) {
592 		/*
593 		 * Corner where we got banned while stopping and not on
594 		 * &group->list
595 		 */
596 		if (READ_ONCE(group->banned))
597 			xe_guc_exec_queue_trigger_cleanup(eq);
598 
599 		list_move_tail(&eq->multi_queue.link, &group->list);
600 		xe_exec_queue_put(eq);
601 	}
602 	mutex_unlock(&group->list_lock);
603 }
604 
xe_guc_exec_queue_group_start(struct xe_exec_queue * q)605 static void xe_guc_exec_queue_group_start(struct xe_exec_queue *q)
606 {
607 	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
608 	struct xe_exec_queue_group *group = q->multi_queue.group;
609 	struct xe_exec_queue *eq;
610 
611 	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
612 		     xe_exec_queue_is_multi_queue(q));
613 
614 	xe_sched_submission_start(&primary->guc->sched);
615 
616 	mutex_lock(&group->list_lock);
617 	group->stopped = false;
618 	list_for_each_entry(eq, &group->list, multi_queue.link)
619 		xe_sched_submission_start(&eq->guc->sched);
620 	mutex_unlock(&group->list_lock);
621 }
622 
xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue * q)623 static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
624 {
625 	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
626 	struct xe_exec_queue_group *group = q->multi_queue.group;
627 	struct xe_exec_queue *eq;
628 
629 	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
630 		     xe_exec_queue_is_multi_queue(q));
631 
632 	/* Group banned, skip timeout check in TDR */
633 	WRITE_ONCE(group->banned, true);
634 	xe_guc_exec_queue_trigger_cleanup(primary);
635 
636 	mutex_lock(&group->list_lock);
637 	list_for_each_entry(eq, &group->list, multi_queue.link)
638 		xe_guc_exec_queue_trigger_cleanup(eq);
639 	mutex_unlock(&group->list_lock);
640 }
641 
xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue * q)642 static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
643 {
644 	if (xe_exec_queue_is_multi_queue(q)) {
645 		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
646 		struct xe_exec_queue_group *group = q->multi_queue.group;
647 		struct xe_exec_queue *eq;
648 
649 		/* Group banned, skip timeout check in TDR */
650 		WRITE_ONCE(group->banned, true);
651 
652 		set_exec_queue_reset(primary);
653 		if (!exec_queue_banned(primary))
654 			xe_guc_exec_queue_trigger_cleanup(primary);
655 
656 		mutex_lock(&group->list_lock);
657 		list_for_each_entry(eq, &group->list, multi_queue.link) {
658 			set_exec_queue_reset(eq);
659 			if (!exec_queue_banned(eq))
660 				xe_guc_exec_queue_trigger_cleanup(eq);
661 		}
662 		mutex_unlock(&group->list_lock);
663 	} else {
664 		set_exec_queue_reset(q);
665 		if (!exec_queue_banned(q))
666 			xe_guc_exec_queue_trigger_cleanup(q);
667 	}
668 }
669 
set_exec_queue_group_banned(struct xe_exec_queue * q)670 static void set_exec_queue_group_banned(struct xe_exec_queue *q)
671 {
672 	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
673 	struct xe_exec_queue_group *group = q->multi_queue.group;
674 	struct xe_exec_queue *eq;
675 
676 	/* Ban all queues of the multi-queue group */
677 	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
678 		     xe_exec_queue_is_multi_queue(q));
679 	set_exec_queue_banned(primary);
680 
681 	mutex_lock(&group->list_lock);
682 	list_for_each_entry(eq, &group->list, multi_queue.link)
683 		set_exec_queue_banned(eq);
684 	mutex_unlock(&group->list_lock);
685 }
686 
687 /* Helper for context registration H2G */
688 struct guc_ctxt_registration_info {
689 	u32 flags;
690 	u32 context_idx;
691 	u32 engine_class;
692 	u32 engine_submit_mask;
693 	u32 wq_desc_lo;
694 	u32 wq_desc_hi;
695 	u32 wq_base_lo;
696 	u32 wq_base_hi;
697 	u32 wq_size;
698 	u32 cgp_lo;
699 	u32 cgp_hi;
700 	u32 hwlrca_lo;
701 	u32 hwlrca_hi;
702 };
703 
/*
 * Accessors for a struct guc_submit_parallel_scratch located at offset 0 of
 * the mapping @map_: parallel_read() yields the value of @field_,
 * parallel_write() stores @val_ into @field_.
 */
#define parallel_read(xe_, map_, field_)				\
	xe_map_rd_field(xe_, &map_, 0,					\
			struct guc_submit_parallel_scratch, field_)
#define parallel_write(xe_, map_, field_, val_)				\
	xe_map_wr_field(xe_, &map_, 0,					\
			struct guc_submit_parallel_scratch, field_, val_)
710 
711 /**
712  * DOC: Multi Queue Group GuC interface
713  *
714  * The multi queue group coordination between KMD and GuC is through a software
715  * construct called Context Group Page (CGP). The CGP is a KMD managed 4KB page
716  * allocated in the global GTT.
717  *
718  * CGP format:
719  *
720  * +-----------+---------------------------+---------------------------------------------+
721  * | DWORD     | Name                      | Description                                 |
722  * +-----------+---------------------------+---------------------------------------------+
723  * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
724  * +-----------+---------------------------+---------------------------------------------+
725  * | 1..15     | RESERVED                  | MBZ                                         |
726  * +-----------+---------------------------+---------------------------------------------+
727  * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
728  * +-----------+---------------------------+---------------------------------------------+
729  * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
730  * +-----------+---------------------------+---------------------------------------------+
731  * | 18..31    | RESERVED                  | MBZ                                         |
732  * +-----------+---------------------------+---------------------------------------------+
733  * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
734  * +-----------+---------------------------+---------------------------------------------+
735  * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
736  * +-----------+---------------------------+---------------------------------------------+
737  * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
738  * +-----------+---------------------------+---------------------------------------------+
739  * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
740  * +-----------+---------------------------+---------------------------------------------+
741  * | ...       |...                        | ...                                         |
742  * +-----------+---------------------------+---------------------------------------------+
743  * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
744  * +-----------+---------------------------+---------------------------------------------+
745  * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
746  * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
748  * +-----------+---------------------------+---------------------------------------------+
749  *
750  * While registering Q0 with GuC, CGP is updated with Q0 entry and GuC is notified
751  * through XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G message which specifies
752  * the CGP address. When the secondary queues are added to the group, the CGP is
753  * updated with entry for that queue and GuC is notified through the H2G interface
754  * XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC. GuC responds to these H2G messages
755  * with a XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
756  * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for any
757  * error in the CGP. Only one of these CGP update messages can be outstanding
758  * (waiting for GuC response) at any time. The bits in KMD_QUEUE_UPDATE_MASK_DW*
759  * fields indicate which queue entry is being updated in the CGP.
760  *
761  * The primary queue (Q0) represents the multi queue group context in GuC and
762  * submission on any queue of the group must be through Q0 GuC interface only.
763  *
764  * As it is not required to register secondary queues with GuC, the secondary queue
765  * context ids in the CGP are populated with Q0 context id.
766  */
767 
768 #define CGP_VERSION_MAJOR_SHIFT	8
769 
xe_guc_exec_queue_group_cgp_update(struct xe_device * xe,struct xe_exec_queue * q)770 static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
771 					       struct xe_exec_queue *q)
772 {
773 	struct xe_exec_queue_group *group = q->multi_queue.group;
774 	u32 guc_id = group->primary->guc->id;
775 
776 	/* Currently implementing CGP version 1.0 */
777 	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
778 		  1 << CGP_VERSION_MAJOR_SHIFT);
779 
780 	xe_map_wr(xe, &group->cgp_bo->vmap,
781 		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
782 		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));
783 
784 	xe_map_wr(xe, &group->cgp_bo->vmap,
785 		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
786 		  u32, guc_id);
787 
788 	if (q->multi_queue.pos / 32) {
789 		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
790 			  u32, BIT(q->multi_queue.pos % 32));
791 		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
792 	} else {
793 		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
794 			  u32, BIT(q->multi_queue.pos));
795 		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
796 	}
797 }
798 
/*
 * Update the CGP entry of @q and send the H2G message @action (@len dwords)
 * that makes the GuC pick it up.
 *
 * Waits up to one second for a previous CGP sync to complete. If that wait
 * times out, or the GuC is stopped, the whole group is banned, an async GT
 * reset is requested, group cleanup is triggered, and nothing is sent.
 */
static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	enum xe_multi_queue_priority prio;
	long remaining;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronization with the GuC is serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending the CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	remaining = wait_event_timeout(guc->ct.wq,
				       !READ_ONCE(group->sync_pending) ||
				       xe_guc_read_stopped(guc), HZ);
	if (!remaining || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset gt, cleanup the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	/* Snapshot the priority under the queue's multi_queue lock. */
	scoped_guard(spinlock, &q->multi_queue.lock)
		prio = q->multi_queue.priority;

	xe_lrc_set_multi_queue_priority(q->lrc[0], prio);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	/* Flag the sync as outstanding before the message goes out. */
	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}
838 
__register_exec_queue_group(struct xe_guc * guc,struct xe_exec_queue * q,struct guc_ctxt_registration_info * info)839 static void __register_exec_queue_group(struct xe_guc *guc,
840 					struct xe_exec_queue *q,
841 					struct guc_ctxt_registration_info *info)
842 {
843 #define MAX_MULTI_QUEUE_REG_SIZE	(8)
844 	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
845 	int len = 0;
846 
847 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
848 	action[len++] = info->flags;
849 	action[len++] = info->context_idx;
850 	action[len++] = info->engine_class;
851 	action[len++] = info->engine_submit_mask;
852 	action[len++] = 0; /* Reserved */
853 	action[len++] = info->cgp_lo;
854 	action[len++] = info->cgp_hi;
855 
856 	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
857 #undef MAX_MULTI_QUEUE_REG_SIZE
858 
859 	/*
860 	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE do expect a
861 	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
862 	 * from guc.
863 	 */
864 	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
865 }
866 
xe_guc_exec_queue_group_add(struct xe_guc * guc,struct xe_exec_queue * q)867 static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
868 					struct xe_exec_queue *q)
869 {
870 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE  (2)
871 	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
872 	int len = 0;
873 
874 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));
875 
876 	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
877 	action[len++] = q->multi_queue.group->primary->guc->id;
878 
879 	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
880 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
881 
882 	/*
883 	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC do expect a
884 	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
885 	 * from guc.
886 	 */
887 	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
888 }
889 
__register_mlrc_exec_queue(struct xe_guc * guc,struct xe_exec_queue * q,struct guc_ctxt_registration_info * info)890 static void __register_mlrc_exec_queue(struct xe_guc *guc,
891 				       struct xe_exec_queue *q,
892 				       struct guc_ctxt_registration_info *info)
893 {
894 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
895 	u32 action[MAX_MLRC_REG_SIZE];
896 	int len = 0;
897 	int i;
898 
899 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));
900 
901 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
902 	action[len++] = info->flags;
903 	action[len++] = info->context_idx;
904 	action[len++] = info->engine_class;
905 	action[len++] = info->engine_submit_mask;
906 	action[len++] = info->wq_desc_lo;
907 	action[len++] = info->wq_desc_hi;
908 	action[len++] = info->wq_base_lo;
909 	action[len++] = info->wq_base_hi;
910 	action[len++] = info->wq_size;
911 	action[len++] = q->width;
912 	action[len++] = info->hwlrca_lo;
913 	action[len++] = info->hwlrca_hi;
914 
915 	for (i = 1; i < q->width; ++i) {
916 		struct xe_lrc *lrc = q->lrc[i];
917 
918 		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
919 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
920 	}
921 
922 	/* explicitly checks some fields that we might fixup later */
923 	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
924 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
925 	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
926 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
927 	xe_gt_assert(guc_to_gt(guc), q->width ==
928 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
929 	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
930 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
931 	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
932 #undef MAX_MLRC_REG_SIZE
933 
934 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
935 }
936 
__register_exec_queue(struct xe_guc * guc,struct guc_ctxt_registration_info * info)937 static void __register_exec_queue(struct xe_guc *guc,
938 				  struct guc_ctxt_registration_info *info)
939 {
940 	u32 action[] = {
941 		XE_GUC_ACTION_REGISTER_CONTEXT,
942 		info->flags,
943 		info->context_idx,
944 		info->engine_class,
945 		info->engine_submit_mask,
946 		info->wq_desc_lo,
947 		info->wq_desc_hi,
948 		info->wq_base_lo,
949 		info->wq_base_hi,
950 		info->wq_size,
951 		info->hwlrca_lo,
952 		info->hwlrca_hi,
953 	};
954 
955 	/* explicitly checks some fields that we might fixup later */
956 	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
957 		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
958 	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
959 		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
960 	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
961 		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
962 
963 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
964 }
965 
register_exec_queue(struct xe_exec_queue * q,int ctx_type)966 static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
967 {
968 	struct xe_guc *guc = exec_queue_to_guc(q);
969 	struct xe_device *xe = guc_to_xe(guc);
970 	struct xe_lrc *lrc = q->lrc[0];
971 	struct guc_ctxt_registration_info info;
972 
973 	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
974 	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);
975 
976 	memset(&info, 0, sizeof(info));
977 	info.context_idx = q->guc->id;
978 	info.engine_class = xe_engine_class_to_guc_class(q->class);
979 	info.engine_submit_mask = q->logical_mask;
980 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
981 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
982 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
983 		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
984 
985 	if (xe_exec_queue_is_multi_queue(q)) {
986 		struct xe_exec_queue_group *group = q->multi_queue.group;
987 
988 		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
989 		info.cgp_hi = 0;
990 	}
991 
992 	if (xe_exec_queue_is_parallel(q)) {
993 		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
994 		struct iosys_map map = xe_lrc_parallel_map(lrc);
995 
996 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
997 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
998 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
999 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
1000 		info.wq_base_lo = lower_32_bits(ggtt_addr +
1001 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
1002 		info.wq_base_hi = upper_32_bits(ggtt_addr +
1003 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
1004 		info.wq_size = WQ_SIZE;
1005 
1006 		q->guc->wqi_head = 0;
1007 		q->guc->wqi_tail = 0;
1008 		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
1009 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
1010 	}
1011 
1012 	set_exec_queue_registered(q);
1013 	trace_xe_exec_queue_register(q);
1014 	if (xe_exec_queue_is_multi_queue_primary(q))
1015 		__register_exec_queue_group(guc, q, &info);
1016 	else if (xe_exec_queue_is_parallel(q))
1017 		__register_mlrc_exec_queue(guc, q, &info);
1018 	else if (!xe_exec_queue_is_multi_queue_secondary(q))
1019 		__register_exec_queue(guc, &info);
1020 
1021 	if (!xe_exec_queue_is_multi_queue_secondary(q))
1022 		init_policies(guc, q);
1023 
1024 	if (xe_exec_queue_is_multi_queue_secondary(q))
1025 		xe_guc_exec_queue_group_add(guc, q);
1026 }
1027 
wq_space_until_wrap(struct xe_exec_queue * q)1028 static u32 wq_space_until_wrap(struct xe_exec_queue *q)
1029 {
1030 	return (WQ_SIZE - q->guc->wqi_tail);
1031 }
1032 
/*
 * Wait until the parallel work queue of @q has room for @wqi_size bytes.
 *
 * The cached head is checked first. If that is not enough, the head is
 * re-read from the work queue descriptor and the check retried with an
 * exponential back-off sleep. The wait ends early while vf_recovery() is
 * true. Once more than 2000 ms have been slept, an asynchronous GT reset is
 * requested and -ENODEV is returned. Returns 0 otherwise.
 */
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int backoff_ms = 1;
	unsigned int waited_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	/* Fast path: the cached head already leaves enough room */
	if (wqi_size <= AVAILABLE_SPACE || vf_recovery(guc))
		return 0;

	for (;;) {
		/* Refresh the cached head from the shared descriptor */
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size <= AVAILABLE_SPACE || vf_recovery(guc))
			break;

		if (waited_ms > 2000) {
			xe_gt_reset_async(q->gt);
			return -ENODEV;
		}

		waited_ms += xe_sleep_exponential_ms(&backoff_ms, 64);
	}
#undef AVAILABLE_SPACE

	return 0;
}
1059 
wq_noop_append(struct xe_exec_queue * q)1060 static int wq_noop_append(struct xe_exec_queue *q)
1061 {
1062 	struct xe_guc *guc = exec_queue_to_guc(q);
1063 	struct xe_device *xe = guc_to_xe(guc);
1064 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
1065 	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
1066 
1067 	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
1068 		return -ENODEV;
1069 
1070 	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));
1071 
1072 	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
1073 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
1074 		       FIELD_PREP(WQ_LEN_MASK, len_dw));
1075 	q->guc->wqi_tail = 0;
1076 
1077 	return 0;
1078 }
1079 
/*
 * Append one WQ_TYPE_MULTI_LRC item for @q to its parallel work queue and
 * publish the new tail in the work queue descriptor.
 *
 * The item is a header dword, the descriptor of the first LRC, a dword with
 * the GuC id and the first LRC's ring tail, a zero dword, and one ring tail
 * dword for each further LRC. Ring tails are stored divided by sizeof(u64).
 *
 * If the item does not fit before the wrap point, a NOOP item is appended
 * first so that the item starts at offset 0. The function returns without
 * queuing anything if wq_noop_append() or wq_wait_for_space() fails.
 */
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	/* Length field excludes the header dword itself */
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	/* Items never straddle the wrap point: pad with a NOOP and restart at 0 */
	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		FIELD_PREP(WQ_LEN_MASK, len_dw);
	/*
	 * NOTE(review): the descriptor is stored into a single u32 slot here,
	 * while register_exec_queue() splits the same value into lower/upper
	 * 32 bits - confirm only the low dword is intended.
	 */
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	/* One ring tail per additional LRC */
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	/* Point the map at the current tail slot and copy the item in */
	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	/* Write barrier between the item contents and the tail update below */
	xe_device_wmb(xe);

	/* The map was advanced above; fetch a fresh one for the descriptor */
	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}
1123 
/* Value stored in q->guc->resume_time when a schedule enable is issued */
#define RESUME_PENDING	~0x0ull
/*
 * Make the work of @job visible to the hardware/GuC and kick the context.
 *
 * Unless the job is a restore replay that is not the last one, the new ring
 * tail is published: through the parallel work queue for parallel queues,
 * via xe_lrc_set_ring_tail() otherwise. Nothing is sent to the GuC if the
 * queue (when not parallel) or the queue used to talk to the GuC is
 * suspended.
 *
 * If that queue is not enabled yet, a XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET
 * enable is sent (with G2H space for one response) and the pending-enable and
 * enabled state bits are set. Otherwise a plain XE_GUC_ACTION_SCHED_CONTEXT
 * is sent.
 */
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	/* Intermediate restore-replay jobs do not publish a new tail */
	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC. If primary is suspended,
	 * just return. Jobs will get scheduled once primary is resumed.
	 */
	q = xe_exec_queue_multi_queue_primary(q);
	if (exec_queue_suspended(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		/* First submission: enable scheduling, one G2H reply expected */
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		/* Parallel queues get an explicit SCHED_CONTEXT after the enable */
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		/* Already enabled: just tell GuC there is new work */
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		/* Reuse the action buffer for the follow-up submit */
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}
1187 
1188 static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job * drm_job)1189 guc_exec_queue_run_job(struct drm_sched_job *drm_job)
1190 {
1191 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
1192 	struct xe_exec_queue *q = job->q;
1193 	struct xe_guc *guc = exec_queue_to_guc(q);
1194 	bool killed_or_banned_or_wedged =
1195 		exec_queue_killed_or_banned_or_wedged(q);
1196 
1197 	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
1198 		     exec_queue_banned(q) || exec_queue_suspended(q));
1199 
1200 	trace_xe_sched_job_run(job);
1201 
1202 	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
1203 		if (xe_exec_queue_is_multi_queue_secondary(q)) {
1204 			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
1205 
1206 			if (exec_queue_killed_or_banned_or_wedged(primary)) {
1207 				killed_or_banned_or_wedged = true;
1208 				goto run_job_out;
1209 			}
1210 
1211 			if (!exec_queue_registered(primary))
1212 				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
1213 		}
1214 
1215 		if (!exec_queue_registered(q))
1216 			register_exec_queue(q, GUC_CONTEXT_NORMAL);
1217 		if (!job->restore_replay)
1218 			q->ring_ops->emit_job(job);
1219 		submit_exec_queue(q, job);
1220 		job->restore_replay = false;
1221 	}
1222 
1223 run_job_out:
1224 
1225 	return job->fence;
1226 }
1227 
/* Trace the release of a scheduler job and drop a reference on it */
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *xe_job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(xe_job);
	xe_sched_job_put(xe_job);
}
1235 
xe_guc_read_stopped(struct xe_guc * guc)1236 int xe_guc_read_stopped(struct xe_guc *guc)
1237 {
1238 	return atomic_read(&guc->submission_state.stopped);
1239 }
1240 
/*
 * Forward declarations of handlers defined later in this file.
 * handle_multi_queue_secondary_sched_done() is called directly by the
 * scheduling enable/disable helpers below for multi-queue secondary queues,
 * in place of sending a message to the GuC.
 */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);
1245 
/*
 * Declare a local array named "action" holding a
 * XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET message for @q. @enable_disable is
 * pasted onto GUC_CONTEXT_ (ENABLE or DISABLE). As this expands to a
 * declaration it must be used where declarations are allowed.
 */
#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}
1252 
/*
 * Disable scheduling for @q and mark it destroyed, as the first step of
 * tearing the queue down.
 *
 * Waits up to 5 seconds for any pending enable/disable on @q to finish, or
 * for the GuC to be stopped or a VF recovery to start. On a timeout outside
 * of VF recovery it restarts submission on the queue's scheduler, requests an
 * async GT reset, queues the TDR immediately and returns.
 *
 * Otherwise the enabled bit is cleared, pending-disable and destroyed are
 * set, and the schedule-disable is issued: directly through
 * handle_multi_queue_secondary_sched_done() for multi-queue secondaries, or
 * as an H2G message for every other queue.
 */
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc),
				 HZ * 5);
	/* ret == 0: timed out with an enable/disable still pending */
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserved G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}
1295 
1296 /**
1297  * xe_guc_submit_wedge() - Wedge GuC submission
1298  * @guc: the GuC object
1299  *
1300  * Save exec queue's registered with GuC state by taking a ref to each queue.
1301  * Register a DRMM handler to drop refs upon driver unload.
1302  */
xe_guc_submit_wedge(struct xe_guc * guc)1303 void xe_guc_submit_wedge(struct xe_guc *guc)
1304 {
1305 	struct xe_device *xe = guc_to_xe(guc);
1306 	struct xe_exec_queue *q;
1307 	unsigned long index;
1308 
1309 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
1310 
1311 	/*
1312 	 * If device is being wedged even before submission_state is
1313 	 * initialized, there's nothing to do here.
1314 	 */
1315 	if (!guc->submission_state.initialized)
1316 		return;
1317 
1318 	if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
1319 		mutex_lock(&guc->submission_state.lock);
1320 		xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1321 			if (xe_exec_queue_get_unless_zero(q))
1322 				set_exec_queue_wedged(q);
1323 		mutex_unlock(&guc->submission_state.lock);
1324 	} else {
1325 		/* Forcefully kill any remaining exec queues, signal fences */
1326 		guc_submit_reset_prepare(guc);
1327 		xe_guc_submit_stop(guc);
1328 		xe_guc_softreset(guc);
1329 		xe_uc_fw_sanitize(&guc->fw);
1330 		xe_guc_submit_pause_abort(guc);
1331 	}
1332 }
1333 
guc_submit_hint_wedged(struct xe_guc * guc)1334 static bool guc_submit_hint_wedged(struct xe_guc *guc)
1335 {
1336 	struct xe_device *xe = guc_to_xe(guc);
1337 
1338 	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
1339 		return false;
1340 
1341 	if (xe_device_wedged(xe))
1342 		return true;
1343 
1344 	xe_device_declare_wedged(xe);
1345 
1346 	return true;
1347 }
1348 
/* Scale @__t by 105/100, i.e. add five percent */
#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

/*
 * Decide whether @job on @q has exceeded the queue's job timeout.
 *
 * If the job has not started, or the context timestamp has not moved since it
 * was last sampled for this job, the decision is delegated to
 * xe_sched_invalidate_job() and its result is returned. Otherwise the running
 * time is derived from the difference between the context timestamp and the
 * job's start timestamp, increased by five percent, and the function returns
 * true when that is at least q->sched_props.job_timeout_ms.
 *
 * The caller treats a false return as "not timed out yet" and rearms.
 */
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	/* Only the low 32 bits of the context timestamp are compared */
	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if (ctx_timestamp == job->sample_timestamp) {
		/* Same message either way; VFs log it at notice level only */
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	/* Remember this sample so a stuck timestamp is detected next time */
	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	/* u32 subtraction: still correct across a single counter wrap */
	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for an GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}
1405 
/*
 * Enable scheduling for @q and wait for the enable to complete.
 *
 * Sets the pending-enable and enabled state bits, then issues the enable:
 * directly through handle_multi_queue_secondary_sched_done() for multi-queue
 * secondaries, or as an H2G message with one G2H response reserved otherwise.
 *
 * Waits up to 5 seconds for pending-enable to clear, for the GuC to be
 * stopped or for a VF recovery to start. On a timeout outside of VF recovery,
 * or if the GuC is stopped, the queue is banned, an async GT reset is
 * requested and the TDR is queued immediately.
 */
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	/* State bits are set before the request is issued */
	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	/* A timeout during VF recovery is not treated as a failure */
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}
1438 
/*
 * Issue a schedule-disable for @q without waiting for it to complete.
 *
 * When @immediate is set and @q is not a multi-queue secondary, the minimum
 * preemption timeout is applied first. The enabled bit is cleared and
 * pending-disable is set before the request goes out: multi-queue secondaries
 * are handled directly through handle_multi_queue_secondary_sched_done(),
 * all other queues send an H2G message with one G2H response reserved.
 */
static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gt *gt = guc_to_gt(guc);

	xe_gt_assert(gt, !exec_queue_destroyed(q));
	xe_gt_assert(gt, exec_queue_registered(q));
	xe_gt_assert(gt, !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q)) {
		handle_multi_queue_secondary_sched_done(guc, q, 0);
		return;
	}

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}
1460 
/*
 * Timeout handler for a job on a GuC exec queue.
 *
 * Overview of what the code below does:
 *  1. Return immediately if the job's fence is already signaled or a VF
 *     recovery is in progress.
 *  2. Stop submission on the queue (or on the whole multi-queue group).
 *  3. Optionally take a manual engine snapshot for the devcoredump.
 *  4. Unless the timeout check is skipped, ask check_timeout() whether the
 *     job really ran too long; if not, restart submission ("rearm").
 *  5. Ban the queue and, unless the device was just declared wedged, disable
 *     scheduling on the primary queue and wait for that to complete. A
 *     failure here requests a GT reset and rearms.
 *  6. Log / capture the timeout, set errors on this job and on all pending
 *     jobs, restart submission and trigger queue (or group) cleanup.
 *
 * Every path returns DRM_GPU_SCHED_STAT_NO_HANG.
 */
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct drm_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q, *primary;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	int err = -ETIME;
	pid_t pid = -1;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));

	/* Scheduling state is tracked on the primary queue of the group */
	primary = xe_exec_queue_multi_queue_primary(q);

	/*
	 * TDR has fired before free job worker. Common if exec queue
	 * immediately closed after last fence signaled. Add back to pending
	 * list so job can be freed and kick scheduler ensuring free job is not
	 * lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_stop(q);
	else
		xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q);

	/* Skip timeout check if multi-queue group is banned */
	if (xe_exec_queue_is_multi_queue(q) &&
	    READ_ONCE(q->multi_queue.group->banned))
		skip_timeout_check = true;

	/* LR jobs can only get here if queue has been killed or hit an error */
	if (xe_exec_queue_is_lr(q))
		xe_gt_assert(guc_to_gt(guc), skip_timeout_check);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		/* The capture is attempted even if forcewake was incomplete */
		xe_engine_snapshot_capture_for_queue(q);
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!skip_timeout_check && !check_timeout(q, job))
		goto rearm;

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	set_exec_queue_banned(q);

	/* Kick job / queue off hardware */
	if (!wedged && (exec_queue_enabled(primary) ||
			exec_queue_pending_disable(primary))) {
		int ret;

		/* A reset on the primary is reported as -EIO instead of -ETIME */
		if (exec_queue_reset(primary))
			err = -EIO;

		if (xe_uc_fw_is_running(&guc->fw)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(primary) &&
						  !exec_queue_pending_disable(primary)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			disable_scheduling(primary, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences, if GT broken the GT reset code should signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !xe_uc_fw_is_running(&guc->fw) ||
					 !exec_queue_pending_disable(primary) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
			/* Also entered from the first wait above on failure */
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   primary->guc->id);
			xe_devcoredump(primary, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       primary->guc->id, ret, xe_guc_read_stopped(guc));
			xe_gt_reset_async(primary->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/* Identify the owning process for the log message, if known */
	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Mark all outstanding jobs as bad, thus completing them */
	xe_sched_job_set_error(job, err);
	drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
		xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);

	if (xe_exec_queue_is_multi_queue(q)) {
		xe_guc_exec_queue_group_start(q);
		xe_guc_exec_queue_group_trigger_cleanup(q);
	} else {
		xe_sched_submission_start(sched);
		xe_guc_exec_queue_trigger_cleanup(q);
	}

	/*
	 * We want the job added back to the pending list so it gets freed; this
	 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
	 */
	return DRM_GPU_SCHED_STAT_NO_HANG;

rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do in DRM scheduler. With
	 * some thought, do this in a follow up.
	 */
	if (xe_exec_queue_is_multi_queue(q))
		xe_guc_exec_queue_group_start(q);
	else
		xe_sched_submission_start(sched);
	/* VF recovery path: return without restarting submission here */
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}
1657 
guc_exec_queue_fini(struct xe_exec_queue * q)1658 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1659 {
1660 	struct xe_guc_exec_queue *ge = q->guc;
1661 	struct xe_guc *guc = exec_queue_to_guc(q);
1662 
1663 	if (xe_exec_queue_is_multi_queue_secondary(q)) {
1664 		struct xe_exec_queue_group *group = q->multi_queue.group;
1665 
1666 		mutex_lock(&group->list_lock);
1667 		list_del(&q->multi_queue.link);
1668 		mutex_unlock(&group->list_lock);
1669 	}
1670 
1671 	release_guc_id(guc, q);
1672 	xe_sched_entity_fini(&ge->entity);
1673 	xe_sched_fini(&ge->sched);
1674 
1675 	/*
1676 	 * RCU free due sched being exported via DRM scheduler fences
1677 	 * (timeline name).
1678 	 */
1679 	kfree_rcu(ge, rcu);
1680 }
1681 
__guc_exec_queue_destroy_async(struct work_struct * w)1682 static void __guc_exec_queue_destroy_async(struct work_struct *w)
1683 {
1684 	struct xe_guc_exec_queue *ge =
1685 		container_of(w, struct xe_guc_exec_queue, destroy_async);
1686 	struct xe_exec_queue *q = ge->q;
1687 	struct xe_guc *guc = exec_queue_to_guc(q);
1688 
1689 	guard(xe_pm_runtime)(guc_to_xe(guc));
1690 	trace_xe_exec_queue_destroy(q);
1691 
1692 	/* Confirm no work left behind accessing device structures */
1693 	cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1694 
1695 	xe_exec_queue_fini(q);
1696 }
1697 
guc_exec_queue_destroy_async(struct xe_exec_queue * q)1698 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
1699 {
1700 	struct xe_guc *guc = exec_queue_to_guc(q);
1701 	struct xe_device *xe = guc_to_xe(guc);
1702 
1703 	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
1704 
1705 	/* We must block on kernel engines so slabs are empty on driver unload */
1706 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1707 		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
1708 	else
1709 		queue_work(xe->destroy_wq, &q->guc->destroy_async);
1710 }
1711 
/*
 * Destroy @q by deferring to guc_exec_queue_destroy_async().
 *
 * @guc is accepted for symmetry with the callers but is not used here.
 */
static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that
	 * it is.
	 */
	guc_exec_queue_destroy_async(q);
}
1723 
__guc_exec_queue_process_msg_cleanup(struct xe_sched_msg * msg)1724 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1725 {
1726 	struct xe_exec_queue *q = msg->private_data;
1727 	struct xe_guc *guc = exec_queue_to_guc(q);
1728 
1729 	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1730 	trace_xe_exec_queue_cleanup_entity(q);
1731 
1732 	/*
1733 	 * Expected state transitions for cleanup:
1734 	 * - If the exec queue is registered and GuC firmware is running, we must first
1735 	 *   disable scheduling and deregister the queue to ensure proper teardown and
1736 	 *   resource release in the GuC, then destroy the exec queue on driver side.
1737 	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
1738 	 *   we cannot expect a response for the deregister request. In this case,
1739 	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
1740 	 *   will not process further requests and all resources must be cleaned up locally.
1741 	 */
1742 	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
1743 		disable_scheduling_deregister(guc, q);
1744 	else
1745 		__guc_exec_queue_destroy(guc, q);
1746 }
1747 
guc_exec_queue_allowed_to_change_state(struct xe_exec_queue * q)1748 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1749 {
1750 	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1751 }
1752 
__guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg * msg)1753 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1754 {
1755 	struct xe_exec_queue *q = msg->private_data;
1756 	struct xe_guc *guc = exec_queue_to_guc(q);
1757 
1758 	if (guc_exec_queue_allowed_to_change_state(q))
1759 		init_policies(guc, q);
1760 	kfree(msg);
1761 }
1762 
__suspend_fence_signal(struct xe_exec_queue * q)1763 static void __suspend_fence_signal(struct xe_exec_queue *q)
1764 {
1765 	struct xe_guc *guc = exec_queue_to_guc(q);
1766 	struct xe_device *xe = guc_to_xe(guc);
1767 
1768 	if (!q->guc->suspend_pending)
1769 		return;
1770 
1771 	WRITE_ONCE(q->guc->suspend_pending, false);
1772 
1773 	/*
1774 	 * We use a GuC shared wait queue for VFs because the VF resfix start
1775 	 * interrupt must be able to wake all instances of suspend_wait. This
1776 	 * prevents the VF migration worker from being starved during
1777 	 * scheduling.
1778 	 */
1779 	if (IS_SRIOV_VF(xe))
1780 		wake_up_all(&guc->ct.wq);
1781 	else
1782 		wake_up(&q->guc->suspend_wait);
1783 }
1784 
/*
 * Checked variant of __suspend_fence_signal().
 *
 * Asserts that a suspend is actually pending and that the queue is suspended
 * or killed, or that the GuC is stopped, before signalling.
 */
static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}
1795 
__guc_exec_queue_process_msg_suspend(struct xe_sched_msg * msg)1796 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1797 {
1798 	struct xe_exec_queue *q = msg->private_data;
1799 	struct xe_guc *guc = exec_queue_to_guc(q);
1800 
1801 	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1802 	    exec_queue_enabled(q)) {
1803 		wait_event(guc->ct.wq, vf_recovery(guc) ||
1804 			   ((q->guc->resume_time != RESUME_PENDING ||
1805 			   xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));
1806 
1807 		if (!xe_guc_read_stopped(guc)) {
1808 			s64 since_resume_ms =
1809 				ktime_ms_delta(ktime_get(),
1810 					       q->guc->resume_time);
1811 			s64 wait_ms = q->vm->preempt.min_run_period_ms -
1812 				since_resume_ms;
1813 
1814 			if (wait_ms > 0 && q->guc->resume_time)
1815 				xe_sleep_relaxed_ms(wait_ms);
1816 
1817 			set_exec_queue_suspended(q);
1818 			disable_scheduling(q, false);
1819 		}
1820 	} else if (q->guc->suspend_pending) {
1821 		set_exec_queue_suspended(q);
1822 		suspend_fence_signal(q);
1823 	}
1824 }
1825 
__guc_exec_queue_process_msg_resume(struct xe_sched_msg * msg)1826 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1827 {
1828 	struct xe_exec_queue *q = msg->private_data;
1829 
1830 	if (guc_exec_queue_allowed_to_change_state(q)) {
1831 		clear_exec_queue_suspended(q);
1832 		if (!exec_queue_enabled(q)) {
1833 			q->guc->resume_time = RESUME_PENDING;
1834 			set_exec_queue_pending_resume(q);
1835 			enable_scheduling(q);
1836 		}
1837 	} else {
1838 		clear_exec_queue_suspended(q);
1839 	}
1840 }
1841 
__guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg * msg)1842 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
1843 {
1844 	struct xe_exec_queue *q = msg->private_data;
1845 
1846 	if (guc_exec_queue_allowed_to_change_state(q)) {
1847 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE        (2)
1848 		struct xe_guc *guc = exec_queue_to_guc(q);
1849 		struct xe_exec_queue_group *group = q->multi_queue.group;
1850 		u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
1851 		int len = 0;
1852 
1853 		action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
1854 		action[len++] = group->primary->guc->id;
1855 
1856 		xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
1857 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
1858 
1859 		xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
1860 	}
1861 
1862 	kfree(msg);
1863 }
1864 
/*
 * Message opcodes dispatched by guc_exec_queue_process_msg(). The opcode
 * occupies the bits covered by OPCODE_MASK. MSG_LOCKED and MSG_HEAD are flags
 * OR'ed into the opcode passed to guc_exec_queue_add_msg(), which uses them to
 * select how the message is queued and masks them off before storing the
 * opcode in the message.
 */
#define CLEANUP				1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS			2
#define SUSPEND				3
#define RESUME				4
#define SET_MULTI_QUEUE_PRIORITY	5
#define OPCODE_MASK	0xf
#define MSG_LOCKED	BIT(8)
#define MSG_HEAD	BIT(9)
1873 
guc_exec_queue_process_msg(struct xe_sched_msg * msg)1874 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1875 {
1876 	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1877 
1878 	trace_xe_sched_msg_recv(msg);
1879 
1880 	switch (msg->opcode) {
1881 	case CLEANUP:
1882 		__guc_exec_queue_process_msg_cleanup(msg);
1883 		break;
1884 	case SET_SCHED_PROPS:
1885 		__guc_exec_queue_process_msg_set_sched_props(msg);
1886 		break;
1887 	case SUSPEND:
1888 		__guc_exec_queue_process_msg_suspend(msg);
1889 		break;
1890 	case RESUME:
1891 		__guc_exec_queue_process_msg_resume(msg);
1892 		break;
1893 	case SET_MULTI_QUEUE_PRIORITY:
1894 		__guc_exec_queue_process_msg_set_multi_queue_priority(msg);
1895 		break;
1896 	default:
1897 		XE_WARN_ON("Unknown message type");
1898 	}
1899 
1900 	xe_pm_runtime_put(xe);
1901 }
1902 
/* DRM scheduler callbacks handed to xe_sched_init() for every exec queue */
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

/* Xe scheduler callbacks: messages are processed by guc_exec_queue_process_msg() */
static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};
1912 
guc_exec_queue_init(struct xe_exec_queue * q)1913 static int guc_exec_queue_init(struct xe_exec_queue *q)
1914 {
1915 	struct xe_gpu_scheduler *sched;
1916 	struct xe_guc *guc = exec_queue_to_guc(q);
1917 	struct workqueue_struct *submit_wq = NULL;
1918 	struct xe_guc_exec_queue *ge;
1919 	long timeout;
1920 	int err, i;
1921 
1922 	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
1923 
1924 	ge = kzalloc_obj(*ge);
1925 	if (!ge)
1926 		return -ENOMEM;
1927 
1928 	q->guc = ge;
1929 	ge->q = q;
1930 	init_rcu_head(&ge->rcu);
1931 	init_waitqueue_head(&ge->suspend_wait);
1932 
1933 	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1934 		INIT_LIST_HEAD(&ge->static_msgs[i].link);
1935 
1936 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1937 		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
1938 
1939 	/*
1940 	 * Use primary queue's submit_wq for all secondary queues of a
1941 	 * multi queue group. This serialization avoids any locking around
1942 	 * CGP synchronization with GuC.
1943 	 */
1944 	if (xe_exec_queue_is_multi_queue_secondary(q)) {
1945 		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
1946 
1947 		submit_wq = primary->guc->sched.base.submit_wq;
1948 	}
1949 
1950 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1951 			    submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
1952 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
1953 			    q->name, gt_to_xe(q->gt)->drm.dev);
1954 	if (err)
1955 		goto err_free;
1956 
1957 	sched = &ge->sched;
1958 	err = xe_sched_entity_init(&ge->entity, sched);
1959 	if (err)
1960 		goto err_sched;
1961 
1962 	mutex_lock(&guc->submission_state.lock);
1963 
1964 	err = alloc_guc_id(guc, q);
1965 	if (err)
1966 		goto err_entity;
1967 
1968 	q->entity = &ge->entity;
1969 
1970 	if (xe_guc_read_stopped(guc) || vf_recovery(guc))
1971 		xe_sched_stop(sched);
1972 
1973 	mutex_unlock(&guc->submission_state.lock);
1974 
1975 	xe_exec_queue_assign_name(q, q->guc->id);
1976 
1977 	/*
1978 	 * Maintain secondary queues of the multi queue group in a list
1979 	 * for handling dependencies across the queues in the group.
1980 	 */
1981 	if (xe_exec_queue_is_multi_queue_secondary(q)) {
1982 		struct xe_exec_queue_group *group = q->multi_queue.group;
1983 
1984 		INIT_LIST_HEAD(&q->multi_queue.link);
1985 		mutex_lock(&group->list_lock);
1986 		if (group->stopped)
1987 			WRITE_ONCE(q->guc->sched.base.pause_submit, true);
1988 		list_add_tail(&q->multi_queue.link, &group->list);
1989 		mutex_unlock(&group->list_lock);
1990 	}
1991 
1992 	if (xe_exec_queue_is_multi_queue(q))
1993 		trace_xe_exec_queue_create_multi_queue(q);
1994 	else
1995 		trace_xe_exec_queue_create(q);
1996 
1997 	return 0;
1998 
1999 err_entity:
2000 	mutex_unlock(&guc->submission_state.lock);
2001 	xe_sched_entity_fini(&ge->entity);
2002 err_sched:
2003 	xe_sched_fini(&ge->sched);
2004 err_free:
2005 	kfree(ge);
2006 
2007 	return err;
2008 }
2009 
/*
 * Kill @q: mark it killed, release anyone waiting on a pending suspend and
 * trigger cleanup of the queue.
 */
static void guc_exec_queue_kill(struct xe_exec_queue *q)
{
	trace_xe_exec_queue_kill(q);
	set_exec_queue_killed(q);
	/* Unchecked variant: a suspend may or may not be pending here */
	__suspend_fence_signal(q);
	xe_guc_exec_queue_trigger_cleanup(q);
}
2017 
/*
 * Queue @msg on the scheduler of @q.
 *
 * @opcode carries the message opcode in its OPCODE_MASK bits plus the optional
 * MSG_LOCKED / MSG_HEAD flags, which select the queuing helper (MSG_HEAD is
 * checked first). A runtime PM reference is taken here and dropped again in
 * guc_exec_queue_process_msg().
 */
static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
				   u32 opcode)
{
	struct xe_gpu_scheduler *sched = &q->guc->sched;

	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));

	INIT_LIST_HEAD(&msg->link);
	msg->opcode = opcode & OPCODE_MASK;
	msg->private_data = q;

	trace_xe_sched_msg_add(msg);

	if (opcode & MSG_HEAD) {
		xe_sched_add_msg_head(sched, msg);
		return;
	}

	if (opcode & MSG_LOCKED) {
		xe_sched_add_msg_locked(sched, msg);
		return;
	}

	xe_sched_add_msg(sched, msg);
}
2035 
/*
 * Queue @msg with MSG_LOCKED | MSG_HEAD unless it is already linked on a
 * list. The callers in this file hold the scheduler message lock.
 */
static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
					    struct xe_sched_msg *msg,
					    u32 opcode)
{
	if (list_empty(&msg->link))
		guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
}
2045 
guc_exec_queue_try_add_msg(struct xe_exec_queue * q,struct xe_sched_msg * msg,u32 opcode)2046 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
2047 				       struct xe_sched_msg *msg,
2048 				       u32 opcode)
2049 {
2050 	if (!list_empty(&msg->link))
2051 		return false;
2052 
2053 	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
2054 
2055 	return true;
2056 }
2057 
/* Indices into the static_msgs array of the GuC exec queue backend object */
#define STATIC_MSG_CLEANUP	0
#define STATIC_MSG_SUSPEND	1
#define STATIC_MSG_RESUME	2
guc_exec_queue_destroy(struct xe_exec_queue * q)2061 static void guc_exec_queue_destroy(struct xe_exec_queue *q)
2062 {
2063 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2064 
2065 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
2066 		guc_exec_queue_add_msg(q, msg, CLEANUP);
2067 	else
2068 		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
2069 }
2070 
guc_exec_queue_set_priority(struct xe_exec_queue * q,enum xe_exec_queue_priority priority)2071 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
2072 				       enum xe_exec_queue_priority priority)
2073 {
2074 	struct xe_sched_msg *msg;
2075 
2076 	if (q->sched_props.priority == priority ||
2077 	    exec_queue_killed_or_banned_or_wedged(q))
2078 		return 0;
2079 
2080 	msg = kmalloc_obj(*msg);
2081 	if (!msg)
2082 		return -ENOMEM;
2083 
2084 	q->sched_props.priority = priority;
2085 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2086 
2087 	return 0;
2088 }
2089 
/*
 * Update the timeslice of @q and queue a SET_SCHED_PROPS message.
 *
 * Return: 0 on success or when there is nothing to do (timeslice unchanged, or
 * queue killed, banned or wedged), -ENOMEM if the message cannot be allocated.
 */
static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.timeslice_us == timeslice_us)
		return 0;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	/* The message is freed by its handler */
	q->sched_props.timeslice_us = timeslice_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}
2107 
/*
 * Update the preemption timeout of @q and queue a SET_SCHED_PROPS message.
 *
 * Return: 0 on success or when there is nothing to do (timeout unchanged, or
 * queue killed, banned or wedged), -ENOMEM if the message cannot be allocated.
 */
static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
					      u32 preempt_timeout_us)
{
	struct xe_sched_msg *msg;

	if (q->sched_props.preempt_timeout_us == preempt_timeout_us)
		return 0;

	if (exec_queue_killed_or_banned_or_wedged(q))
		return 0;

	msg = kmalloc_obj(*msg);
	if (!msg)
		return -ENOMEM;

	/* The message is freed by its handler */
	q->sched_props.preempt_timeout_us = preempt_timeout_us;
	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);

	return 0;
}
2126 
guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue * q,enum xe_multi_queue_priority priority)2127 static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
2128 						   enum xe_multi_queue_priority priority)
2129 {
2130 	struct xe_sched_msg *msg;
2131 
2132 	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));
2133 
2134 	if (exec_queue_killed_or_banned_or_wedged(q))
2135 		return 0;
2136 
2137 	msg = kmalloc_obj(*msg);
2138 	if (!msg)
2139 		return -ENOMEM;
2140 
2141 	scoped_guard(spinlock, &q->multi_queue.lock) {
2142 		if (q->multi_queue.priority == priority) {
2143 			kfree(msg);
2144 			return 0;
2145 		}
2146 
2147 		q->multi_queue.priority = priority;
2148 	}
2149 
2150 	guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);
2151 
2152 	return 0;
2153 }
2154 
guc_exec_queue_suspend(struct xe_exec_queue * q)2155 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
2156 {
2157 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2158 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2159 
2160 	if (exec_queue_killed_or_banned_or_wedged(q))
2161 		return -EINVAL;
2162 
2163 	xe_sched_msg_lock(sched);
2164 	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
2165 		q->guc->suspend_pending = true;
2166 	xe_sched_msg_unlock(sched);
2167 
2168 	return 0;
2169 }
2170 
guc_exec_queue_suspend_wait(struct xe_exec_queue * q)2171 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
2172 {
2173 	struct xe_guc *guc = exec_queue_to_guc(q);
2174 	struct xe_device *xe = guc_to_xe(guc);
2175 	int ret;
2176 
2177 	/*
2178 	 * Likely don't need to check exec_queue_killed() as we clear
2179 	 * suspend_pending upon kill but to be paranoid but races in which
2180 	 * suspend_pending is set after kill also check kill here.
2181 	 */
2182 #define WAIT_COND \
2183 	(!READ_ONCE(q->guc->suspend_pending) ||	exec_queue_killed(q) || \
2184 	 xe_guc_read_stopped(guc))
2185 
2186 retry:
2187 	if (IS_SRIOV_VF(xe))
2188 		ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
2189 						       vf_recovery(guc),
2190 						       HZ * 5);
2191 	else
2192 		ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
2193 						       WAIT_COND, HZ * 5);
2194 
2195 	if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
2196 		return -EAGAIN;
2197 
2198 	if (!ret) {
2199 		xe_gt_warn(guc_to_gt(guc),
2200 			   "Suspend fence, guc_id=%d, failed to respond",
2201 			   q->guc->id);
2202 		/* XXX: Trigger GT reset? */
2203 		return -ETIME;
2204 	} else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
2205 		/* Corner case on RESFIX DONE where vf_recovery() changes */
2206 		goto retry;
2207 	}
2208 
2209 #undef WAIT_COND
2210 
2211 	return ret < 0 ? ret : 0;
2212 }
2213 
guc_exec_queue_resume(struct xe_exec_queue * q)2214 static void guc_exec_queue_resume(struct xe_exec_queue *q)
2215 {
2216 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2217 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2218 	struct xe_guc *guc = exec_queue_to_guc(q);
2219 
2220 	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
2221 
2222 	xe_sched_msg_lock(sched);
2223 	guc_exec_queue_try_add_msg(q, msg, RESUME);
2224 	xe_sched_msg_unlock(sched);
2225 }
2226 
guc_exec_queue_reset_status(struct xe_exec_queue * q)2227 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
2228 {
2229 	if (xe_exec_queue_is_multi_queue_secondary(q) &&
2230 	    guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
2231 		return true;
2232 
2233 	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
2234 }
2235 
guc_exec_queue_active(struct xe_exec_queue * q)2236 static bool guc_exec_queue_active(struct xe_exec_queue *q)
2237 {
2238 	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
2239 
2240 	return exec_queue_enabled(primary) &&
2241 		!exec_queue_pending_disable(primary);
2242 }
2243 
/*
 * All of these functions are an abstraction layer which other parts of Xe can
 * use to trap into the GuC backend. All of these functions, aside from init,
 * really shouldn't do much other than trap into the DRM scheduler, which
 * synchronizes these operations.
 */
static const struct xe_exec_queue_ops guc_exec_queue_ops = {
	.init = guc_exec_queue_init,
	.kill = guc_exec_queue_kill,
	.fini = guc_exec_queue_fini,
	.destroy = guc_exec_queue_destroy,
	.set_priority = guc_exec_queue_set_priority,
	.set_timeslice = guc_exec_queue_set_timeslice,
	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
	.set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
	.suspend = guc_exec_queue_suspend,
	.suspend_wait = guc_exec_queue_suspend_wait,
	.resume = guc_exec_queue_resume,
	.reset_status = guc_exec_queue_reset_status,
	.active = guc_exec_queue_active,
};
2265 
guc_exec_queue_stop(struct xe_guc * guc,struct xe_exec_queue * q)2266 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
2267 {
2268 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2269 	bool do_destroy = false;
2270 
2271 	/* Stop scheduling + flush any DRM scheduler operations */
2272 	xe_sched_submission_stop(sched);
2273 
2274 	/* Clean up lost G2H + reset engine state */
2275 	if (exec_queue_registered(q)) {
2276 		if (exec_queue_destroyed(q))
2277 			do_destroy = true;
2278 	}
2279 	if (q->guc->suspend_pending) {
2280 		set_exec_queue_suspended(q);
2281 		suspend_fence_signal(q);
2282 	}
2283 	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
2284 		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
2285 		   EXEC_QUEUE_STATE_SUSPENDED,
2286 		   &q->guc->state);
2287 	q->guc->resume_time = 0;
2288 	trace_xe_exec_queue_stop(q);
2289 
2290 	/*
2291 	 * Ban any engine (aside from kernel and engines used for VM ops) with a
2292 	 * started but not complete job or if a job has gone through a GT reset
2293 	 * more than twice.
2294 	 */
2295 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
2296 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2297 		bool ban = false;
2298 
2299 		if (job) {
2300 			if ((xe_sched_job_started(job) &&
2301 			    !xe_sched_job_completed(job)) ||
2302 			    xe_sched_invalidate_job(job, 2)) {
2303 				trace_xe_sched_job_ban(job);
2304 				ban = true;
2305 			}
2306 		}
2307 
2308 		if (ban) {
2309 			set_exec_queue_banned(q);
2310 			xe_guc_exec_queue_trigger_cleanup(q);
2311 		}
2312 	}
2313 
2314 	if (do_destroy)
2315 		__guc_exec_queue_destroy(guc, q);
2316 }
2317 
guc_submit_reset_prepare(struct xe_guc * guc)2318 static int guc_submit_reset_prepare(struct xe_guc *guc)
2319 {
2320 	int ret;
2321 
2322 	/*
2323 	 * Using an atomic here rather than submission_state.lock as this
2324 	 * function can be called while holding the CT lock (engine reset
2325 	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
2326 	 * Atomic is not ideal, but it works to prevent against concurrent reset
2327 	 * and releasing any TDRs waiting on guc->submission_state.stopped.
2328 	 */
2329 	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
2330 	smp_wmb();
2331 	wake_up_all(&guc->ct.wq);
2332 
2333 	return ret;
2334 }
2335 
xe_guc_submit_reset_prepare(struct xe_guc * guc)2336 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
2337 {
2338 	if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
2339 		return 0;
2340 
2341 	if (!guc->submission_state.initialized)
2342 		return 0;
2343 
2344 	return guc_submit_reset_prepare(guc);
2345 }
2346 
/*
 * Block on the CT wait queue until GuC submission is no longer stopped or the
 * device has been wedged.
 */
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
		   !xe_guc_read_stopped(guc));
}
2352 
xe_guc_submit_stop(struct xe_guc * guc)2353 void xe_guc_submit_stop(struct xe_guc *guc)
2354 {
2355 	struct xe_exec_queue *q;
2356 	unsigned long index;
2357 
2358 	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2359 
2360 	mutex_lock(&guc->submission_state.lock);
2361 
2362 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2363 		/* Prevent redundant attempts to stop parallel queues */
2364 		if (q->guc->id != index)
2365 			continue;
2366 
2367 		guc_exec_queue_stop(guc, q);
2368 	}
2369 
2370 	mutex_unlock(&guc->submission_state.lock);
2371 
2372 	/*
2373 	 * No one can enter the backend at this point, aside from new engine
2374 	 * creation which is protected by guc->submission_state.lock.
2375 	 */
2376 
2377 }
2378 
guc_exec_queue_revert_pending_state_change(struct xe_guc * guc,struct xe_exec_queue * q)2379 static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
2380 						       struct xe_exec_queue *q)
2381 {
2382 	bool pending_enable, pending_disable, pending_resume;
2383 
2384 	pending_enable = exec_queue_pending_enable(q);
2385 	pending_resume = exec_queue_pending_resume(q);
2386 
2387 	if (pending_enable && pending_resume) {
2388 		q->guc->needs_resume = true;
2389 		xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
2390 			  q->guc->id);
2391 	}
2392 
2393 	if (pending_enable && !pending_resume) {
2394 		clear_exec_queue_registered(q);
2395 		xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
2396 			  q->guc->id);
2397 	}
2398 
2399 	if (pending_enable) {
2400 		clear_exec_queue_enabled(q);
2401 		clear_exec_queue_pending_resume(q);
2402 		clear_exec_queue_pending_enable(q);
2403 		xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
2404 			  q->guc->id);
2405 	}
2406 
2407 	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
2408 		clear_exec_queue_destroyed(q);
2409 		q->guc->needs_cleanup = true;
2410 		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
2411 			  q->guc->id);
2412 	}
2413 
2414 	pending_disable = exec_queue_pending_disable(q);
2415 
2416 	if (pending_disable && exec_queue_suspended(q)) {
2417 		clear_exec_queue_suspended(q);
2418 		q->guc->needs_suspend = true;
2419 		xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
2420 			  q->guc->id);
2421 	}
2422 
2423 	if (pending_disable) {
2424 		if (!pending_enable)
2425 			set_exec_queue_enabled(q);
2426 		clear_exec_queue_pending_disable(q);
2427 		xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
2428 			  q->guc->id);
2429 	}
2430 
2431 	q->guc->resume_time = 0;
2432 }
2433 
lrc_parallel_clear(struct xe_lrc * lrc)2434 static void lrc_parallel_clear(struct xe_lrc *lrc)
2435 {
2436 	struct xe_device *xe = gt_to_xe(lrc->gt);
2437 	struct iosys_map map = xe_lrc_parallel_map(lrc);
2438 	int i;
2439 
2440 	for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
2441 		parallel_write(xe, map, wq[i],
2442 			       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
2443 			       FIELD_PREP(WQ_LEN_MASK, 0));
2444 }
2445 
2446 /*
2447  * This function is quite complex but only real way to ensure no state is lost
2448  * during VF resume flows. The function scans the queue state, make adjustments
2449  * as needed, and queues jobs / messages which replayed upon unpause.
2450  */
guc_exec_queue_pause(struct xe_guc * guc,struct xe_exec_queue * q)2451 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
2452 {
2453 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2454 	struct xe_sched_job *job;
2455 	int i;
2456 
2457 	lockdep_assert_held(&guc->submission_state.lock);
2458 
2459 	/* Stop scheduling + flush any DRM scheduler operations */
2460 	xe_sched_submission_stop(sched);
2461 	cancel_delayed_work_sync(&sched->base.work_tdr);
2462 
2463 	guc_exec_queue_revert_pending_state_change(guc, q);
2464 
2465 	if (xe_exec_queue_is_parallel(q)) {
2466 		/* Pairs with WRITE_ONCE in __xe_exec_queue_init  */
2467 		struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);
2468 
2469 		/*
2470 		 * NOP existing WQ commands that may contain stale GGTT
2471 		 * addresses. These will be replayed upon unpause. The hardware
2472 		 * seems to get confused if the WQ head/tail pointers are
2473 		 * adjusted.
2474 		 */
2475 		if (lrc)
2476 			lrc_parallel_clear(lrc);
2477 	}
2478 
2479 	job = xe_sched_first_pending_job(sched);
2480 	if (job) {
2481 		job->restore_replay = true;
2482 
2483 		/*
2484 		 * Adjust software tail so jobs submitted overwrite previous
2485 		 * position in ring buffer with new GGTT addresses.
2486 		 */
2487 		for (i = 0; i < q->width; ++i)
2488 			q->lrc[i]->ring.tail = job->ptrs[i].head;
2489 	}
2490 }
2491 
2492 /**
2493  * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2494  * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2495  */
xe_guc_submit_pause(struct xe_guc * guc)2496 void xe_guc_submit_pause(struct xe_guc *guc)
2497 {
2498 	struct xe_exec_queue *q;
2499 	unsigned long index;
2500 
2501 	mutex_lock(&guc->submission_state.lock);
2502 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2503 		xe_sched_submission_stop(&q->guc->sched);
2504 	mutex_unlock(&guc->submission_state.lock);
2505 }
2506 
2507 /**
2508  * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
2509  * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2510  */
xe_guc_submit_pause_vf(struct xe_guc * guc)2511 void xe_guc_submit_pause_vf(struct xe_guc *guc)
2512 {
2513 	struct xe_exec_queue *q;
2514 	unsigned long index;
2515 
2516 	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2517 	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2518 
2519 	mutex_lock(&guc->submission_state.lock);
2520 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2521 		/* Prevent redundant attempts to stop parallel queues */
2522 		if (q->guc->id != index)
2523 			continue;
2524 
2525 		guc_exec_queue_pause(guc, q);
2526 	}
2527 	mutex_unlock(&guc->submission_state.lock);
2528 }
2529 
guc_exec_queue_start(struct xe_exec_queue * q)2530 static void guc_exec_queue_start(struct xe_exec_queue *q)
2531 {
2532 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2533 
2534 	if (!exec_queue_killed_or_banned_or_wedged(q)) {
2535 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2536 		int i;
2537 
2538 		trace_xe_exec_queue_resubmit(q);
2539 		if (job) {
2540 			for (i = 0; i < q->width; ++i) {
2541 				/*
2542 				 * The GuC context is unregistered at this point
2543 				 * time, adjusting software ring tail ensures
2544 				 * jobs are rewritten in original placement,
2545 				 * adjusting LRC tail ensures the newly loaded
2546 				 * GuC / contexts only view the LRC tail
2547 				 * increasing as jobs are written out.
2548 				 */
2549 				q->lrc[i]->ring.tail = job->ptrs[i].head;
2550 				xe_lrc_set_ring_tail(q->lrc[i],
2551 						     xe_lrc_ring_head(q->lrc[i]));
2552 			}
2553 		}
2554 		xe_sched_resubmit_jobs(sched);
2555 	}
2556 
2557 	xe_sched_submission_start(sched);
2558 	xe_sched_submission_resume_tdr(sched);
2559 }
2560 
xe_guc_submit_start(struct xe_guc * guc)2561 int xe_guc_submit_start(struct xe_guc *guc)
2562 {
2563 	struct xe_exec_queue *q;
2564 	unsigned long index;
2565 
2566 	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2567 
2568 	mutex_lock(&guc->submission_state.lock);
2569 	atomic_dec(&guc->submission_state.stopped);
2570 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2571 		/* Prevent redundant attempts to start parallel queues */
2572 		if (q->guc->id != index)
2573 			continue;
2574 
2575 		guc_exec_queue_start(q);
2576 	}
2577 	mutex_unlock(&guc->submission_state.lock);
2578 
2579 	wake_up_all(&guc->ct.wq);
2580 
2581 	return 0;
2582 }
2583 
guc_exec_queue_unpause_prepare(struct xe_guc * guc,struct xe_exec_queue * q)2584 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
2585 					   struct xe_exec_queue *q)
2586 {
2587 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2588 	struct xe_sched_job *job = NULL;
2589 	struct drm_sched_job *s_job;
2590 	bool restore_replay = false;
2591 
2592 	drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
2593 		job = to_xe_sched_job(s_job);
2594 		restore_replay |= job->restore_replay;
2595 		if (restore_replay) {
2596 			xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
2597 				  q->guc->id, xe_sched_job_seqno(job));
2598 
2599 			q->ring_ops->emit_job(job);
2600 			job->restore_replay = true;
2601 		}
2602 	}
2603 
2604 	if (job)
2605 		job->last_replay = true;
2606 }
2607 
2608 /**
2609  * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
2610  * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
2611  */
xe_guc_submit_unpause_prepare_vf(struct xe_guc * guc)2612 void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
2613 {
2614 	struct xe_exec_queue *q;
2615 	unsigned long index;
2616 
2617 	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2618 	xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2619 
2620 	mutex_lock(&guc->submission_state.lock);
2621 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2622 		/* Prevent redundant attempts to stop parallel queues */
2623 		if (q->guc->id != index)
2624 			continue;
2625 
2626 		guc_exec_queue_unpause_prepare(guc, q);
2627 	}
2628 	mutex_unlock(&guc->submission_state.lock);
2629 }
2630 
guc_exec_queue_replay_pending_state_change(struct xe_exec_queue * q)2631 static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
2632 {
2633 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2634 	struct xe_sched_msg *msg;
2635 
2636 	if (q->guc->needs_cleanup) {
2637 		msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2638 
2639 		guc_exec_queue_add_msg(q, msg, CLEANUP);
2640 		q->guc->needs_cleanup = false;
2641 	}
2642 
2643 	if (q->guc->needs_suspend) {
2644 		msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2645 
2646 		xe_sched_msg_lock(sched);
2647 		guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
2648 		xe_sched_msg_unlock(sched);
2649 
2650 		q->guc->needs_suspend = false;
2651 	}
2652 
2653 	/*
2654 	 * The resume must be in the message queue before the suspend as it is
2655 	 * not possible for a resume to be issued if a suspend pending is, but
2656 	 * the inverse is possible.
2657 	 */
2658 	if (q->guc->needs_resume) {
2659 		msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2660 
2661 		xe_sched_msg_lock(sched);
2662 		guc_exec_queue_try_add_msg_head(q, msg, RESUME);
2663 		xe_sched_msg_unlock(sched);
2664 
2665 		q->guc->needs_resume = false;
2666 	}
2667 }
2668 
guc_exec_queue_unpause(struct xe_guc * guc,struct xe_exec_queue * q)2669 static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
2670 {
2671 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2672 	bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);
2673 
2674 	lockdep_assert_held(&guc->submission_state.lock);
2675 
2676 	xe_sched_resubmit_jobs(sched);
2677 	guc_exec_queue_replay_pending_state_change(q);
2678 	xe_sched_submission_start(sched);
2679 	if (needs_tdr)
2680 		xe_guc_exec_queue_trigger_cleanup(q);
2681 	xe_sched_submission_resume_tdr(sched);
2682 }
2683 
2684 /**
2685  * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
2686  * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2687  */
xe_guc_submit_unpause(struct xe_guc * guc)2688 void xe_guc_submit_unpause(struct xe_guc *guc)
2689 {
2690 	struct xe_exec_queue *q;
2691 	unsigned long index;
2692 
2693 	mutex_lock(&guc->submission_state.lock);
2694 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2695 		xe_sched_submission_start(&q->guc->sched);
2696 	mutex_unlock(&guc->submission_state.lock);
2697 }
2698 
2699 /**
2700  * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
2701  * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2702  */
xe_guc_submit_unpause_vf(struct xe_guc * guc)2703 void xe_guc_submit_unpause_vf(struct xe_guc *guc)
2704 {
2705 	struct xe_exec_queue *q;
2706 	unsigned long index;
2707 
2708 	xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2709 
2710 	mutex_lock(&guc->submission_state.lock);
2711 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2712 		/*
2713 		 * Prevent redundant attempts to stop parallel queues, or queues
2714 		 * created after resfix done.
2715 		 */
2716 		if (q->guc->id != index ||
2717 		    !drm_sched_is_stopped(&q->guc->sched.base))
2718 			continue;
2719 
2720 		guc_exec_queue_unpause(guc, q);
2721 	}
2722 	mutex_unlock(&guc->submission_state.lock);
2723 }
2724 
2725 /**
2726  * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC.
2727  * @guc: the &xe_guc struct instance whose scheduler is to be aborted
2728  */
xe_guc_submit_pause_abort(struct xe_guc * guc)2729 void xe_guc_submit_pause_abort(struct xe_guc *guc)
2730 {
2731 	struct xe_exec_queue *q;
2732 	unsigned long index;
2733 
2734 	mutex_lock(&guc->submission_state.lock);
2735 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2736 		struct xe_gpu_scheduler *sched = &q->guc->sched;
2737 
2738 		/* Prevent redundant attempts to stop parallel queues */
2739 		if (q->guc->id != index)
2740 			continue;
2741 
2742 		xe_sched_submission_start(sched);
2743 		guc_exec_queue_kill(q);
2744 	}
2745 	mutex_unlock(&guc->submission_state.lock);
2746 }
2747 
2748 static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc * guc,u32 guc_id)2749 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
2750 {
2751 	struct xe_gt *gt = guc_to_gt(guc);
2752 	struct xe_exec_queue *q;
2753 
2754 	if (unlikely(guc_id >= GUC_ID_MAX)) {
2755 		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
2756 		return NULL;
2757 	}
2758 
2759 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
2760 	if (unlikely(!q)) {
2761 		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
2762 		return NULL;
2763 	}
2764 
2765 	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
2766 	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
2767 
2768 	return q;
2769 }
2770 
deregister_exec_queue(struct xe_guc * guc,struct xe_exec_queue * q)2771 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
2772 {
2773 	u32 action[] = {
2774 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
2775 		q->guc->id,
2776 	};
2777 
2778 	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
2779 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
2780 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
2781 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
2782 
2783 	trace_xe_exec_queue_deregister(q);
2784 
2785 	if (xe_exec_queue_is_multi_queue_secondary(q))
2786 		handle_deregister_done(guc, q);
2787 	else
2788 		xe_guc_ct_send_g2h_handler(&guc->ct, action,
2789 					   ARRAY_SIZE(action));
2790 }
2791 
/*
 * Process a scheduling-done notification for @q.
 *
 * @runnable_state 1 completes a pending enable: the resume time is recorded,
 * the pending resume/enable bits are cleared and CT waiters are woken.
 * @runnable_state 0 completes a pending disable, which either signals the
 * suspend fence (suspend pending), or clears the pending disable bit and,
 * for a destroyed queue, goes on to deregister it.
 */
static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
			      u32 runnable_state)
{
	struct xe_gt *gt = guc_to_gt(guc);
	bool destroyed;

	trace_xe_exec_queue_scheduling_done(q);

	if (runnable_state == 1) {
		xe_gt_assert(gt, exec_queue_pending_enable(q));

		q->guc->resume_time = ktime_get();
		clear_exec_queue_pending_resume(q);
		clear_exec_queue_pending_enable(q);
		smp_wmb();
		wake_up_all(&guc->ct.wq);
		return;
	}

	xe_gt_assert(gt, runnable_state == 0);
	xe_gt_assert(gt, exec_queue_pending_disable(q));

	if (q->guc->suspend_pending) {
		clear_exec_queue_pending_disable(q);
		suspend_fence_signal(q);
		return;
	}

	if (exec_queue_banned(q)) {
		smp_wmb();
		wake_up_all(&guc->ct.wq);
	}

	/*
	 * Sample the destroyed state first and only then clear
	 * pending_disable. This ensures the unregister is not triggered too
	 * early by something that only intends to disable scheduling. The
	 * caller doing the destroy must wait for an ongoing pending_disable
	 * before marking the queue as destroyed.
	 */
	destroyed = exec_queue_destroyed(q);
	clear_exec_queue_pending_disable(q);
	if (destroyed)
		deregister_exec_queue(guc, q);
}
2835 
/*
 * Variant of handle_sched_done() for multi-queue secondary queues that wraps
 * the call in the CT lock.
 */
static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state)
{
	struct xe_guc_ct *ct = &guc->ct;

	/* handle_sched_done() may send an H2G message, so hold the CT lock */
	mutex_lock(&ct->lock);
	handle_sched_done(guc, q, runnable_state);
	mutex_unlock(&ct->lock);
}
2845 
/*
 * G2H handler for a scheduling-done message.
 *
 * msg[0] carries the guc_id and msg[1] the runnable state. Returns 0 on
 * success, or -EPROTO when the message is too short, the guc_id does not
 * resolve to an exec queue, or the queue has neither an enable nor a disable
 * pending.
 */
int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	u32 runnable_state;

	if (unlikely(len < 2))
		return -EPROTO;

	runnable_state = msg[1];

	q = g2h_exec_queue_lookup(guc, msg[0]);
	if (unlikely(!q))
		return -EPROTO;

	if (unlikely(!(exec_queue_pending_enable(q) ||
		       exec_queue_pending_disable(q)))) {
		xe_gt_err(guc_to_gt(guc),
			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
			  atomic_read(&q->guc->state), q->guc->id,
			  runnable_state);
		return -EPROTO;
	}

	handle_sched_done(guc, q, runnable_state);

	return 0;
}
2874 
/*
 * Finish deregistration of @q: emit the trace event, drop the registered
 * state bit and then destroy the queue.
 */
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
{
	trace_xe_exec_queue_deregister_done(q);

	/* Clear the registered bit before handing the queue to destroy. */
	clear_exec_queue_registered(q);
	__guc_exec_queue_destroy(guc, q);
}
2882 
/*
 * G2H handler for a deregister-done message.
 *
 * msg[0] carries the guc_id. Returns 0 on success, or -EPROTO when the
 * message is empty, the guc_id does not resolve to an exec queue, or the
 * queue is not in a state where a deregister completion is expected (it must
 * be destroyed, not enabled, and have no enable/disable pending).
 */
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_exec_queue *q;
	bool unexpected;

	if (unlikely(len < 1))
		return -EPROTO;

	q = g2h_exec_queue_lookup(guc, msg[0]);
	if (unlikely(!q))
		return -EPROTO;

	unexpected = !exec_queue_destroyed(q) ||
		     exec_queue_pending_disable(q) ||
		     exec_queue_pending_enable(q) ||
		     exec_queue_enabled(q);
	if (unexpected) {
		xe_gt_err(guc_to_gt(guc),
			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
			  atomic_read(&q->guc->state), q->guc->id);
		return -EPROTO;
	}

	handle_deregister_done(guc, q);

	return 0;
}
2909 
/*
 * G2H handler for an engine reset notification.
 *
 * msg[0] carries the guc_id of the affected exec queue. Logs the reset and
 * triggers cleanup of the queue. Returns 0 on success, or -EPROTO when the
 * message is empty or the guc_id does not resolve to an exec queue.
 */
int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len < 1))
		return -EPROTO;

	guc_id = msg[0];
	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
		   atomic_read(&q->guc->state));

	trace_xe_exec_queue_reset(q);

	/*
	 * For a banned engine this is a NOP (the ban came from
	 * guc_exec_queue_timedout_job). Otherwise the drm scheduler is kicked
	 * to cancel jobs: the job timeout is set to the minimum value, which
	 * in turn kicks guc_exec_queue_timedout_job.
	 */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}
2941 
2942 /*
2943  * xe_guc_error_capture_handler - Handler of GuC captured message
2944  * @guc: The GuC object
2945  * @msg: Point to the message
2946  * @len: The message length
2947  *
2948  * When GuC captured data is ready, GuC will send message
2949  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be
2950  * called 1st to check status before process the data comes with the message.
2951  *
2952  * Returns: error code. 0 if success
2953  */
xe_guc_error_capture_handler(struct xe_guc * guc,u32 * msg,u32 len)2954 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
2955 {
2956 	u32 status;
2957 
2958 	if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
2959 		return -EPROTO;
2960 
2961 	status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
2962 	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
2963 		xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
2964 
2965 	xe_guc_capture_process(guc);
2966 
2967 	return 0;
2968 }
2969 
/*
 * G2H handler for a memory CAT error notification.
 *
 * msg[0] carries the guc_id; an optional msg[1] carries the error type.
 * Returns 0 on success (including the GUC_ID_UNKNOWN case, which is only
 * logged), or -EPROTO when the message length is not 1 or 2 or the guc_id
 * does not resolve to an exec queue.
 */
int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
					       u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	u32 guc_id;
	u32 type;

	if (unlikely(len < 1 || len > 2))
		return -EPROTO;

	guc_id = msg[0];
	type = len == 2 ? msg[1] : XE_GUC_CAT_ERR_TYPE_INVALID;

	if (guc_id == GUC_ID_UNKNOWN) {
		/*
		 * GuC reports GUC_ID_UNKNOWN when it cannot map the CAT fault
		 * to any PF/VF context. In that case only the PF is notified
		 * about the fault.
		 */
		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
		return 0;
	}

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	/*
	 * The type is HW-defined and differs between platforms, so it is not
	 * decoded here; it is only printed when valid.
	 * See bspec 54047 and 72187 for details.
	 */
	if (type == XE_GUC_CAT_ERR_TYPE_INVALID)
		xe_gt_info(gt,
			   "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
			   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
	else
		xe_gt_info(gt,
			   "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
			   type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);

	trace_xe_exec_queue_memory_cat_error(q);

	/* Handled the same way as an engine reset */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}
3020 
/*
 * G2H handler for a failed engine reset request.
 *
 * The message must be exactly three words: GuC engine class, instance and
 * failure reason. The failure is logged as an error and an asynchronous GT
 * reset is requested. Returns 0 on success, -EPROTO on a bad length.
 */
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	u32 reason;
	u8 klass;
	u8 inst;

	if (unlikely(len != 3))
		return -EPROTO;

	/* Class and instance are deliberately narrowed to 8 bits. */
	klass = msg[0];
	inst = msg[1];
	reason = msg[2];

	/* A hardware feature failed unexpectedly, so log a real error */
	xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
		  klass, inst, reason);

	xe_gt_reset_async(gt);

	return 0;
}
3042 
/*
 * G2H handler for a CGP context error notification.
 *
 * The message must be exactly XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN words.
 * The exec queue is looked up by msg[2] (labelled q0_id in the debug
 * message), the error is logged and the queue is cleaned up the same way as
 * for an engine reset.
 *
 * Returns 0 on success, or -EPROTO when the length is wrong or the id does
 * not resolve to an exec queue.
 */
int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
						u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_exec_queue *q;
	u32 guc_id;

	if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
		drm_err(&xe->drm, "Invalid length %u", len);
		return -EPROTO;
	}

	/* Only index @msg once its length has been validated. */
	guc_id = msg[2];

	q = g2h_exec_queue_lookup(guc, guc_id);
	if (unlikely(!q))
		return -EPROTO;

	xe_gt_dbg(gt,
		  "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x",
		  msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);

	trace_xe_exec_queue_cgp_context_error(q);

	/* Treat the same as engine reset */
	xe_guc_exec_queue_reset_trigger_cleanup(q);

	return 0;
}
3071 
3072 /**
3073  * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
3074  * @guc: guc
3075  * @msg: message indicating CGP sync done
3076  * @len: length of message
3077  *
3078  * Set multi queue group's sync_pending flag to false and wakeup anyone waiting
3079  * for CGP synchronization to complete.
3080  *
3081  * Return: 0 on success, -EPROTO for malformed messages.
3082  */
xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc * guc,u32 * msg,u32 len)3083 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
3084 {
3085 	struct xe_device *xe = guc_to_xe(guc);
3086 	struct xe_exec_queue *q;
3087 	u32 guc_id = msg[0];
3088 
3089 	if (unlikely(len < 1)) {
3090 		drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
3091 		return -EPROTO;
3092 	}
3093 
3094 	q = g2h_exec_queue_lookup(guc, guc_id);
3095 	if (unlikely(!q))
3096 		return -EPROTO;
3097 
3098 	if (!xe_exec_queue_is_multi_queue_primary(q)) {
3099 		drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
3100 		return -EPROTO;
3101 	}
3102 
3103 	/* Wakeup the serialized cgp update wait */
3104 	WRITE_ONCE(q->multi_queue.group->sync_pending, false);
3105 	xe_guc_ct_wake_waiters(&guc->ct);
3106 
3107 	return 0;
3108 }
3109 
3110 static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue * q,struct xe_guc_submit_exec_queue_snapshot * snapshot)3111 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
3112 				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
3113 {
3114 	struct xe_guc *guc = exec_queue_to_guc(q);
3115 	struct xe_device *xe = guc_to_xe(guc);
3116 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
3117 	int i;
3118 
3119 	snapshot->guc.wqi_head = q->guc->wqi_head;
3120 	snapshot->guc.wqi_tail = q->guc->wqi_tail;
3121 	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
3122 	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
3123 	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
3124 							  wq_desc.wq_status);
3125 
3126 	if (snapshot->parallel.wq_desc.head !=
3127 	    snapshot->parallel.wq_desc.tail) {
3128 		for (i = snapshot->parallel.wq_desc.head;
3129 		     i != snapshot->parallel.wq_desc.tail;
3130 		     i = (i + sizeof(u32)) % WQ_SIZE)
3131 			snapshot->parallel.wq[i / sizeof(u32)] =
3132 				parallel_read(xe, map, wq[i / sizeof(u32)]);
3133 	}
3134 }
3135 
3136 static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot * snapshot,struct drm_printer * p)3137 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3138 				 struct drm_printer *p)
3139 {
3140 	int i;
3141 
3142 	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
3143 		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
3144 	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
3145 		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
3146 	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
3147 
3148 	if (snapshot->parallel.wq_desc.head !=
3149 	    snapshot->parallel.wq_desc.tail) {
3150 		for (i = snapshot->parallel.wq_desc.head;
3151 		     i != snapshot->parallel.wq_desc.tail;
3152 		     i = (i + sizeof(u32)) % WQ_SIZE)
3153 			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
3154 				   snapshot->parallel.wq[i / sizeof(u32)]);
3155 	}
3156 }
3157 
3158 /**
3159  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
3160  * @q: faulty exec queue
3161  *
3162  * This can be printed out in a later stage like during dev_coredump
3163  * analysis.
3164  *
3165  * Returns: a GuC Submit Engine snapshot object that must be freed by the
3166  * caller, using `xe_guc_exec_queue_snapshot_free`.
3167  */
3168 struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue * q)3169 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
3170 {
3171 	struct xe_gpu_scheduler *sched = &q->guc->sched;
3172 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
3173 	int i;
3174 
3175 	snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC);
3176 
3177 	if (!snapshot)
3178 		return NULL;
3179 
3180 	snapshot->guc.id = q->guc->id;
3181 	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
3182 	snapshot->class = q->class;
3183 	snapshot->logical_mask = q->logical_mask;
3184 	snapshot->width = q->width;
3185 	snapshot->refcount = kref_read(&q->refcount);
3186 	snapshot->sched_timeout = sched->base.timeout;
3187 	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
3188 	snapshot->sched_props.preempt_timeout_us =
3189 		q->sched_props.preempt_timeout_us;
3190 
3191 	snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width,
3192 				     GFP_ATOMIC);
3193 
3194 	if (snapshot->lrc) {
3195 		for (i = 0; i < q->width; ++i) {
3196 			struct xe_lrc *lrc = q->lrc[i];
3197 
3198 			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
3199 		}
3200 	}
3201 
3202 	snapshot->schedule_state = atomic_read(&q->guc->state);
3203 	snapshot->exec_queue_flags = q->flags;
3204 
3205 	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
3206 	if (snapshot->parallel_execution)
3207 		guc_exec_queue_wq_snapshot_capture(q, snapshot);
3208 
3209 	if (xe_exec_queue_is_multi_queue(q)) {
3210 		snapshot->multi_queue.valid = true;
3211 		snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
3212 		snapshot->multi_queue.pos = q->multi_queue.pos;
3213 	}
3214 
3215 	return snapshot;
3216 }
3217 
3218 /**
3219  * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
3220  * @snapshot: Previously captured snapshot of job.
3221  *
3222  * This captures some data that requires taking some locks, so it cannot be done in signaling path.
3223  */
3224 void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot * snapshot)3225 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3226 {
3227 	int i;
3228 
3229 	if (!snapshot || !snapshot->lrc)
3230 		return;
3231 
3232 	for (i = 0; i < snapshot->width; ++i)
3233 		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
3234 }
3235 
3236 /**
3237  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
3238  * @snapshot: GuC Submit Engine snapshot object.
3239  * @p: drm_printer where it will be printed out.
3240  *
3241  * This function prints out a given GuC Submit Engine snapshot object.
3242  */
3243 void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot * snapshot,struct drm_printer * p)3244 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3245 				 struct drm_printer *p)
3246 {
3247 	int i;
3248 
3249 	if (!snapshot)
3250 		return;
3251 
3252 	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
3253 	drm_printf(p, "\tName: %s\n", snapshot->name);
3254 	drm_printf(p, "\tClass: %d\n", snapshot->class);
3255 	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
3256 	drm_printf(p, "\tWidth: %d\n", snapshot->width);
3257 	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
3258 	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
3259 	drm_printf(p, "\tTimeslice: %u (us)\n",
3260 		   snapshot->sched_props.timeslice_us);
3261 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
3262 		   snapshot->sched_props.preempt_timeout_us);
3263 
3264 	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
3265 		xe_lrc_snapshot_print(snapshot->lrc[i], p);
3266 
3267 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
3268 	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
3269 
3270 	if (snapshot->parallel_execution)
3271 		guc_exec_queue_wq_snapshot_print(snapshot, p);
3272 
3273 	if (snapshot->multi_queue.valid) {
3274 		drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
3275 		drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
3276 	}
3277 }
3278 
3279 /**
3280  * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
3281  * snapshot.
3282  * @snapshot: GuC Submit Engine snapshot object.
3283  *
3284  * This function free all the memory that needed to be allocated at capture
3285  * time.
3286  */
xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot * snapshot)3287 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3288 {
3289 	int i;
3290 
3291 	if (!snapshot)
3292 		return;
3293 
3294 	if (snapshot->lrc) {
3295 		for (i = 0; i < snapshot->width; i++)
3296 			xe_lrc_snapshot_free(snapshot->lrc[i]);
3297 		kfree(snapshot->lrc);
3298 	}
3299 	kfree(snapshot);
3300 }
3301 
/*
 * Capture a snapshot of @q, print it to @p and free it again. The print and
 * free helpers both accept a NULL snapshot, so a failed capture is harmless.
 */
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
	struct xe_guc_submit_exec_queue_snapshot *snap =
		xe_guc_exec_queue_snapshot_capture(q);

	xe_guc_exec_queue_snapshot_print(snap, p);
	xe_guc_exec_queue_snapshot_free(snap);
}
3310 
3311 /**
3312  * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
3313  * @q: Execution queue
3314  * @ctx_type: Type of the context
3315  *
3316  * This function registers the execution queue with the guc. Special context
3317  * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
3318  * are only applicable for IGPU and in the VF.
3319  * Submits the execution queue to GUC after registering it.
3320  *
3321  * Returns - None.
3322  */
xe_guc_register_vf_exec_queue(struct xe_exec_queue * q,int ctx_type)3323 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
3324 {
3325 	struct xe_guc *guc = exec_queue_to_guc(q);
3326 	struct xe_device *xe = guc_to_xe(guc);
3327 	struct xe_gt *gt = guc_to_gt(guc);
3328 
3329 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
3330 	xe_gt_assert(gt, !IS_DGFX(xe));
3331 	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
3332 		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
3333 	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
3334 
3335 	register_exec_queue(q, ctx_type);
3336 	enable_scheduling(q);
3337 }
3338 
3339 /**
3340  * xe_guc_submit_print - GuC Submit Print.
3341  * @guc: GuC.
3342  * @p: drm_printer where it will be printed out.
3343  *
3344  * This function capture and prints snapshots of **all** GuC Engines.
3345  */
xe_guc_submit_print(struct xe_guc * guc,struct drm_printer * p)3346 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
3347 {
3348 	struct xe_exec_queue *q;
3349 	unsigned long index;
3350 
3351 	if (!xe_device_uc_enabled(guc_to_xe(guc)))
3352 		return;
3353 
3354 	mutex_lock(&guc->submission_state.lock);
3355 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3356 		guc_exec_queue_print(q, p);
3357 	mutex_unlock(&guc->submission_state.lock);
3358 }
3359 
3360 /**
3361  * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
3362  * registered with the GuC
3363  * @guc: GuC.
3364  *
3365  * Return: true if any MLRC queue is registered with the GuC, false otherwise.
3366  */
xe_guc_has_registered_mlrc_queues(struct xe_guc * guc)3367 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
3368 {
3369 	struct xe_exec_queue *q;
3370 	unsigned long index;
3371 
3372 	guard(mutex)(&guc->submission_state.lock);
3373 
3374 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3375 		if (q->width > 1)
3376 			return true;
3377 
3378 	return false;
3379 }
3380 
3381 /**
3382  * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
3383  * exec queues registered to given GuC.
3384  * @guc: the &xe_guc struct instance
3385  * @scratch: scratch buffer to be used as temporary storage
3386  *
3387  * Returns: zero on success, negative error code on failure.
3388  */
xe_guc_contexts_hwsp_rebase(struct xe_guc * guc,void * scratch)3389 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
3390 {
3391 	struct xe_exec_queue *q;
3392 	unsigned long index;
3393 	int err = 0;
3394 
3395 	mutex_lock(&guc->submission_state.lock);
3396 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
3397 		/* Prevent redundant attempts to stop parallel queues */
3398 		if (q->guc->id != index)
3399 			continue;
3400 
3401 		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
3402 		if (err)
3403 			break;
3404 	}
3405 	mutex_unlock(&guc->submission_state.lock);
3406 
3407 	return err;
3408 }
3409