xref: /linux/drivers/gpu/drm/xe/xe_guc_submit.c (revision 390db60f8e2bd21fae544917eb3a8618265c058c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_submit.h"
7 
8 #include <linux/bitfield.h>
9 #include <linux/bitmap.h>
10 #include <linux/circ_buf.h>
11 #include <linux/delay.h>
12 #include <linux/dma-fence-array.h>
13 #include <linux/math64.h>
14 
15 #include <drm/drm_managed.h>
16 
17 #include "abi/guc_actions_abi.h"
18 #include "abi/guc_actions_slpc_abi.h"
19 #include "abi/guc_klvs_abi.h"
20 #include "regs/xe_lrc_layout.h"
21 #include "xe_assert.h"
22 #include "xe_devcoredump.h"
23 #include "xe_device.h"
24 #include "xe_exec_queue.h"
25 #include "xe_force_wake.h"
26 #include "xe_gpu_scheduler.h"
27 #include "xe_gt.h"
28 #include "xe_gt_clock.h"
29 #include "xe_gt_printk.h"
30 #include "xe_guc.h"
31 #include "xe_guc_capture.h"
32 #include "xe_guc_ct.h"
33 #include "xe_guc_exec_queue_types.h"
34 #include "xe_guc_id_mgr.h"
35 #include "xe_guc_klv_helpers.h"
36 #include "xe_guc_submit_types.h"
37 #include "xe_hw_engine.h"
38 #include "xe_hw_fence.h"
39 #include "xe_lrc.h"
40 #include "xe_macros.h"
41 #include "xe_map.h"
42 #include "xe_mocs.h"
43 #include "xe_pm.h"
44 #include "xe_ring_ops_types.h"
45 #include "xe_sched_job.h"
46 #include "xe_trace.h"
47 #include "xe_uc_fw.h"
48 #include "xe_vm.h"
49 
50 static struct xe_guc *
51 exec_queue_to_guc(struct xe_exec_queue *q)
52 {
53 	return &q->gt->uc.guc;
54 }
55 
56 /*
57  * Helpers for engine state, using an atomic as some of the bits can transition
58  * at the same time (e.g. a suspend can be happening at the same time as a
59  * schedule engine done being processed).
60  */
61 #define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
62 #define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
63 #define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
64 #define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
65 #define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
66 #define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
67 #define EXEC_QUEUE_STATE_RESET			(1 << 6)
68 #define EXEC_QUEUE_STATE_KILLED			(1 << 7)
69 #define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
70 #define EXEC_QUEUE_STATE_BANNED			(1 << 9)
71 #define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
72 #define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)
73 
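/*
 * Each state bit above gets a small test/set/clear helper below. Composite
 * checks are a single atomic_read() masked with several bits, e.g. a sketch of
 * what exec_queue_killed_or_banned_or_wedged() further down expands to:
 *
 *	atomic_read(&q->guc->state) &
 *		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
 *		 EXEC_QUEUE_STATE_BANNED)
 */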
74 static bool exec_queue_registered(struct xe_exec_queue *q)
75 {
76 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
77 }
78 
79 static void set_exec_queue_registered(struct xe_exec_queue *q)
80 {
81 	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
82 }
83 
84 static void clear_exec_queue_registered(struct xe_exec_queue *q)
85 {
86 	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
87 }
88 
89 static bool exec_queue_enabled(struct xe_exec_queue *q)
90 {
91 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
92 }
93 
94 static void set_exec_queue_enabled(struct xe_exec_queue *q)
95 {
96 	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
97 }
98 
99 static void clear_exec_queue_enabled(struct xe_exec_queue *q)
100 {
101 	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
102 }
103 
104 static bool exec_queue_pending_enable(struct xe_exec_queue *q)
105 {
106 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
107 }
108 
109 static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
110 {
111 	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
112 }
113 
114 static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
115 {
116 	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
117 }
118 
119 static bool exec_queue_pending_disable(struct xe_exec_queue *q)
120 {
121 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
122 }
123 
124 static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
125 {
126 	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
127 }
128 
129 static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
130 {
131 	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
132 }
133 
134 static bool exec_queue_destroyed(struct xe_exec_queue *q)
135 {
136 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
137 }
138 
139 static void set_exec_queue_destroyed(struct xe_exec_queue *q)
140 {
141 	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
142 }
143 
144 static bool exec_queue_banned(struct xe_exec_queue *q)
145 {
146 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
147 }
148 
149 static void set_exec_queue_banned(struct xe_exec_queue *q)
150 {
151 	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
152 }
153 
154 static bool exec_queue_suspended(struct xe_exec_queue *q)
155 {
156 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
157 }
158 
159 static void set_exec_queue_suspended(struct xe_exec_queue *q)
160 {
161 	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
162 }
163 
164 static void clear_exec_queue_suspended(struct xe_exec_queue *q)
165 {
166 	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
167 }
168 
169 static bool exec_queue_reset(struct xe_exec_queue *q)
170 {
171 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
172 }
173 
174 static void set_exec_queue_reset(struct xe_exec_queue *q)
175 {
176 	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
177 }
178 
179 static bool exec_queue_killed(struct xe_exec_queue *q)
180 {
181 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
182 }
183 
184 static void set_exec_queue_killed(struct xe_exec_queue *q)
185 {
186 	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
187 }
188 
189 static bool exec_queue_wedged(struct xe_exec_queue *q)
190 {
191 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
192 }
193 
194 static void set_exec_queue_wedged(struct xe_exec_queue *q)
195 {
196 	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
197 }
198 
199 static bool exec_queue_check_timeout(struct xe_exec_queue *q)
200 {
201 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
202 }
203 
204 static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
205 {
206 	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
207 }
208 
209 static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
210 {
211 	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
212 }
213 
214 static bool exec_queue_extra_ref(struct xe_exec_queue *q)
215 {
216 	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
217 }
218 
219 static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
220 {
221 	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
222 }
223 
224 static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
225 {
226 	return (atomic_read(&q->guc->state) &
227 		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
228 		 EXEC_QUEUE_STATE_BANNED));
229 }
230 
231 static void guc_submit_fini(struct drm_device *drm, void *arg)
232 {
233 	struct xe_guc *guc = arg;
234 	struct xe_device *xe = guc_to_xe(guc);
235 	struct xe_gt *gt = guc_to_gt(guc);
236 	int ret;
237 
238 	ret = wait_event_timeout(guc->submission_state.fini_wq,
239 				 xa_empty(&guc->submission_state.exec_queue_lookup),
240 				 HZ * 5);
241 
242 	drain_workqueue(xe->destroy_wq);
243 
244 	xe_gt_assert(gt, ret);
245 
246 	xa_destroy(&guc->submission_state.exec_queue_lookup);
247 }
248 
249 static void guc_submit_wedged_fini(void *arg)
250 {
251 	struct xe_guc *guc = arg;
252 	struct xe_exec_queue *q;
253 	unsigned long index;
254 
255 	mutex_lock(&guc->submission_state.lock);
256 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
257 		if (exec_queue_wedged(q)) {
258 			mutex_unlock(&guc->submission_state.lock);
259 			xe_exec_queue_put(q);
260 			mutex_lock(&guc->submission_state.lock);
261 		}
262 	}
263 	mutex_unlock(&guc->submission_state.lock);
264 }
265 
266 static const struct xe_exec_queue_ops guc_exec_queue_ops;
267 
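/*
 * Prime lockdep with the submission_state.lock vs memory-reclaim ordering at
 * init time: taking and dropping the mutex inside an fs_reclaim_acquire()
 * section records the dependency, so any later allocation under this lock
 * that could recurse into reclaim should be flagged by lockdep (the lock is
 * taken in the dma-fence signalling path, see alloc_guc_id()).
 */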
268 static void primelockdep(struct xe_guc *guc)
269 {
270 	if (!IS_ENABLED(CONFIG_LOCKDEP))
271 		return;
272 
273 	fs_reclaim_acquire(GFP_KERNEL);
274 
275 	mutex_lock(&guc->submission_state.lock);
276 	mutex_unlock(&guc->submission_state.lock);
277 
278 	fs_reclaim_release(GFP_KERNEL);
279 }
280 
281 /**
282  * xe_guc_submit_init() - Initialize GuC submission.
283  * @guc: the &xe_guc to initialize
284  * @num_ids: number of GuC context IDs to use
285  *
286  * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
287  * GuC context IDs supported by the GuC firmware should be used for submission.
288  *
289  * Only VF drivers will have to provide an explicit number of GuC context IDs
290  * that they can use for submission.
291  *
292  * Return: 0 on success or a negative error code on failure.
293  */
294 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
295 {
296 	struct xe_device *xe = guc_to_xe(guc);
297 	struct xe_gt *gt = guc_to_gt(guc);
298 	int err;
299 
300 	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
301 	if (err)
302 		return err;
303 
304 	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
305 	if (err)
306 		return err;
307 
308 	gt->exec_queue_ops = &guc_exec_queue_ops;
309 
310 	xa_init(&guc->submission_state.exec_queue_lookup);
311 
312 	init_waitqueue_head(&guc->submission_state.fini_wq);
313 
314 	primelockdep(guc);
315 
316 	guc->submission_state.initialized = true;
317 
318 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
319 }
320 
321 /*
322  * Given that we want to guarantee enough RCS throughput to avoid missing
323  * frames, we set the yield policy to 20% of each 80ms interval.
324  */
325 #define RC_YIELD_DURATION	80	/* in ms */
326 #define RC_YIELD_RATIO		20	/* in percent */
327 static u32 *emit_render_compute_yield_klv(u32 *emit)
328 {
329 	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
330 	*emit++ = RC_YIELD_DURATION;
331 	*emit++ = RC_YIELD_RATIO;
332 
333 	return emit;
334 }
335 
336 #define SCHEDULING_POLICY_MAX_DWORDS 16
337 static int guc_init_global_schedule_policy(struct xe_guc *guc)
338 {
339 	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
340 	u32 *emit = data;
341 	u32 count = 0;
342 	int ret;
343 
344 	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
345 		return 0;
346 
347 	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
348 
349 	if (CCS_MASK(guc_to_gt(guc)))
350 		emit = emit_render_compute_yield_klv(emit);
351 
352 	count = emit - data;
353 	if (count > 1) {
354 		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
355 
356 		ret = xe_guc_ct_send_block(&guc->ct, data, count);
357 		if (ret < 0) {
358 			xe_gt_err(guc_to_gt(guc),
359 				  "failed to enable GuC scheduling policies: %pe\n",
360 				  ERR_PTR(ret));
361 			return ret;
362 		}
363 	}
364 
365 	return 0;
366 }
367 
368 int xe_guc_submit_enable(struct xe_guc *guc)
369 {
370 	int ret;
371 
372 	ret = guc_init_global_schedule_policy(guc);
373 	if (ret)
374 		return ret;
375 
376 	guc->submission_state.enabled = true;
377 
378 	return 0;
379 }
380 
381 void xe_guc_submit_disable(struct xe_guc *guc)
382 {
383 	guc->submission_state.enabled = false;
384 }
385 
386 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
387 {
388 	int i;
389 
390 	lockdep_assert_held(&guc->submission_state.lock);
391 
392 	for (i = 0; i < xa_count; ++i)
393 		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
394 
395 	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
396 				     q->guc->id, q->width);
397 
398 	if (xa_empty(&guc->submission_state.exec_queue_lookup))
399 		wake_up(&guc->submission_state.fini_wq);
400 }
401 
402 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
403 {
404 	int ret;
405 	int i;
406 
407 	/*
408 	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
409 	 * worst case the user gets -ENOMEM on engine create and has to try again.
410 	 *
411 	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
412 	 * failure.
413 	 */
414 	lockdep_assert_held(&guc->submission_state.lock);
415 
416 	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
417 					   q->width);
418 	if (ret < 0)
419 		return ret;
420 
421 	q->guc->id = ret;
422 
423 	for (i = 0; i < q->width; ++i) {
424 		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
425 				      q->guc->id + i, q, GFP_NOWAIT));
426 		if (ret)
427 			goto err_release;
428 	}
429 
430 	return 0;
431 
432 err_release:
433 	__release_guc_id(guc, q, i);
434 
435 	return ret;
436 }
437 
438 static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
439 {
440 	mutex_lock(&guc->submission_state.lock);
441 	__release_guc_id(guc, q, q->width);
442 	mutex_unlock(&guc->submission_state.lock);
443 }
444 
445 struct exec_queue_policy {
446 	u32 count;
447 	struct guc_update_exec_queue_policy h2g;
448 };
449 
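/*
 * A context-policy H2G is built one KLV at a time and then sent over CT, e.g.
 * a minimal sketch of what set_min_preemption_timeout() below does:
 *
 *	struct exec_queue_policy policy;
 *
 *	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
 *	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
 *	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
 *		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
 */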
450 static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
451 {
452 	size_t bytes = sizeof(policy->h2g.header) +
453 		       (sizeof(policy->h2g.klv[0]) * policy->count);
454 
455 	return bytes / sizeof(u32);
456 }
457 
458 static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
459 					      u16 guc_id)
460 {
461 	policy->h2g.header.action =
462 		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
463 	policy->h2g.header.guc_id = guc_id;
464 	policy->count = 0;
465 }
466 
467 #define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
468 static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
469 					   u32 data) \
470 { \
471 	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
472 \
473 	policy->h2g.klv[policy->count].kl = \
474 		FIELD_PREP(GUC_KLV_0_KEY, \
475 			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
476 		FIELD_PREP(GUC_KLV_0_LEN, 1); \
477 	policy->h2g.klv[policy->count].value = data; \
478 	policy->count++; \
479 }
480 
481 MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
482 MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
483 MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
484 MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
485 #undef MAKE_EXEC_QUEUE_POLICY_ADD
486 
487 static const int xe_exec_queue_prio_to_guc[] = {
488 	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
489 	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
490 	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
491 	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
492 };
493 
494 static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
495 {
496 	struct exec_queue_policy policy;
497 	enum xe_exec_queue_priority prio = q->sched_props.priority;
498 	u32 timeslice_us = q->sched_props.timeslice_us;
499 	u32 slpc_exec_queue_freq_req = 0;
500 	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;
501 
502 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
503 
504 	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
505 		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
506 
507 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
508 	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
509 	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
510 	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
511 	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
512 							     slpc_exec_queue_freq_req);
513 
514 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
515 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
516 }
517 
518 static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
519 {
520 	struct exec_queue_policy policy;
521 
522 	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
523 	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);
524 
525 	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
526 		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
527 }
528 
529 #define parallel_read(xe_, map_, field_) \
530 	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
531 			field_)
532 #define parallel_write(xe_, map_, field_, val_) \
533 	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
534 			field_, val_)
535 
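/*
 * Context registration comes in two flavours: __register_exec_queue() for a
 * single LRC and __register_mlrc_exec_queue() for parallel queues, where the
 * action carries the same wq/descriptor header fields plus the number of
 * contexts and one extra lo/hi LRC descriptor pair per child context
 * (q->width - 1 of them).
 */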
536 static void __register_mlrc_exec_queue(struct xe_guc *guc,
537 				       struct xe_exec_queue *q,
538 				       struct guc_ctxt_registration_info *info)
539 {
540 #define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
541 	u32 action[MAX_MLRC_REG_SIZE];
542 	int len = 0;
543 	int i;
544 
545 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));
546 
547 	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
548 	action[len++] = info->flags;
549 	action[len++] = info->context_idx;
550 	action[len++] = info->engine_class;
551 	action[len++] = info->engine_submit_mask;
552 	action[len++] = info->wq_desc_lo;
553 	action[len++] = info->wq_desc_hi;
554 	action[len++] = info->wq_base_lo;
555 	action[len++] = info->wq_base_hi;
556 	action[len++] = info->wq_size;
557 	action[len++] = q->width;
558 	action[len++] = info->hwlrca_lo;
559 	action[len++] = info->hwlrca_hi;
560 
561 	for (i = 1; i < q->width; ++i) {
562 		struct xe_lrc *lrc = q->lrc[i];
563 
564 		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
565 		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
566 	}
567 
568 	/* explicitly checks some fields that we might fixup later */
569 	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
570 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
571 	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
572 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
573 	xe_gt_assert(guc_to_gt(guc), q->width ==
574 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
575 	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
576 		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
577 	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
578 #undef MAX_MLRC_REG_SIZE
579 
580 	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
581 }
582 
583 static void __register_exec_queue(struct xe_guc *guc,
584 				  struct guc_ctxt_registration_info *info)
585 {
586 	u32 action[] = {
587 		XE_GUC_ACTION_REGISTER_CONTEXT,
588 		info->flags,
589 		info->context_idx,
590 		info->engine_class,
591 		info->engine_submit_mask,
592 		info->wq_desc_lo,
593 		info->wq_desc_hi,
594 		info->wq_base_lo,
595 		info->wq_base_hi,
596 		info->wq_size,
597 		info->hwlrca_lo,
598 		info->hwlrca_hi,
599 	};
600 
601 	/* explicitly checks some fields that we might fixup later */
602 	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
603 		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
604 	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
605 		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
606 	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
607 		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
608 
609 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
610 }
611 
612 static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
613 {
614 	struct xe_guc *guc = exec_queue_to_guc(q);
615 	struct xe_device *xe = guc_to_xe(guc);
616 	struct xe_lrc *lrc = q->lrc[0];
617 	struct guc_ctxt_registration_info info;
618 
619 	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
620 	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);
621 
622 	memset(&info, 0, sizeof(info));
623 	info.context_idx = q->guc->id;
624 	info.engine_class = xe_engine_class_to_guc_class(q->class);
625 	info.engine_submit_mask = q->logical_mask;
626 	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
627 	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
628 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
629 		FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
630 
631 	if (xe_exec_queue_is_parallel(q)) {
632 		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
633 		struct iosys_map map = xe_lrc_parallel_map(lrc);
634 
635 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
636 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
637 		info.wq_desc_hi = upper_32_bits(ggtt_addr +
638 			offsetof(struct guc_submit_parallel_scratch, wq_desc));
639 		info.wq_base_lo = lower_32_bits(ggtt_addr +
640 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
641 		info.wq_base_hi = upper_32_bits(ggtt_addr +
642 			offsetof(struct guc_submit_parallel_scratch, wq[0]));
643 		info.wq_size = WQ_SIZE;
644 
645 		q->guc->wqi_head = 0;
646 		q->guc->wqi_tail = 0;
647 		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
648 		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
649 	}
650 
651 	/*
652 	 * We must keep a reference for LR engines if engine is registered with
653 	 * the GuC as jobs signal immediately and we can't destroy an engine if the
654 	 * GuC has a reference to it.
655 	 */
656 	if (xe_exec_queue_is_lr(q))
657 		xe_exec_queue_get(q);
658 
659 	set_exec_queue_registered(q);
660 	trace_xe_exec_queue_register(q);
661 	if (xe_exec_queue_is_parallel(q))
662 		__register_mlrc_exec_queue(guc, q, &info);
663 	else
664 		__register_exec_queue(guc, &info);
665 	init_policies(guc, q);
666 }
667 
668 static u32 wq_space_until_wrap(struct xe_exec_queue *q)
669 {
670 	return (WQ_SIZE - q->guc->wqi_tail);
671 }
672 
673 static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
674 {
675 	struct xe_guc *guc = exec_queue_to_guc(q);
676 	struct xe_device *xe = guc_to_xe(guc);
677 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
678 	unsigned int sleep_period_ms = 1;
679 
680 #define AVAILABLE_SPACE \
681 	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
682 	if (wqi_size > AVAILABLE_SPACE) {
683 try_again:
684 		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
685 		if (wqi_size > AVAILABLE_SPACE) {
686 			if (sleep_period_ms == 1024) {
687 				xe_gt_reset_async(q->gt);
688 				return -ENODEV;
689 			}
690 
691 			msleep(sleep_period_ms);
692 			sleep_period_ms <<= 1;
693 			goto try_again;
694 		}
695 	}
696 #undef AVAILABLE_SPACE
697 
698 	return 0;
699 }
700 
701 static int wq_noop_append(struct xe_exec_queue *q)
702 {
703 	struct xe_guc *guc = exec_queue_to_guc(q);
704 	struct xe_device *xe = guc_to_xe(guc);
705 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
706 	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
707 
708 	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
709 		return -ENODEV;
710 
711 	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));
712 
713 	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
714 		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
715 		       FIELD_PREP(WQ_LEN_MASK, len_dw));
716 	q->guc->wqi_tail = 0;
717 
718 	return 0;
719 }
720 
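/*
 * Rough layout of a MULTI_LRC work queue item as emitted by wq_item_append()
 * below (one u32 per entry, matching the asserts against the ABI offsets):
 *
 *	dw0:  WQ_TYPE_MULTI_LRC | len
 *	dw1:  parent LRC context descriptor (low 32 bits)
 *	dw2:  guc_id | parent ring tail
 *	dw3:  work item fence id (always 0 here)
 *	dw4+: one ring tail per child LRC
 */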
721 static void wq_item_append(struct xe_exec_queue *q)
722 {
723 	struct xe_guc *guc = exec_queue_to_guc(q);
724 	struct xe_device *xe = guc_to_xe(guc);
725 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
726 #define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
727 	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
728 	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
729 	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
730 	int i = 0, j;
731 
732 	if (wqi_size > wq_space_until_wrap(q)) {
733 		if (wq_noop_append(q))
734 			return;
735 	}
736 	if (wq_wait_for_space(q, wqi_size))
737 		return;
738 
739 	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN);
740 	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
741 		FIELD_PREP(WQ_LEN_MASK, len_dw);
742 	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW);
743 	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
744 	xe_gt_assert(guc_to_gt(guc), i ==
745 		     XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS);
746 	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
747 		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
748 	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID);
749 	wqi[i++] = 0;
750 	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL);
751 	for (j = 1; j < q->width; ++j) {
752 		struct xe_lrc *lrc = q->lrc[j];
753 
754 		wqi[i++] = lrc->ring.tail / sizeof(u64);
755 	}
756 
757 	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));
758 
759 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
760 				      wq[q->guc->wqi_tail / sizeof(u32)]));
761 	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
762 	q->guc->wqi_tail += wqi_size;
763 	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);
764 
765 	xe_device_wmb(xe);
766 
767 	map = xe_lrc_parallel_map(q->lrc[0]);
768 	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
769 }
770 
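/*
 * Presumably used as part of VF migration recovery fixups: the parent LRC
 * descriptor may have changed, so wq_items_rebase() below walks every
 * not-yet-consumed work queue item between wqi_head and wqi_tail and rewrites
 * the context descriptor dword of MULTI_LRC items; NOOP items are skipped and
 * anything else aborts the parse.
 */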
771 static int wq_items_rebase(struct xe_exec_queue *q)
772 {
773 	struct xe_guc *guc = exec_queue_to_guc(q);
774 	struct xe_device *xe = guc_to_xe(guc);
775 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
776 	int i = q->guc->wqi_head;
777 
778 	/* the ring starts after a header struct */
779 	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0]));
780 
781 	while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
782 		u32 len_dw, type, val;
783 
784 		if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE))
785 			break;
786 
787 		val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) +
788 					 XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN,
789 					 WQ_SIZE / sizeof(u32));
790 		len_dw = FIELD_GET(WQ_LEN_MASK, val);
791 		type = FIELD_GET(WQ_TYPE_MASK, val);
792 
793 		if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32)))
794 			break;
795 
796 		if (type == WQ_TYPE_MULTI_LRC) {
797 			val = xe_lrc_descriptor(q->lrc[0]);
798 			xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) +
799 					   XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW,
800 					   WQ_SIZE / sizeof(u32), val);
801 		} else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) {
802 			break;
803 		}
804 
805 		i += (len_dw + 1) * sizeof(u32);
806 	}
807 
808 	if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
809 		xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n");
810 		return -EBADMSG;
811 	}
812 	return 0;
813 }
814 
815 #define RESUME_PENDING	~0x0ull
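/*
 * Submission proper: for parallel queues a work queue item is appended,
 * otherwise the ring tail of the single LRC is bumped. The first submission
 * on a queue (not yet enabled, not suspended) additionally sends a
 * SCHED_CONTEXT_MODE_SET(ENABLE) and marks the queue pending-enable; later
 * submissions only need a SCHED_CONTEXT kick.
 */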
816 static void submit_exec_queue(struct xe_exec_queue *q)
817 {
818 	struct xe_guc *guc = exec_queue_to_guc(q);
819 	struct xe_lrc *lrc = q->lrc[0];
820 	u32 action[3];
821 	u32 g2h_len = 0;
822 	u32 num_g2h = 0;
823 	int len = 0;
824 	bool extra_submit = false;
825 
826 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
827 
828 	if (xe_exec_queue_is_parallel(q))
829 		wq_item_append(q);
830 	else
831 		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
832 
833 	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
834 		return;
835 
836 	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
837 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
838 		action[len++] = q->guc->id;
839 		action[len++] = GUC_CONTEXT_ENABLE;
840 		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
841 		num_g2h = 1;
842 		if (xe_exec_queue_is_parallel(q))
843 			extra_submit = true;
844 
845 		q->guc->resume_time = RESUME_PENDING;
846 		set_exec_queue_pending_enable(q);
847 		set_exec_queue_enabled(q);
848 		trace_xe_exec_queue_scheduling_enable(q);
849 	} else {
850 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
851 		action[len++] = q->guc->id;
852 		trace_xe_exec_queue_submit(q);
853 	}
854 
855 	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);
856 
857 	if (extra_submit) {
858 		len = 0;
859 		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
860 		action[len++] = q->guc->id;
861 		trace_xe_exec_queue_submit(q);
862 
863 		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
864 	}
865 }
866 
867 static struct dma_fence *
868 guc_exec_queue_run_job(struct drm_sched_job *drm_job)
869 {
870 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
871 	struct xe_exec_queue *q = job->q;
872 	struct xe_guc *guc = exec_queue_to_guc(q);
873 	struct dma_fence *fence = NULL;
874 	bool lr = xe_exec_queue_is_lr(q);
875 
876 	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
877 		     exec_queue_banned(q) || exec_queue_suspended(q));
878 
879 	trace_xe_sched_job_run(job);
880 
881 	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
882 		if (!exec_queue_registered(q))
883 			register_exec_queue(q, GUC_CONTEXT_NORMAL);
884 		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
885 			q->ring_ops->emit_job(job);
886 		submit_exec_queue(q);
887 	}
888 
889 	if (lr) {
890 		xe_sched_job_set_error(job, -EOPNOTSUPP);
891 		dma_fence_put(job->fence);	/* Drop ref from xe_sched_job_arm */
892 	} else {
893 		fence = job->fence;
894 	}
895 
896 	return fence;
897 }
898 
899 /**
900  * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending
901  * on all queues under a guc.
902  * @guc: the &xe_guc struct instance
903  */
904 void xe_guc_jobs_ring_rebase(struct xe_guc *guc)
905 {
906 	struct xe_exec_queue *q;
907 	unsigned long index;
908 
909 	/*
910 	 * This routine is used within VF migration recovery. This means
911 	 * using the lock here introduces a restriction: we cannot wait
912 	 * for any GFX HW response while the lock is taken.
913 	 */
914 	mutex_lock(&guc->submission_state.lock);
915 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
916 		if (exec_queue_killed_or_banned_or_wedged(q))
917 			continue;
918 		xe_exec_queue_jobs_ring_restore(q);
919 	}
920 	mutex_unlock(&guc->submission_state.lock);
921 }
922 
923 static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
924 {
925 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
926 
927 	trace_xe_sched_job_free(job);
928 	xe_sched_job_put(job);
929 }
930 
931 int xe_guc_read_stopped(struct xe_guc *guc)
932 {
933 	return atomic_read(&guc->submission_state.stopped);
934 }
935 
936 #define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
937 	u32 action[] = {						\
938 		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
939 		q->guc->id,						\
940 		GUC_CONTEXT_##enable_disable,				\
941 	}
942 
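/*
 * Combined schedule-disable + deregister: drop the preemption timeout to its
 * minimum (presumably so the context gets preempted quickly), wait for any
 * in-flight pending-enable/disable to settle, then send a single disable that
 * the G2H handler will follow up with a deregister. G2H credits for both
 * replies are reserved up front since handlers must not reserve space
 * themselves.
 */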
943 static void disable_scheduling_deregister(struct xe_guc *guc,
944 					  struct xe_exec_queue *q)
945 {
946 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
947 	int ret;
948 
949 	set_min_preemption_timeout(guc, q);
950 	smp_rmb();
951 	ret = wait_event_timeout(guc->ct.wq,
952 				 (!exec_queue_pending_enable(q) &&
953 				  !exec_queue_pending_disable(q)) ||
954 					 xe_guc_read_stopped(guc),
955 				 HZ * 5);
956 	if (!ret) {
957 		struct xe_gpu_scheduler *sched = &q->guc->sched;
958 
959 		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
960 		xe_sched_submission_start(sched);
961 		xe_gt_reset_async(q->gt);
962 		xe_sched_tdr_queue_imm(sched);
963 		return;
964 	}
965 
966 	clear_exec_queue_enabled(q);
967 	set_exec_queue_pending_disable(q);
968 	set_exec_queue_destroyed(q);
969 	trace_xe_exec_queue_scheduling_disable(q);
970 
971 	/*
972 	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
973 	 * handler and we are not allowed to reserve G2H space in handlers.
974 	 */
975 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
976 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
977 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
978 }
979 
980 static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
981 {
982 	struct xe_guc *guc = exec_queue_to_guc(q);
983 	struct xe_device *xe = guc_to_xe(guc);
984 
985 	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
986 	wake_up_all(&xe->ufence_wq);
987 
988 	if (xe_exec_queue_is_lr(q))
989 		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
990 	else
991 		xe_sched_tdr_queue_imm(&q->guc->sched);
992 }
993 
994 /**
995  * xe_guc_submit_wedge() - Wedge GuC submission
996  * @guc: the GuC object
997  *
998  * Save the state of exec queues registered with the GuC by taking a ref to each queue.
999  * Register a DRMM handler to drop refs upon driver unload.
1000  */
1001 void xe_guc_submit_wedge(struct xe_guc *guc)
1002 {
1003 	struct xe_gt *gt = guc_to_gt(guc);
1004 	struct xe_exec_queue *q;
1005 	unsigned long index;
1006 	int err;
1007 
1008 	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
1009 
1010 	/*
1011 	 * If device is being wedged even before submission_state is
1012 	 * initialized, there's nothing to do here.
1013 	 */
1014 	if (!guc->submission_state.initialized)
1015 		return;
1016 
1017 	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
1018 				       guc_submit_wedged_fini, guc);
1019 	if (err) {
1020 		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; "
1021 			  "Although device is wedged.\n");
1022 		return;
1023 	}
1024 
1025 	mutex_lock(&guc->submission_state.lock);
1026 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
1027 		if (xe_exec_queue_get_unless_zero(q))
1028 			set_exec_queue_wedged(q);
1029 	mutex_unlock(&guc->submission_state.lock);
1030 }
1031 
1032 static bool guc_submit_hint_wedged(struct xe_guc *guc)
1033 {
1034 	struct xe_device *xe = guc_to_xe(guc);
1035 
1036 	if (xe->wedged.mode != 2)
1037 		return false;
1038 
1039 	if (xe_device_wedged(xe))
1040 		return true;
1041 
1042 	xe_device_declare_wedged(xe);
1043 
1044 	return true;
1045 }
1046 
1047 static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
1048 {
1049 	struct xe_guc_exec_queue *ge =
1050 		container_of(w, struct xe_guc_exec_queue, lr_tdr);
1051 	struct xe_exec_queue *q = ge->q;
1052 	struct xe_guc *guc = exec_queue_to_guc(q);
1053 	struct xe_gpu_scheduler *sched = &ge->sched;
1054 	bool wedged = false;
1055 
1056 	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
1057 	trace_xe_exec_queue_lr_cleanup(q);
1058 
1059 	if (!exec_queue_killed(q))
1060 		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1061 
1062 	/* Kill the run_job / process_msg entry points */
1063 	xe_sched_submission_stop(sched);
1064 
1065 	/*
1066 	 * Engine state now mostly stable, disable scheduling / deregister if
1067 	 * needed. This cleanup routine might be called multiple times, where
1068 	 * the actual async engine deregister drops the final engine ref.
1069 	 * Calling disable_scheduling_deregister will mark the engine as
1070 	 * destroyed and fire off the CT requests to disable scheduling /
1071 	 * deregister, which we only want to do once. We also don't want to mark
1072 	 * the engine as pending_disable again as this may race with the
1073 	 * xe_guc_deregister_done_handler() which treats it as an unexpected
1074 	 * state.
1075 	 */
1076 	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
1077 		struct xe_guc *guc = exec_queue_to_guc(q);
1078 		int ret;
1079 
1080 		set_exec_queue_banned(q);
1081 		disable_scheduling_deregister(guc, q);
1082 
1083 		/*
1084 		 * Must wait for scheduling to be disabled before signalling
1085 		 * any fences; if the GT is broken, the GT reset code should signal us.
1086 		 */
1087 		ret = wait_event_timeout(guc->ct.wq,
1088 					 !exec_queue_pending_disable(q) ||
1089 					 xe_guc_read_stopped(guc), HZ * 5);
1090 		if (!ret) {
1091 			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
1092 				   q->guc->id);
1093 			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
1094 				       q->guc->id);
1095 			xe_sched_submission_start(sched);
1096 			xe_gt_reset_async(q->gt);
1097 			return;
1098 		}
1099 	}
1100 
1101 	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
1102 		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
1103 
1104 	xe_sched_submission_start(sched);
1105 }
1106 
1107 #define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)
1108 
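/*
 * Timestamp-based timeout check: the delta between the LRC context timestamp
 * and the job timestamp approximates how long the job has actually been
 * running; it is converted to ms, padded by 5% (ADJUST_FIVE_PERCENT above) to
 * absorb GuC scheduling latency, and compared against job_timeout_ms.
 */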
1109 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
1110 {
1111 	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
1112 	u32 ctx_timestamp, ctx_job_timestamp;
1113 	u32 timeout_ms = q->sched_props.job_timeout_ms;
1114 	u32 diff;
1115 	u64 running_time_ms;
1116 
1117 	if (!xe_sched_job_started(job)) {
1118 		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
1119 			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1120 			   q->guc->id);
1121 
1122 		return xe_sched_invalidate_job(job, 2);
1123 	}
1124 
1125 	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
1126 	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
1127 
1128 	/*
1129 	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
1130 	 * Counter wraps at ~223s at the usual 19.2MHz, so be paranoid and catch
1131 	 */
1132 	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
1133 
1134 	diff = ctx_timestamp - ctx_job_timestamp;
1135 
1136 	/*
1137 	 * Ensure timeout is within 5% to account for any GuC scheduling latency
1138 	 */
1139 	running_time_ms =
1140 		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));
1141 
1142 	xe_gt_dbg(gt,
1143 		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
1144 		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1145 		  q->guc->id, running_time_ms, timeout_ms, diff);
1146 
1147 	return running_time_ms >= timeout_ms;
1148 }
1149 
1150 static void enable_scheduling(struct xe_exec_queue *q)
1151 {
1152 	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
1153 	struct xe_guc *guc = exec_queue_to_guc(q);
1154 	int ret;
1155 
1156 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1157 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1158 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1159 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1160 
1161 	set_exec_queue_pending_enable(q);
1162 	set_exec_queue_enabled(q);
1163 	trace_xe_exec_queue_scheduling_enable(q);
1164 
1165 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1166 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1167 
1168 	ret = wait_event_timeout(guc->ct.wq,
1169 				 !exec_queue_pending_enable(q) ||
1170 				 xe_guc_read_stopped(guc), HZ * 5);
1171 	if (!ret || xe_guc_read_stopped(guc)) {
1172 		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
1173 		set_exec_queue_banned(q);
1174 		xe_gt_reset_async(q->gt);
1175 		xe_sched_tdr_queue_imm(&q->guc->sched);
1176 	}
1177 }
1178 
1179 static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
1180 {
1181 	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
1182 	struct xe_guc *guc = exec_queue_to_guc(q);
1183 
1184 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1185 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1186 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1187 
1188 	if (immediate)
1189 		set_min_preemption_timeout(guc, q);
1190 	clear_exec_queue_enabled(q);
1191 	set_exec_queue_pending_disable(q);
1192 	trace_xe_exec_queue_scheduling_disable(q);
1193 
1194 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1195 		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
1196 }
1197 
1198 static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
1199 {
1200 	u32 action[] = {
1201 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
1202 		q->guc->id,
1203 	};
1204 
1205 	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1206 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1207 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
1208 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1209 
1210 	set_exec_queue_destroyed(q);
1211 	trace_xe_exec_queue_deregister(q);
1212 
1213 	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
1214 		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
1215 }
1216 
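/*
 * High-level TDR flow for a timed-out job:
 *  1. Stop the scheduler and, if needed, take a manual devcoredump capture.
 *  2. Disable scheduling (with CHECK_TIMEOUT set so the G2H handler stays
 *     passive) to get stable timestamps.
 *  3. If the job has not really exceeded its timeout, re-enable scheduling
 *     and rearm the TDR.
 *  4. Otherwise ban the queue, deregister it from the GuC, mark pending jobs
 *     with an error and restart the scheduler so fences get signalled.
 * Any failure to get a GuC response along the way escalates to a GT reset.
 */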
1217 static enum drm_gpu_sched_stat
1218 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1219 {
1220 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
1221 	struct xe_sched_job *tmp_job;
1222 	struct xe_exec_queue *q = job->q;
1223 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1224 	struct xe_guc *guc = exec_queue_to_guc(q);
1225 	const char *process_name = "no process";
1226 	struct xe_device *xe = guc_to_xe(guc);
1227 	unsigned int fw_ref;
1228 	int err = -ETIME;
1229 	pid_t pid = -1;
1230 	int i = 0;
1231 	bool wedged = false, skip_timeout_check;
1232 
1233 	/*
1234 	 * TDR has fired before free job worker. Common if exec queue
1235 	 * immediately closed after last fence signaled. Add back to pending
1236 	 * list so job can be freed and kick scheduler ensuring free job is not
1237 	 * lost.
1238 	 */
1239 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
1240 		return DRM_GPU_SCHED_STAT_NO_HANG;
1241 
1242 	/* Kill the run_job entry point */
1243 	xe_sched_submission_stop(sched);
1244 
1245 	/* Must check all state after stopping scheduler */
1246 	skip_timeout_check = exec_queue_reset(q) ||
1247 		exec_queue_killed_or_banned_or_wedged(q) ||
1248 		exec_queue_destroyed(q);
1249 
1250 	/*
1251 	 * If a devcoredump has not been captured and GuC capture for the job is
1252 	 * not ready, do a manual capture first and decide later if we need to use it
1253 	 */
1254 	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
1255 	    !xe_guc_capture_get_matching_and_lock(q)) {
1256 		/* take force wake before engine register manual capture */
1257 		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
1258 		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
1259 			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
1260 
1261 		xe_engine_snapshot_capture_for_queue(q);
1262 
1263 		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
1264 	}
1265 
1266 	/*
1267 	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
1268 	 * modify scheduling state to read timestamp. We could read the
1269 	 * timestamp from a register to accumulate current running time but this
1270 	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
1271 	 * genuine timeouts.
1272 	 */
1273 	if (!exec_queue_killed(q))
1274 		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1275 
1276 	/* Engine state now stable, disable scheduling to check timestamp */
1277 	if (!wedged && exec_queue_registered(q)) {
1278 		int ret;
1279 
1280 		if (exec_queue_reset(q))
1281 			err = -EIO;
1282 
1283 		if (!exec_queue_destroyed(q)) {
1284 			/*
1285 			 * Wait for any pending G2H to flush out before
1286 			 * modifying state
1287 			 */
1288 			ret = wait_event_timeout(guc->ct.wq,
1289 						 (!exec_queue_pending_enable(q) &&
1290 						  !exec_queue_pending_disable(q)) ||
1291 						 xe_guc_read_stopped(guc), HZ * 5);
1292 			if (!ret || xe_guc_read_stopped(guc))
1293 				goto trigger_reset;
1294 
1295 			/*
1296 			 * Flag communicates to G2H handler that schedule
1297 			 * disable originated from a timeout check. The G2H then
1298 			 * avoids triggering cleanup or deregistering the exec
1299 			 * queue.
1300 			 */
1301 			set_exec_queue_check_timeout(q);
1302 			disable_scheduling(q, skip_timeout_check);
1303 		}
1304 
1305 		/*
1306 		 * Must wait for scheduling to be disabled before signalling
1307 		 * any fences; if the GT is broken, the GT reset code should signal us.
1308 		 *
1309 		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
1310 		 * error) messages which can cause the schedule disable to get
1311 		 * lost. If this occurs, trigger a GT reset to recover.
1312 		 */
1313 		smp_rmb();
1314 		ret = wait_event_timeout(guc->ct.wq,
1315 					 !exec_queue_pending_disable(q) ||
1316 					 xe_guc_read_stopped(guc), HZ * 5);
1317 		if (!ret || xe_guc_read_stopped(guc)) {
1318 trigger_reset:
1319 			if (!ret)
1320 				xe_gt_warn(guc_to_gt(guc),
1321 					   "Schedule disable failed to respond, guc_id=%d",
1322 					   q->guc->id);
1323 			xe_devcoredump(q, job,
1324 				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
1325 				       q->guc->id, ret, xe_guc_read_stopped(guc));
1326 			set_exec_queue_extra_ref(q);
1327 			xe_exec_queue_get(q);	/* GT reset owns this */
1328 			set_exec_queue_banned(q);
1329 			xe_gt_reset_async(q->gt);
1330 			xe_sched_tdr_queue_imm(sched);
1331 			goto rearm;
1332 		}
1333 	}
1334 
1335 	/*
1336 	 * Check if job is actually timed out, if so restart job execution and TDR
1337 	 */
1338 	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
1339 	    !exec_queue_reset(q) && exec_queue_registered(q)) {
1340 		clear_exec_queue_check_timeout(q);
1341 		goto sched_enable;
1342 	}
1343 
1344 	if (q->vm && q->vm->xef) {
1345 		process_name = q->vm->xef->process_name;
1346 		pid = q->vm->xef->pid;
1347 	}
1348 
1349 	if (!exec_queue_killed(q))
1350 		xe_gt_notice(guc_to_gt(guc),
1351 			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1352 			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1353 			     q->guc->id, q->flags, process_name, pid);
1354 
1355 	trace_xe_sched_job_timedout(job);
1356 
1357 	if (!exec_queue_killed(q))
1358 		xe_devcoredump(q, job,
1359 			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
1360 			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1361 			       q->guc->id, q->flags);
1362 
1363 	/*
1364 	 * Kernel jobs should never fail, nor should VM jobs; if they do,
1365 	 * something has gone wrong and the GT needs a reset
1366 	 */
1367 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1368 		   "Kernel-submitted job timed out\n");
1369 	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1370 		   "VM job timed out on non-killed execqueue\n");
1371 	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1372 			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1373 		if (!xe_sched_invalidate_job(job, 2)) {
1374 			clear_exec_queue_check_timeout(q);
1375 			xe_gt_reset_async(q->gt);
1376 			goto rearm;
1377 		}
1378 	}
1379 
1380 	/* Finish cleaning up exec queue via deregister */
1381 	set_exec_queue_banned(q);
1382 	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
1383 		set_exec_queue_extra_ref(q);
1384 		xe_exec_queue_get(q);
1385 		__deregister_exec_queue(guc, q);
1386 	}
1387 
1388 	/* Stop fence signaling */
1389 	xe_hw_fence_irq_stop(q->fence_irq);
1390 
1391 	/*
1392 	 * Fence state now stable, stop / start scheduler which cleans up any
1393 	 * fences that are complete
1394 	 */
1395 	xe_sched_add_pending_job(sched, job);
1396 	xe_sched_submission_start(sched);
1397 
1398 	xe_guc_exec_queue_trigger_cleanup(q);
1399 
1400 	/* Mark all outstanding jobs as bad, thus completing them */
1401 	spin_lock(&sched->base.job_list_lock);
1402 	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
1403 		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
1404 	spin_unlock(&sched->base.job_list_lock);
1405 
1406 	/* Start fence signaling */
1407 	xe_hw_fence_irq_start(q->fence_irq);
1408 
1409 	return DRM_GPU_SCHED_STAT_RESET;
1410 
1411 sched_enable:
1412 	enable_scheduling(q);
1413 rearm:
1414 	/*
1415 	 * XXX: Ideally want to adjust timeout based on current execution time
1416 	 * but there is currently no easy way to do that in the DRM scheduler. With
1417 	 * some thought, do this in a follow up.
1418 	 */
1419 	xe_sched_submission_start(sched);
1420 	return DRM_GPU_SCHED_STAT_NO_HANG;
1421 }
1422 
1423 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1424 {
1425 	struct xe_guc_exec_queue *ge = q->guc;
1426 	struct xe_guc *guc = exec_queue_to_guc(q);
1427 
1428 	release_guc_id(guc, q);
1429 	xe_sched_entity_fini(&ge->entity);
1430 	xe_sched_fini(&ge->sched);
1431 
1432 	/*
1433 	 * RCU free due to sched being exported via DRM scheduler fences
1434 	 * (timeline name).
1435 	 */
1436 	kfree_rcu(ge, rcu);
1437 }
1438 
1439 static void __guc_exec_queue_destroy_async(struct work_struct *w)
1440 {
1441 	struct xe_guc_exec_queue *ge =
1442 		container_of(w, struct xe_guc_exec_queue, destroy_async);
1443 	struct xe_exec_queue *q = ge->q;
1444 	struct xe_guc *guc = exec_queue_to_guc(q);
1445 
1446 	xe_pm_runtime_get(guc_to_xe(guc));
1447 	trace_xe_exec_queue_destroy(q);
1448 
1449 	if (xe_exec_queue_is_lr(q))
1450 		cancel_work_sync(&ge->lr_tdr);
1451 	/* Confirm no work left behind accessing device structures */
1452 	cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1453 
1454 	xe_exec_queue_fini(q);
1455 
1456 	xe_pm_runtime_put(guc_to_xe(guc));
1457 }
1458 
1459 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
1460 {
1461 	struct xe_guc *guc = exec_queue_to_guc(q);
1462 	struct xe_device *xe = guc_to_xe(guc);
1463 
1464 	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
1465 
1466 	/* We must block on kernel engines so slabs are empty on driver unload */
1467 	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1468 		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
1469 	else
1470 		queue_work(xe->destroy_wq, &q->guc->destroy_async);
1471 }
1472 
1473 static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
1474 {
1475 	/*
1476 	 * Might be done from within the GPU scheduler, need to do async as we
1477 	 * fini the scheduler when the engine is fini'd, the scheduler can't
1478 	 * complete fini within itself (circular dependency). Async resolves
1479 	 * this and we don't really care when everything is fini'd, just that it
1480 	 * is.
1481 	 */
1482 	guc_exec_queue_destroy_async(q);
1483 }
1484 
1485 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1486 {
1487 	struct xe_exec_queue *q = msg->private_data;
1488 	struct xe_guc *guc = exec_queue_to_guc(q);
1489 
1490 	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1491 	trace_xe_exec_queue_cleanup_entity(q);
1492 
1493 	/*
1494 	 * Expected state transitions for cleanup:
1495 	 * - If the exec queue is registered and GuC firmware is running, we must first
1496 	 *   disable scheduling and deregister the queue to ensure proper teardown and
1497 	 *   resource release in the GuC, then destroy the exec queue on driver side.
1498 	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
1499 	 *   we cannot expect a response for the deregister request. In this case,
1500 	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
1501 	 *   will not process further requests and all resources must be cleaned up locally.
1502 	 */
1503 	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
1504 		disable_scheduling_deregister(guc, q);
1505 	else
1506 		__guc_exec_queue_destroy(guc, q);
1507 }
1508 
1509 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1510 {
1511 	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1512 }
1513 
1514 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1515 {
1516 	struct xe_exec_queue *q = msg->private_data;
1517 	struct xe_guc *guc = exec_queue_to_guc(q);
1518 
1519 	if (guc_exec_queue_allowed_to_change_state(q))
1520 		init_policies(guc, q);
1521 	kfree(msg);
1522 }
1523 
1524 static void __suspend_fence_signal(struct xe_exec_queue *q)
1525 {
1526 	if (!q->guc->suspend_pending)
1527 		return;
1528 
1529 	WRITE_ONCE(q->guc->suspend_pending, false);
1530 	wake_up(&q->guc->suspend_wait);
1531 }
1532 
1533 static void suspend_fence_signal(struct xe_exec_queue *q)
1534 {
1535 	struct xe_guc *guc = exec_queue_to_guc(q);
1536 
1537 	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
1538 		     xe_guc_read_stopped(guc));
1539 	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);
1540 
1541 	__suspend_fence_signal(q);
1542 }
1543 
1544 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1545 {
1546 	struct xe_exec_queue *q = msg->private_data;
1547 	struct xe_guc *guc = exec_queue_to_guc(q);
1548 
1549 	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
1550 	    exec_queue_enabled(q)) {
1551 		wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
1552 			   xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));
1553 
1554 		if (!xe_guc_read_stopped(guc)) {
1555 			s64 since_resume_ms =
1556 				ktime_ms_delta(ktime_get(),
1557 					       q->guc->resume_time);
1558 			s64 wait_ms = q->vm->preempt.min_run_period_ms -
1559 				since_resume_ms;
1560 
1561 			if (wait_ms > 0 && q->guc->resume_time)
1562 				msleep(wait_ms);
1563 
1564 			set_exec_queue_suspended(q);
1565 			disable_scheduling(q, false);
1566 		}
1567 	} else if (q->guc->suspend_pending) {
1568 		set_exec_queue_suspended(q);
1569 		suspend_fence_signal(q);
1570 	}
1571 }
1572 
__guc_exec_queue_process_msg_resume(struct xe_sched_msg * msg)1573 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1574 {
1575 	struct xe_exec_queue *q = msg->private_data;
1576 
1577 	if (guc_exec_queue_allowed_to_change_state(q)) {
1578 		clear_exec_queue_suspended(q);
1579 		if (!exec_queue_enabled(q)) {
1580 			q->guc->resume_time = RESUME_PENDING;
1581 			enable_scheduling(q);
1582 		}
1583 	} else {
1584 		clear_exec_queue_suspended(q);
1585 	}
1586 }
1587 
1588 #define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
1589 #define SET_SCHED_PROPS	2
1590 #define SUSPEND		3
1591 #define RESUME		4
1592 #define OPCODE_MASK	0xf
1593 #define MSG_LOCKED	BIT(8)
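
/*
 * Hedged sketch of the opcode encoding above (illustrative only): the low
 * bits carry the message opcode, while MSG_LOCKED is a flag for the add
 * path that is masked off before the opcode is stored, e.g.:
 *
 *	u32 opcode = SUSPEND | MSG_LOCKED;
 *
 *	msg->opcode = opcode & OPCODE_MASK;	// stored opcode == SUSPEND
 *	if (opcode & MSG_LOCKED)		// caller holds the msg lock
 *		xe_sched_add_msg_locked(&q->guc->sched, msg);
 *	else
 *		xe_sched_add_msg(&q->guc->sched, msg);
 */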
1594 
guc_exec_queue_process_msg(struct xe_sched_msg * msg)1595 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1596 {
1597 	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1598 
1599 	trace_xe_sched_msg_recv(msg);
1600 
1601 	switch (msg->opcode) {
1602 	case CLEANUP:
1603 		__guc_exec_queue_process_msg_cleanup(msg);
1604 		break;
1605 	case SET_SCHED_PROPS:
1606 		__guc_exec_queue_process_msg_set_sched_props(msg);
1607 		break;
1608 	case SUSPEND:
1609 		__guc_exec_queue_process_msg_suspend(msg);
1610 		break;
1611 	case RESUME:
1612 		__guc_exec_queue_process_msg_resume(msg);
1613 		break;
1614 	default:
1615 		XE_WARN_ON("Unknown message type");
1616 	}
1617 
1618 	xe_pm_runtime_put(xe);
1619 }
1620 
1621 static const struct drm_sched_backend_ops drm_sched_ops = {
1622 	.run_job = guc_exec_queue_run_job,
1623 	.free_job = guc_exec_queue_free_job,
1624 	.timedout_job = guc_exec_queue_timedout_job,
1625 };
1626 
1627 static const struct xe_sched_backend_ops xe_sched_ops = {
1628 	.process_msg = guc_exec_queue_process_msg,
1629 };
1630 
guc_exec_queue_init(struct xe_exec_queue * q)1631 static int guc_exec_queue_init(struct xe_exec_queue *q)
1632 {
1633 	struct xe_gpu_scheduler *sched;
1634 	struct xe_guc *guc = exec_queue_to_guc(q);
1635 	struct xe_guc_exec_queue *ge;
1636 	long timeout;
1637 	int err, i;
1638 
1639 	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
1640 
1641 	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
1642 	if (!ge)
1643 		return -ENOMEM;
1644 
1645 	q->guc = ge;
1646 	ge->q = q;
1647 	init_rcu_head(&ge->rcu);
1648 	init_waitqueue_head(&ge->suspend_wait);
1649 
1650 	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1651 		INIT_LIST_HEAD(&ge->static_msgs[i].link);
1652 
1653 	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1654 		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
1655 	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1656 			    NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
1657 			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
1658 			    q->name, gt_to_xe(q->gt)->drm.dev);
1659 	if (err)
1660 		goto err_free;
1661 
1662 	sched = &ge->sched;
1663 	err = xe_sched_entity_init(&ge->entity, sched);
1664 	if (err)
1665 		goto err_sched;
1666 
1667 	if (xe_exec_queue_is_lr(q))
1668 		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);
1669 
1670 	mutex_lock(&guc->submission_state.lock);
1671 
1672 	err = alloc_guc_id(guc, q);
1673 	if (err)
1674 		goto err_entity;
1675 
1676 	q->entity = &ge->entity;
1677 
1678 	if (xe_guc_read_stopped(guc))
1679 		xe_sched_stop(sched);
1680 
1681 	mutex_unlock(&guc->submission_state.lock);
1682 
1683 	xe_exec_queue_assign_name(q, q->guc->id);
1684 
1685 	trace_xe_exec_queue_create(q);
1686 
1687 	return 0;
1688 
1689 err_entity:
1690 	mutex_unlock(&guc->submission_state.lock);
1691 	xe_sched_entity_fini(&ge->entity);
1692 err_sched:
1693 	xe_sched_fini(&ge->sched);
1694 err_free:
1695 	kfree(ge);
1696 
1697 	return err;
1698 }
1699 
guc_exec_queue_kill(struct xe_exec_queue * q)1700 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1701 {
1702 	trace_xe_exec_queue_kill(q);
1703 	set_exec_queue_killed(q);
1704 	__suspend_fence_signal(q);
1705 	xe_guc_exec_queue_trigger_cleanup(q);
1706 }
1707 
guc_exec_queue_add_msg(struct xe_exec_queue * q,struct xe_sched_msg * msg,u32 opcode)1708 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1709 				   u32 opcode)
1710 {
1711 	xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
1712 
1713 	INIT_LIST_HEAD(&msg->link);
1714 	msg->opcode = opcode & OPCODE_MASK;
1715 	msg->private_data = q;
1716 
1717 	trace_xe_sched_msg_add(msg);
1718 	if (opcode & MSG_LOCKED)
1719 		xe_sched_add_msg_locked(&q->guc->sched, msg);
1720 	else
1721 		xe_sched_add_msg(&q->guc->sched, msg);
1722 }
1723 
guc_exec_queue_try_add_msg(struct xe_exec_queue * q,struct xe_sched_msg * msg,u32 opcode)1724 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
1725 				       struct xe_sched_msg *msg,
1726 				       u32 opcode)
1727 {
1728 	if (!list_empty(&msg->link))
1729 		return false;
1730 
1731 	guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
1732 
1733 	return true;
1734 }
1735 
1736 #define STATIC_MSG_CLEANUP	0
1737 #define STATIC_MSG_SUSPEND	1
1738 #define STATIC_MSG_RESUME	2
guc_exec_queue_destroy(struct xe_exec_queue * q)1739 static void guc_exec_queue_destroy(struct xe_exec_queue *q)
1740 {
1741 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1742 
1743 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1744 		guc_exec_queue_add_msg(q, msg, CLEANUP);
1745 	else
1746 		__guc_exec_queue_destroy(exec_queue_to_guc(q), q);
1747 }
1748 
guc_exec_queue_set_priority(struct xe_exec_queue * q,enum xe_exec_queue_priority priority)1749 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1750 				       enum xe_exec_queue_priority priority)
1751 {
1752 	struct xe_sched_msg *msg;
1753 
1754 	if (q->sched_props.priority == priority ||
1755 	    exec_queue_killed_or_banned_or_wedged(q))
1756 		return 0;
1757 
1758 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1759 	if (!msg)
1760 		return -ENOMEM;
1761 
1762 	q->sched_props.priority = priority;
1763 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1764 
1765 	return 0;
1766 }
1767 
guc_exec_queue_set_timeslice(struct xe_exec_queue * q,u32 timeslice_us)1768 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1769 {
1770 	struct xe_sched_msg *msg;
1771 
1772 	if (q->sched_props.timeslice_us == timeslice_us ||
1773 	    exec_queue_killed_or_banned_or_wedged(q))
1774 		return 0;
1775 
1776 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1777 	if (!msg)
1778 		return -ENOMEM;
1779 
1780 	q->sched_props.timeslice_us = timeslice_us;
1781 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1782 
1783 	return 0;
1784 }
1785 
guc_exec_queue_set_preempt_timeout(struct xe_exec_queue * q,u32 preempt_timeout_us)1786 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1787 					      u32 preempt_timeout_us)
1788 {
1789 	struct xe_sched_msg *msg;
1790 
1791 	if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1792 	    exec_queue_killed_or_banned_or_wedged(q))
1793 		return 0;
1794 
1795 	msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1796 	if (!msg)
1797 		return -ENOMEM;
1798 
1799 	q->sched_props.preempt_timeout_us = preempt_timeout_us;
1800 	guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1801 
1802 	return 0;
1803 }
1804 
guc_exec_queue_suspend(struct xe_exec_queue * q)1805 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1806 {
1807 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1808 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1809 
1810 	if (exec_queue_killed_or_banned_or_wedged(q))
1811 		return -EINVAL;
1812 
1813 	xe_sched_msg_lock(sched);
1814 	if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
1815 		q->guc->suspend_pending = true;
1816 	xe_sched_msg_unlock(sched);
1817 
1818 	return 0;
1819 }
1820 
guc_exec_queue_suspend_wait(struct xe_exec_queue * q)1821 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1822 {
1823 	struct xe_guc *guc = exec_queue_to_guc(q);
1824 	int ret;
1825 
1826 	/*
1827 	 * Likely don't need to check exec_queue_killed() as we clear
1828 	 * suspend_pending upon kill but to be paranoid but races in which
1829 	 * suspend_pending upon kill, but to be paranoid about races in which
1830 	 * suspend_pending is set after kill, also check kill here.
1831 	ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
1832 					       !READ_ONCE(q->guc->suspend_pending) ||
1833 					       exec_queue_killed(q) ||
1834 					       xe_guc_read_stopped(guc),
1835 					       HZ * 5);
1836 
1837 	if (!ret) {
1838 		xe_gt_warn(guc_to_gt(guc),
1839 			   "Suspend fence, guc_id=%d, failed to respond",
1840 			   q->guc->id);
1841 		/* XXX: Trigger GT reset? */
1842 		return -ETIME;
1843 	}
1844 
1845 	return ret < 0 ? ret : 0;
1846 }
1847 
guc_exec_queue_resume(struct xe_exec_queue * q)1848 static void guc_exec_queue_resume(struct xe_exec_queue *q)
1849 {
1850 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1851 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1852 	struct xe_guc *guc = exec_queue_to_guc(q);
1853 
1854 	xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
1855 
1856 	xe_sched_msg_lock(sched);
1857 	guc_exec_queue_try_add_msg(q, msg, RESUME);
1858 	xe_sched_msg_unlock(sched);
1859 }
1860 
guc_exec_queue_reset_status(struct xe_exec_queue * q)1861 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1862 {
1863 	return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1864 }
1865 
1866 /*
1867  * All of these functions are an abstraction layer which other parts of XE can
1868  * use to trap into the GuC backend. All of these functions, aside from init,
1869  * really shouldn't do much other than trap into the DRM scheduler which
1870  * synchronizes these operations.
1871  */
1872 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1873 	.init = guc_exec_queue_init,
1874 	.kill = guc_exec_queue_kill,
1875 	.fini = guc_exec_queue_fini,
1876 	.destroy = guc_exec_queue_destroy,
1877 	.set_priority = guc_exec_queue_set_priority,
1878 	.set_timeslice = guc_exec_queue_set_timeslice,
1879 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1880 	.suspend = guc_exec_queue_suspend,
1881 	.suspend_wait = guc_exec_queue_suspend_wait,
1882 	.resume = guc_exec_queue_resume,
1883 	.reset_status = guc_exec_queue_reset_status,
1884 };
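
/*
 * Hedged usage sketch (hypothetical helper, error handling simplified):
 * other parts of Xe reach this backend only through q->ops, e.g. to park a
 * queue and later let it run again:
 *
 *	int park_and_unpark(struct xe_exec_queue *q)
 *	{
 *		int err;
 *
 *		err = q->ops->suspend(q);	// queues STATIC_MSG_SUSPEND
 *		if (err)
 *			return err;
 *
 *		err = q->ops->suspend_wait(q);	// waits for suspend_fence_signal()
 *		if (err)
 *			return err;
 *
 *		// ... work while the queue is not scheduled ...
 *
 *		q->ops->resume(q);		// queues STATIC_MSG_RESUME
 *		return 0;
 *	}
 */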
1885 
guc_exec_queue_stop(struct xe_guc * guc,struct xe_exec_queue * q)1886 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1887 {
1888 	struct xe_gpu_scheduler *sched = &q->guc->sched;
1889 
1890 	/* Stop scheduling + flush any DRM scheduler operations */
1891 	xe_sched_submission_stop(sched);
1892 
1893 	/* Clean up lost G2H + reset engine state */
1894 	if (exec_queue_registered(q)) {
1895 		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1896 			xe_exec_queue_put(q);
1897 		else if (exec_queue_destroyed(q))
1898 			__guc_exec_queue_destroy(guc, q);
1899 	}
1900 	if (q->guc->suspend_pending) {
1901 		set_exec_queue_suspended(q);
1902 		suspend_fence_signal(q);
1903 	}
1904 	atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
1905 		   EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
1906 		   EXEC_QUEUE_STATE_SUSPENDED,
1907 		   &q->guc->state);
1908 	q->guc->resume_time = 0;
1909 	trace_xe_exec_queue_stop(q);
1910 
1911 	/*
1912 	 * Ban any engine (aside from kernel and engines used for VM ops) with a
1913 	 * started but not complete job or if a job has gone through a GT reset
1914 	 * more than twice.
1915 	 */
1916 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1917 		struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1918 		bool ban = false;
1919 
1920 		if (job) {
1921 			if ((xe_sched_job_started(job) &&
1922 			    !xe_sched_job_completed(job)) ||
1923 			    xe_sched_invalidate_job(job, 2)) {
1924 				trace_xe_sched_job_ban(job);
1925 				ban = true;
1926 			}
1927 		} else if (xe_exec_queue_is_lr(q) &&
1928 			   !xe_lrc_ring_is_idle(q->lrc[0])) {
1929 			ban = true;
1930 		}
1931 
1932 		if (ban) {
1933 			set_exec_queue_banned(q);
1934 			xe_guc_exec_queue_trigger_cleanup(q);
1935 		}
1936 	}
1937 }
1938 
1939 /**
1940  * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
1941  * @guc: the &xe_guc struct instance
1942  */
xe_guc_submit_reset_block(struct xe_guc * guc)1943 int xe_guc_submit_reset_block(struct xe_guc *guc)
1944 {
1945 	return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
1946 }
1947 
1948 /**
1949  * xe_guc_submit_reset_unblock - Allow reset calls again on given GuC.
1950  * @guc: the &xe_guc struct instance
1951  */
xe_guc_submit_reset_unblock(struct xe_guc * guc)1952 void xe_guc_submit_reset_unblock(struct xe_guc *guc)
1953 {
1954 	atomic_set_release(&guc->submission_state.reset_blocked, 0);
1955 	wake_up_all(&guc->ct.wq);
1956 }
1957 
guc_submit_reset_is_blocked(struct xe_guc * guc)1958 static int guc_submit_reset_is_blocked(struct xe_guc *guc)
1959 {
1960 	return atomic_read_acquire(&guc->submission_state.reset_blocked);
1961 }
1962 
1963 /* Maximum time of blocking reset */
1964 #define RESET_BLOCK_PERIOD_MAX (HZ * 5)
1965 
1966 /**
1967  * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
1968  * @guc: the &xe_guc struct instance
1969  */
xe_guc_wait_reset_unblock(struct xe_guc * guc)1970 int xe_guc_wait_reset_unblock(struct xe_guc *guc)
1971 {
1972 	return wait_event_timeout(guc->ct.wq,
1973 				  !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
1974 }
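
/*
 * Hedged usage sketch (hypothetical callers, simplified): a flow that must
 * not race with a GT reset can block resets for its duration, while the
 * reset path waits for the block to be lifted (bounded by
 * RESET_BLOCK_PERIOD_MAX).
 *
 *	// non-reset side: critical section that must not overlap a reset
 *	xe_guc_submit_reset_block(guc);
 *	// ... do work ...
 *	xe_guc_submit_reset_unblock(guc);
 *
 *	// reset side: wait returns 0 if the block was not lifted in time
 *	if (!xe_guc_wait_reset_unblock(guc))
 *		xe_gt_warn(gt, "reset still blocked, proceeding anyway");
 */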
1975 
xe_guc_submit_reset_prepare(struct xe_guc * guc)1976 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1977 {
1978 	int ret;
1979 
1980 	if (!guc->submission_state.initialized)
1981 		return 0;
1982 
1983 	/*
1984 	 * Using an atomic here rather than submission_state.lock as this
1985 	 * function can be called while holding the CT lock (engine reset
1986 	 * failure). submission_state.lock needs the CT lock to resubmit jobs.
1987 	 * Atomic is not ideal, but it works to protect against concurrent resets
1988 	 * and to release any TDRs waiting on guc->submission_state.stopped.
1989 	 */
1990 	ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1991 	smp_wmb();
1992 	wake_up_all(&guc->ct.wq);
1993 
1994 	return ret;
1995 }
1996 
xe_guc_submit_reset_wait(struct xe_guc * guc)1997 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1998 {
1999 	wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
2000 		   !xe_guc_read_stopped(guc));
2001 }
2002 
xe_guc_submit_stop(struct xe_guc * guc)2003 void xe_guc_submit_stop(struct xe_guc *guc)
2004 {
2005 	struct xe_exec_queue *q;
2006 	unsigned long index;
2007 
2008 	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2009 
2010 	mutex_lock(&guc->submission_state.lock);
2011 
2012 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2013 		/* Prevent redundant attempts to stop parallel queues */
2014 		if (q->guc->id != index)
2015 			continue;
2016 
2017 		guc_exec_queue_stop(guc, q);
2018 	}
2019 
2020 	mutex_unlock(&guc->submission_state.lock);
2021 
2022 	/*
2023 	 * No one can enter the backend at this point, aside from new engine
2024 	 * creation which is protected by guc->submission_state.lock.
2025 	 */
2026 
2027 }
2028 
2029 /**
2030  * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2031  * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2032  */
xe_guc_submit_pause(struct xe_guc * guc)2033 void xe_guc_submit_pause(struct xe_guc *guc)
2034 {
2035 	struct xe_exec_queue *q;
2036 	unsigned long index;
2037 
2038 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2039 		xe_sched_submission_stop_async(&q->guc->sched);
2040 }
2041 
guc_exec_queue_start(struct xe_exec_queue * q)2042 static void guc_exec_queue_start(struct xe_exec_queue *q)
2043 {
2044 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2045 
2046 	if (!exec_queue_killed_or_banned_or_wedged(q)) {
2047 		int i;
2048 
2049 		trace_xe_exec_queue_resubmit(q);
2050 		for (i = 0; i < q->width; ++i)
2051 			xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
2052 		xe_sched_resubmit_jobs(sched);
2053 	}
2054 
2055 	xe_sched_submission_start(sched);
2056 	xe_sched_submission_resume_tdr(sched);
2057 }
2058 
xe_guc_submit_start(struct xe_guc * guc)2059 int xe_guc_submit_start(struct xe_guc *guc)
2060 {
2061 	struct xe_exec_queue *q;
2062 	unsigned long index;
2063 
2064 	xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2065 
2066 	mutex_lock(&guc->submission_state.lock);
2067 	atomic_dec(&guc->submission_state.stopped);
2068 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2069 		/* Prevent redundant attempts to start parallel queues */
2070 		if (q->guc->id != index)
2071 			continue;
2072 
2073 		guc_exec_queue_start(q);
2074 	}
2075 	mutex_unlock(&guc->submission_state.lock);
2076 
2077 	wake_up_all(&guc->ct.wq);
2078 
2079 	return 0;
2080 }
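
/*
 * Hedged sketch of how the stop/start pair is expected to bracket a GT
 * reset; the real call chain goes through reset code outside this file, so
 * treat the ordering below as illustrative rather than exact:
 *
 *	xe_guc_submit_reset_prepare(guc);	// mark submission as stopped
 *	xe_guc_submit_stop(guc);		// quiesce every exec queue
 *	// ... reset the GT and reload the GuC ...
 *	xe_guc_submit_start(guc);		// resubmit jobs, restart TDRs
 */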
2081 
guc_exec_queue_unpause(struct xe_exec_queue * q)2082 static void guc_exec_queue_unpause(struct xe_exec_queue *q)
2083 {
2084 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2085 
2086 	xe_sched_submission_start(sched);
2087 }
2088 
2089 /**
2090  * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
2091  * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2092  */
xe_guc_submit_unpause(struct xe_guc * guc)2093 void xe_guc_submit_unpause(struct xe_guc *guc)
2094 {
2095 	struct xe_exec_queue *q;
2096 	unsigned long index;
2097 
2098 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2099 		guc_exec_queue_unpause(q);
2100 
2101 	wake_up_all(&guc->ct.wq);
2102 }
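
/*
 * Hedged usage sketch: pause/unpause only stop and restart the per-queue
 * DRM schedulers, without touching GuC registration state, so they are
 * suitable for temporarily quiescing submission (illustrative only):
 *
 *	xe_guc_submit_pause(guc);
 *	// ... operate while no new jobs are pushed to the hardware ...
 *	xe_guc_submit_unpause(guc);
 */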
2103 
2104 static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc * guc,u32 guc_id)2105 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
2106 {
2107 	struct xe_gt *gt = guc_to_gt(guc);
2108 	struct xe_exec_queue *q;
2109 
2110 	if (unlikely(guc_id >= GUC_ID_MAX)) {
2111 		xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
2112 		return NULL;
2113 	}
2114 
2115 	q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
2116 	if (unlikely(!q)) {
2117 		xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
2118 		return NULL;
2119 	}
2120 
2121 	xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
2122 	xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
2123 
2124 	return q;
2125 }
2126 
deregister_exec_queue(struct xe_guc * guc,struct xe_exec_queue * q)2127 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
2128 {
2129 	u32 action[] = {
2130 		XE_GUC_ACTION_DEREGISTER_CONTEXT,
2131 		q->guc->id,
2132 	};
2133 
2134 	xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
2135 	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
2136 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
2137 	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
2138 
2139 	trace_xe_exec_queue_deregister(q);
2140 
2141 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
2142 }
2143 
handle_sched_done(struct xe_guc * guc,struct xe_exec_queue * q,u32 runnable_state)2144 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
2145 			      u32 runnable_state)
2146 {
2147 	trace_xe_exec_queue_scheduling_done(q);
2148 
2149 	if (runnable_state == 1) {
2150 		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
2151 
2152 		q->guc->resume_time = ktime_get();
2153 		clear_exec_queue_pending_enable(q);
2154 		smp_wmb();
2155 		wake_up_all(&guc->ct.wq);
2156 	} else {
2157 		bool check_timeout = exec_queue_check_timeout(q);
2158 
2159 		xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
2160 		xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
2161 
2162 		if (q->guc->suspend_pending) {
2163 			suspend_fence_signal(q);
2164 			clear_exec_queue_pending_disable(q);
2165 		} else {
2166 			if (exec_queue_banned(q) || check_timeout) {
2167 				smp_wmb();
2168 				wake_up_all(&guc->ct.wq);
2169 			}
2170 			if (!check_timeout && exec_queue_destroyed(q)) {
2171 				/*
2172 				 * Make sure to clear the pending_disable only
2173 				 * after sampling the destroyed state. We want
2174 				 * to ensure we don't trigger the unregister too
2175 				 * early with something intending to only
2176 				 * disable scheduling. The caller doing the
2177 				 * destroy must wait for an ongoing
2178 				 * pending_disable before marking as destroyed.
2179 				 */
2180 				clear_exec_queue_pending_disable(q);
2181 				deregister_exec_queue(guc, q);
2182 			} else {
2183 				clear_exec_queue_pending_disable(q);
2184 			}
2185 		}
2186 	}
2187 }
2188 
xe_guc_sched_done_handler(struct xe_guc * guc,u32 * msg,u32 len)2189 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2190 {
2191 	struct xe_exec_queue *q;
2192 	u32 guc_id, runnable_state;
2193 
2194 	if (unlikely(len < 2))
2195 		return -EPROTO;
2196 
2197 	guc_id = msg[0];
2198 	runnable_state = msg[1];
2199 
2200 	q = g2h_exec_queue_lookup(guc, guc_id);
2201 	if (unlikely(!q))
2202 		return -EPROTO;
2203 
2204 	if (unlikely(!exec_queue_pending_enable(q) &&
2205 		     !exec_queue_pending_disable(q))) {
2206 		xe_gt_err(guc_to_gt(guc),
2207 			  "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
2208 			  atomic_read(&q->guc->state), q->guc->id,
2209 			  runnable_state);
2210 		return -EPROTO;
2211 	}
2212 
2213 	handle_sched_done(guc, q, runnable_state);
2214 
2215 	return 0;
2216 }
2217 
handle_deregister_done(struct xe_guc * guc,struct xe_exec_queue * q)2218 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
2219 {
2220 	trace_xe_exec_queue_deregister_done(q);
2221 
2222 	clear_exec_queue_registered(q);
2223 
2224 	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
2225 		xe_exec_queue_put(q);
2226 	else
2227 		__guc_exec_queue_destroy(guc, q);
2228 }
2229 
xe_guc_deregister_done_handler(struct xe_guc * guc,u32 * msg,u32 len)2230 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2231 {
2232 	struct xe_exec_queue *q;
2233 	u32 guc_id;
2234 
2235 	if (unlikely(len < 1))
2236 		return -EPROTO;
2237 
2238 	guc_id = msg[0];
2239 
2240 	q = g2h_exec_queue_lookup(guc, guc_id);
2241 	if (unlikely(!q))
2242 		return -EPROTO;
2243 
2244 	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
2245 	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
2246 		xe_gt_err(guc_to_gt(guc),
2247 			  "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
2248 			  atomic_read(&q->guc->state), q->guc->id);
2249 		return -EPROTO;
2250 	}
2251 
2252 	handle_deregister_done(guc, q);
2253 
2254 	return 0;
2255 }
2256 
xe_guc_exec_queue_reset_handler(struct xe_guc * guc,u32 * msg,u32 len)2257 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
2258 {
2259 	struct xe_gt *gt = guc_to_gt(guc);
2260 	struct xe_exec_queue *q;
2261 	u32 guc_id;
2262 
2263 	if (unlikely(len < 1))
2264 		return -EPROTO;
2265 
2266 	guc_id = msg[0];
2267 
2268 	q = g2h_exec_queue_lookup(guc, guc_id);
2269 	if (unlikely(!q))
2270 		return -EPROTO;
2271 
2272 	xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
2273 		   xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2274 
2275 	trace_xe_exec_queue_reset(q);
2276 
2277 	/*
2278 	 * A banned engine is a NOP at this point (came from
2279 	 * guc_exec_queue_timedout_job). Otherwise, kick the drm scheduler to
2280 	 * cancel jobs by setting the job timeout to the minimum value, which
2281 	 * kicks guc_exec_queue_timedout_job.
2282 	 */
2283 	set_exec_queue_reset(q);
2284 	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2285 		xe_guc_exec_queue_trigger_cleanup(q);
2286 
2287 	return 0;
2288 }
2289 
2290 /*
2291  * xe_guc_error_capture_handler - Handler of GuC captured message
2292  * @guc: The GuC object
2293  * @msg: Pointer to the message
2294  * @len: The message length
2295  *
2296  * When the GuC captured data is ready, the GuC sends the
2297  * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host; this function
2298  * is called first to check status before processing the data in the message.
2299  *
2300  * Returns: 0 on success, negative error code on failure.
2301  */
xe_guc_error_capture_handler(struct xe_guc * guc,u32 * msg,u32 len)2302 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
2303 {
2304 	u32 status;
2305 
2306 	if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
2307 		return -EPROTO;
2308 
2309 	status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
2310 	if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
2311 		xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
2312 
2313 	xe_guc_capture_process(guc);
2314 
2315 	return 0;
2316 }
2317 
xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc * guc,u32 * msg,u32 len)2318 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2319 					       u32 len)
2320 {
2321 	struct xe_gt *gt = guc_to_gt(guc);
2322 	struct xe_exec_queue *q;
2323 	u32 guc_id;
2324 	u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
2325 
2326 	if (unlikely(!len || len > 2))
2327 		return -EPROTO;
2328 
2329 	guc_id = msg[0];
2330 
2331 	if (len == 2)
2332 		type = msg[1];
2333 
2334 	if (guc_id == GUC_ID_UNKNOWN) {
2335 		/*
2336 		 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any PF/VF
2337 		 * context. In such a case only the PF will be notified about the fault.
2338 		 */
2339 		xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
2340 		return 0;
2341 	}
2342 
2343 	q = g2h_exec_queue_lookup(guc, guc_id);
2344 	if (unlikely(!q))
2345 		return -EPROTO;
2346 
2347 	/*
2348 	 * The type is HW-defined and changes based on platform, so we don't
2349 	 * decode it in the kernel and only check if it is valid.
2350 	 * See bspec 54047 and 72187 for details.
2351 	 */
2352 	if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
2353 		xe_gt_dbg(gt,
2354 			  "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
2355 			  type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2356 	else
2357 		xe_gt_dbg(gt,
2358 			  "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
2359 			  xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2360 
2361 	trace_xe_exec_queue_memory_cat_error(q);
2362 
2363 	/* Treat the same as engine reset */
2364 	set_exec_queue_reset(q);
2365 	if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2366 		xe_guc_exec_queue_trigger_cleanup(q);
2367 
2368 	return 0;
2369 }
2370 
xe_guc_exec_queue_reset_failure_handler(struct xe_guc * guc,u32 * msg,u32 len)2371 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2372 {
2373 	struct xe_gt *gt = guc_to_gt(guc);
2374 	u8 guc_class, instance;
2375 	u32 reason;
2376 
2377 	if (unlikely(len != 3))
2378 		return -EPROTO;
2379 
2380 	guc_class = msg[0];
2381 	instance = msg[1];
2382 	reason = msg[2];
2383 
2384 	/* Unexpected failure of a hardware feature, log an actual error */
2385 	xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
2386 		  guc_class, instance, reason);
2387 
2388 	xe_gt_reset_async(gt);
2389 
2390 	return 0;
2391 }
2392 
2393 static void
guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue * q,struct xe_guc_submit_exec_queue_snapshot * snapshot)2394 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
2395 				   struct xe_guc_submit_exec_queue_snapshot *snapshot)
2396 {
2397 	struct xe_guc *guc = exec_queue_to_guc(q);
2398 	struct xe_device *xe = guc_to_xe(guc);
2399 	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
2400 	int i;
2401 
2402 	snapshot->guc.wqi_head = q->guc->wqi_head;
2403 	snapshot->guc.wqi_tail = q->guc->wqi_tail;
2404 	snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
2405 	snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
2406 	snapshot->parallel.wq_desc.status = parallel_read(xe, map,
2407 							  wq_desc.wq_status);
2408 
2409 	if (snapshot->parallel.wq_desc.head !=
2410 	    snapshot->parallel.wq_desc.tail) {
2411 		for (i = snapshot->parallel.wq_desc.head;
2412 		     i != snapshot->parallel.wq_desc.tail;
2413 		     i = (i + sizeof(u32)) % WQ_SIZE)
2414 			snapshot->parallel.wq[i / sizeof(u32)] =
2415 				parallel_read(xe, map, wq[i / sizeof(u32)]);
2416 	}
2417 }
2418 
2419 static void
guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot * snapshot,struct drm_printer * p)2420 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2421 				 struct drm_printer *p)
2422 {
2423 	int i;
2424 
2425 	drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
2426 		   snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
2427 	drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
2428 		   snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
2429 	drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
2430 
2431 	if (snapshot->parallel.wq_desc.head !=
2432 	    snapshot->parallel.wq_desc.tail) {
2433 		for (i = snapshot->parallel.wq_desc.head;
2434 		     i != snapshot->parallel.wq_desc.tail;
2435 		     i = (i + sizeof(u32)) % WQ_SIZE)
2436 			drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
2437 				   snapshot->parallel.wq[i / sizeof(u32)]);
2438 	}
2439 }
2440 
2441 /**
2442  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
2443  * @q: faulty exec queue
2444  *
2445  * This can be printed out at a later stage, for example during dev_coredump
2446  * analysis.
2447  *
2448  * Returns: a GuC Submit Engine snapshot object that must be freed by the
2449  * caller, using `xe_guc_exec_queue_snapshot_free`.
2450  */
2451 struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue * q)2452 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
2453 {
2454 	struct xe_gpu_scheduler *sched = &q->guc->sched;
2455 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
2456 	int i;
2457 
2458 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
2459 
2460 	if (!snapshot)
2461 		return NULL;
2462 
2463 	snapshot->guc.id = q->guc->id;
2464 	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
2465 	snapshot->class = q->class;
2466 	snapshot->logical_mask = q->logical_mask;
2467 	snapshot->width = q->width;
2468 	snapshot->refcount = kref_read(&q->refcount);
2469 	snapshot->sched_timeout = sched->base.timeout;
2470 	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
2471 	snapshot->sched_props.preempt_timeout_us =
2472 		q->sched_props.preempt_timeout_us;
2473 
2474 	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
2475 				      GFP_ATOMIC);
2476 
2477 	if (snapshot->lrc) {
2478 		for (i = 0; i < q->width; ++i) {
2479 			struct xe_lrc *lrc = q->lrc[i];
2480 
2481 			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
2482 		}
2483 	}
2484 
2485 	snapshot->schedule_state = atomic_read(&q->guc->state);
2486 	snapshot->exec_queue_flags = q->flags;
2487 
2488 	snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
2489 	if (snapshot->parallel_execution)
2490 		guc_exec_queue_wq_snapshot_capture(q, snapshot);
2491 
2492 	spin_lock(&sched->base.job_list_lock);
2493 	snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
2494 	snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
2495 					       sizeof(struct pending_list_snapshot),
2496 					       GFP_ATOMIC);
2497 
2498 	if (snapshot->pending_list) {
2499 		struct xe_sched_job *job_iter;
2500 
2501 		i = 0;
2502 		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
2503 			snapshot->pending_list[i].seqno =
2504 				xe_sched_job_seqno(job_iter);
2505 			snapshot->pending_list[i].fence =
2506 				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
2507 			snapshot->pending_list[i].finished =
2508 				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
2509 				? 1 : 0;
2510 			i++;
2511 		}
2512 	}
2513 
2514 	spin_unlock(&sched->base.job_list_lock);
2515 
2516 	return snapshot;
2517 }
2518 
2519 /**
2520  * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
2521  * @snapshot: Previously captured snapshot of job.
2522  *
2523  * This captures some data that requires taking some locks, so it cannot be done in the signaling path.
2524  */
2525 void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot * snapshot)2526 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2527 {
2528 	int i;
2529 
2530 	if (!snapshot || !snapshot->lrc)
2531 		return;
2532 
2533 	for (i = 0; i < snapshot->width; ++i)
2534 		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
2535 }
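
/*
 * Hedged sketch of the expected snapshot lifecycle (simplified; the real
 * devcoredump path splits these steps between capture time and a worker):
 *
 *	struct xe_guc_submit_exec_queue_snapshot *s;
 *
 *	s = xe_guc_exec_queue_snapshot_capture(q);	// atomic-context safe
 *	xe_guc_exec_queue_snapshot_capture_delayed(s);	// may take locks
 *	xe_guc_exec_queue_snapshot_print(s, p);		// p is a drm_printer
 *	xe_guc_exec_queue_snapshot_free(s);
 */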
2536 
2537 /**
2538  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
2539  * @snapshot: GuC Submit Engine snapshot object.
2540  * @p: drm_printer where it will be printed out.
2541  *
2542  * This function prints out a given GuC Submit Engine snapshot object.
2543  */
2544 void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot * snapshot,struct drm_printer * p)2545 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2546 				 struct drm_printer *p)
2547 {
2548 	int i;
2549 
2550 	if (!snapshot)
2551 		return;
2552 
2553 	drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
2554 	drm_printf(p, "\tName: %s\n", snapshot->name);
2555 	drm_printf(p, "\tClass: %d\n", snapshot->class);
2556 	drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
2557 	drm_printf(p, "\tWidth: %d\n", snapshot->width);
2558 	drm_printf(p, "\tRef: %d\n", snapshot->refcount);
2559 	drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
2560 	drm_printf(p, "\tTimeslice: %u (us)\n",
2561 		   snapshot->sched_props.timeslice_us);
2562 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
2563 		   snapshot->sched_props.preempt_timeout_us);
2564 
2565 	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
2566 		xe_lrc_snapshot_print(snapshot->lrc[i], p);
2567 
2568 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
2569 	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
2570 
2571 	if (snapshot->parallel_execution)
2572 		guc_exec_queue_wq_snapshot_print(snapshot, p);
2573 
2574 	for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
2575 	     i++)
2576 		drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
2577 			   snapshot->pending_list[i].seqno,
2578 			   snapshot->pending_list[i].fence,
2579 			   snapshot->pending_list[i].finished);
2580 }
2581 
2582 /**
2583  * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
2584  * snapshot.
2585  * @snapshot: GuC Submit Engine snapshot object.
2586  *
2587  * This function frees all the memory that was allocated at capture
2588  * time.
2589  */
xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot * snapshot)2590 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2591 {
2592 	int i;
2593 
2594 	if (!snapshot)
2595 		return;
2596 
2597 	if (snapshot->lrc) {
2598 		for (i = 0; i < snapshot->width; i++)
2599 			xe_lrc_snapshot_free(snapshot->lrc[i]);
2600 		kfree(snapshot->lrc);
2601 	}
2602 	kfree(snapshot->pending_list);
2603 	kfree(snapshot);
2604 }
2605 
guc_exec_queue_print(struct xe_exec_queue * q,struct drm_printer * p)2606 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
2607 {
2608 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
2609 
2610 	snapshot = xe_guc_exec_queue_snapshot_capture(q);
2611 	xe_guc_exec_queue_snapshot_print(snapshot, p);
2612 	xe_guc_exec_queue_snapshot_free(snapshot);
2613 }
2614 
2615 /**
2616  * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
2617  * @q: Execution queue
2618  * @ctx_type: Type of the context
2619  *
2620  * This function registers the execution queue with the GuC. Special context
2621  * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
2622  * are only applicable to iGPU and only in a VF.
2623  * The execution queue is submitted to the GuC after it is registered.
2624  *
2625  * Returns: none.
2626  */
xe_guc_register_vf_exec_queue(struct xe_exec_queue * q,int ctx_type)2627 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
2628 {
2629 	struct xe_guc *guc = exec_queue_to_guc(q);
2630 	struct xe_device *xe = guc_to_xe(guc);
2631 	struct xe_gt *gt = guc_to_gt(guc);
2632 
2633 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
2634 	xe_gt_assert(gt, !IS_DGFX(xe));
2635 	xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
2636 		     ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
2637 	xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
2638 
2639 	register_exec_queue(q, ctx_type);
2640 	enable_scheduling(q);
2641 }
2642 
2643 /**
2644  * xe_guc_submit_print - GuC Submit Print.
2645  * @guc: GuC.
2646  * @p: drm_printer where it will be printed out.
2647  *
2648  * This function captures and prints snapshots of **all** GuC Engines.
2649  */
xe_guc_submit_print(struct xe_guc * guc,struct drm_printer * p)2650 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2651 {
2652 	struct xe_exec_queue *q;
2653 	unsigned long index;
2654 
2655 	if (!xe_device_uc_enabled(guc_to_xe(guc)))
2656 		return;
2657 
2658 	mutex_lock(&guc->submission_state.lock);
2659 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2660 		guc_exec_queue_print(q, p);
2661 	mutex_unlock(&guc->submission_state.lock);
2662 }
2663 
2664 /**
2665  * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
2666  * exec queues registered to given GuC.
2667  * @guc: the &xe_guc struct instance
2668  * @scratch: scratch buffer to be used as temporary storage
2669  *
2670  * Returns: zero on success, negative error code on failure.
2671  */
xe_guc_contexts_hwsp_rebase(struct xe_guc * guc,void * scratch)2672 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
2673 {
2674 	struct xe_exec_queue *q;
2675 	unsigned long index;
2676 	int err = 0;
2677 
2678 	mutex_lock(&guc->submission_state.lock);
2679 	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2680 		err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
2681 		if (err)
2682 			break;
2683 		if (xe_exec_queue_is_parallel(q))
2684 			err = wq_items_rebase(q);
2685 		if (err)
2686 			break;
2687 	}
2688 	mutex_unlock(&guc->submission_state.lock);
2689 
2690 	return err;
2691 }
2692