// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule-engine-done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 10)
#define EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND	(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND;
}

static void set_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static void clear_exec_queue_idle_skip_suspend(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_IDLE_SKIP_SUSPEND, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

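/*
 * drmm cleanup action: wait (up to 5s) for every exec queue to drop out of
 * the GuC lookup xarray, drain the destroy workqueue, and then tear the
 * xarray down. The assert fires if queues are still registered at removal.
 */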
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

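/*
 * Record a fs_reclaim -> submission_state.lock dependency with lockdep so
 * that any GFP_KERNEL allocation performed while holding the lock is flagged
 * as a potential deadlock without needing real memory pressure.
 */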
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as @num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers will have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_INSTANCES(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

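/*
 * Drop the lookup xarray entries and the GuC context IDs owned by @q. Callers
 * must hold submission_state.lock; @xa_count allows a partial unwind when
 * alloc_guc_id() fails midway. Wakes fini_wq once the lookup goes empty so
 * guc_submit_fini() can complete.
 */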
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path;
	 * worst case the user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have the caller pre-alloc or post-alloc with GFP_KERNEL to
	 * prevent failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

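/*
 * Build a HOST2GUC_UPDATE_CONTEXT_POLICIES H2G for @q carrying the scheduling
 * priority, execution quantum, preemption timeout and SLPC frequency request
 * KLVs, and hand it to the GuC CT layer. Only called for registered queues
 * that are not multi-queue secondaries.
 */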
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q) &&
		     !xe_exec_queue_is_multi_queue_secondary(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	xe_assert(guc_to_xe(guc), !xe_exec_queue_is_multi_queue_secondary(q));

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	xe_sched_tdr_queue_imm(&q->guc->sched);
}

static void xe_guc_exec_queue_group_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));

	/* Group banned, skip timeout check in TDR */
	WRITE_ONCE(group->banned, true);
	xe_guc_exec_queue_trigger_cleanup(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		xe_guc_exec_queue_trigger_cleanup(eq);
	mutex_unlock(&group->list_lock);
}

static void xe_guc_exec_queue_reset_trigger_cleanup(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
		struct xe_exec_queue_group *group = q->multi_queue.group;
		struct xe_exec_queue *eq;

		/* Group banned, skip timeout check in TDR */
		WRITE_ONCE(group->banned, true);

		set_exec_queue_reset(primary);
		if (!exec_queue_banned(primary))
			xe_guc_exec_queue_trigger_cleanup(primary);

		mutex_lock(&group->list_lock);
		list_for_each_entry(eq, &group->list, multi_queue.link) {
			set_exec_queue_reset(eq);
			if (!exec_queue_banned(eq))
				xe_guc_exec_queue_trigger_cleanup(eq);
		}
		mutex_unlock(&group->list_lock);
	} else {
		set_exec_queue_reset(q);
		if (!exec_queue_banned(q))
			xe_guc_exec_queue_trigger_cleanup(q);
	}
}

static void set_exec_queue_group_banned(struct xe_exec_queue *q)
{
	struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_exec_queue *eq;

	/* Ban all queues of the multi-queue group */
	xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)),
		     xe_exec_queue_is_multi_queue(q));
	set_exec_queue_banned(primary);

	mutex_lock(&group->list_lock);
	list_for_each_entry(eq, &group->list, multi_queue.link)
		set_exec_queue_banned(eq);
	mutex_unlock(&group->list_lock);
}

/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
	u32 flags;
	u32 context_idx;
	u32 engine_class;
	u32 engine_submit_mask;
	u32 wq_desc_lo;
	u32 wq_desc_hi;
	u32 wq_base_lo;
	u32 wq_base_hi;
	u32 wq_size;
	u32 cgp_lo;
	u32 cgp_hi;
	u32 hwlrca_lo;
	u32 hwlrca_hi;
};

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

/**
 * DOC: Multi Queue Group GuC interface
 *
 * The multi queue group coordination between the KMD and GuC is through a
 * software construct called the Context Group Page (CGP). The CGP is a KMD
 * managed 4KB page allocated in the global GTT.
 *
 * CGP format:
 *
 * +-----------+---------------------------+---------------------------------------------+
 * | DWORD     | Name                      | Description                                 |
 * +-----------+---------------------------+---------------------------------------------+
 * | 0         | Version                   | Bits [15:8]=Major ver, [7:0]=Minor ver      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 1..15     | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 16        | KMD_QUEUE_UPDATE_MASK_DW0 | KMD queue mask for queues 31..0             |
 * +-----------+---------------------------+---------------------------------------------+
 * | 17        | KMD_QUEUE_UPDATE_MASK_DW1 | KMD queue mask for queues 63..32            |
 * +-----------+---------------------------+---------------------------------------------+
 * | 18..31    | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 32        | Q0CD_DW0                  | Queue 0 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 33        | Q0ContextIndex            | Context ID for Queue 0                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | 34        | Q1CD_DW0                  | Queue 1 context LRC descriptor lower DWORD  |
 * +-----------+---------------------------+---------------------------------------------+
 * | 35        | Q1ContextIndex            | Context ID for Queue 1                      |
 * +-----------+---------------------------+---------------------------------------------+
 * | ...       | ...                       | ...                                         |
 * +-----------+---------------------------+---------------------------------------------+
 * | 158       | Q63CD_DW0                 | Queue 63 context LRC descriptor lower DWORD |
 * +-----------+---------------------------+---------------------------------------------+
 * | 159       | Q63ContextIndex           | Context ID for Queue 63                     |
 * +-----------+---------------------------+---------------------------------------------+
 * | 160..1023 | RESERVED                  | MBZ                                         |
 * +-----------+---------------------------+---------------------------------------------+
 *
 * While registering Q0 with GuC, the CGP is updated with the Q0 entry and GuC
 * is notified through the XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE H2G
 * message, which specifies the CGP address. When secondary queues are added to
 * the group, the CGP is updated with an entry for that queue and GuC is
 * notified through the XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC H2G
 * interface. GuC responds to these H2G messages with a
 * XE_GUC_ACTION_NOTIFY_MULTIQ_CONTEXT_CGP_SYNC_DONE G2H message. GuC also
 * sends a XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CGP_CONTEXT_ERROR notification for
 * any error in the CGP. Only one of these CGP update messages can be
 * outstanding (waiting for a GuC response) at any time. The bits in the
 * KMD_QUEUE_UPDATE_MASK_DW* fields indicate which queue entry is being updated
 * in the CGP.
 *
 * The primary queue (Q0) represents the multi queue group context in GuC and
 * submission on any queue of the group must be through the Q0 GuC interface
 * only.
 *
 * As it is not required to register secondary queues with GuC, the secondary
 * queue context IDs in the CGP are populated with the Q0 context ID.
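 *
 * For example, adding a secondary queue at position 5 of the group writes the
 * lower 32 bits of its LRC descriptor to DWORD 42 (Q5CD_DW0), the primary
 * queue's context ID to DWORD 43 (Q5ContextIndex), sets bit 5 of
 * KMD_QUEUE_UPDATE_MASK_DW0 (DWORD 16) and clears KMD_QUEUE_UPDATE_MASK_DW1
 * before sending XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC.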
 */

#define CGP_VERSION_MAJOR_SHIFT 8

static void xe_guc_exec_queue_group_cgp_update(struct xe_device *xe,
					       struct xe_exec_queue *q)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	u32 guc_id = group->primary->guc->id;

	/* Currently implementing CGP version 1.0 */
	xe_map_wr(xe, &group->cgp_bo->vmap, 0, u32,
		  1 << CGP_VERSION_MAJOR_SHIFT);

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (32 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, lower_32_bits(xe_lrc_descriptor(q->lrc[0])));

	xe_map_wr(xe, &group->cgp_bo->vmap,
		  (33 + q->multi_queue.pos * 2) * sizeof(u32),
		  u32, guc_id);

	if (q->multi_queue.pos / 32) {
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos % 32));
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32), u32, 0);
	} else {
		xe_map_wr(xe, &group->cgp_bo->vmap, 16 * sizeof(u32),
			  u32, BIT(q->multi_queue.pos));
		xe_map_wr(xe, &group->cgp_bo->vmap, 17 * sizeof(u32), u32, 0);
	}
}

static void xe_guc_exec_queue_group_cgp_sync(struct xe_guc *guc,
					     struct xe_exec_queue *q,
					     const u32 *action, u32 len)
{
	struct xe_exec_queue_group *group = q->multi_queue.group;
	struct xe_device *xe = guc_to_xe(guc);
	long ret;

	/*
	 * As all queues of a multi queue group use a single drm scheduler
	 * submit workqueue, CGP synchronizations with GuC are serialized.
	 * Hence, no locking is required here.
	 * Wait for any pending CGP_SYNC_DONE response before updating the
	 * CGP page and sending the CGP_SYNC message.
	 *
	 * FIXME: Support VF migration
	 */
	ret = wait_event_timeout(guc->ct.wq,
				 !READ_ONCE(group->sync_pending) ||
				 xe_guc_read_stopped(guc), HZ);
	if (!ret || xe_guc_read_stopped(guc)) {
		/* CGP_SYNC failed. Reset the GT and clean up the group */
		xe_gt_warn(guc_to_gt(guc), "Wait for CGP_SYNC_DONE response failed!\n");
		set_exec_queue_group_banned(q);
		xe_gt_reset_async(q->gt);
		xe_guc_exec_queue_group_trigger_cleanup(q);
		return;
	}

	xe_lrc_set_multi_queue_priority(q->lrc[0], q->multi_queue.priority);
	xe_guc_exec_queue_group_cgp_update(xe, q);

	WRITE_ONCE(group->sync_pending, true);
	xe_guc_ct_send(&guc->ct, action, len, G2H_LEN_DW_MULTI_QUEUE_CONTEXT, 1);
}

static void __register_exec_queue_group(struct xe_guc *guc,
					struct xe_exec_queue *q,
					struct guc_ctxt_registration_info *info)
{
#define MAX_MULTI_QUEUE_REG_SIZE (8)
	u32 action[MAX_MULTI_QUEUE_REG_SIZE];
	int len = 0;

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = 0; /* Reserved */
	action[len++] = info->cgp_lo;
	action[len++] = info->cgp_hi;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_REG_SIZE);
#undef MAX_MULTI_QUEUE_REG_SIZE

	/*
	 * The above XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_QUEUE does expect a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from the GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void xe_guc_exec_queue_group_add(struct xe_guc *guc,
					struct xe_exec_queue *q)
{
#define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
	u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
	int len = 0;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_multi_queue_secondary(q));

	action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
	action[len++] = q->multi_queue.group->primary->guc->id;

	xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
#undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE

	/*
	 * The above XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC does expect a
	 * XE_GUC_ACTION_NOTIFY_MULTI_QUEUE_CONTEXT_CGP_SYNC_DONE response
	 * from the GuC.
	 */
	xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
}

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fix up later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fix up later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

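/*
 * Fill in the registration info from the queue and its first LRC, then pick
 * the registration flavour: multi-queue primaries register the group via the
 * CGP, parallel queues register as multi-LRC with a work queue, everything
 * else registers as a single context. Secondaries are never registered
 * directly; they are added to an already registered group instead.
 */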
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_multi_queue(q)) {
		struct xe_exec_queue_group *group = q->multi_queue.group;

		info.cgp_lo = xe_bo_ggtt_addr(group->cgp_bo);
		info.cgp_hi = 0;
	}

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_multi_queue_primary(q))
		__register_exec_queue_group(guc, q, &info);
	else if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else if (!xe_exec_queue_is_multi_queue_secondary(q))
		__register_exec_queue(guc, &info);

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		init_policies(guc, q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		xe_guc_exec_queue_group_add(guc, q);
}

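/*
 * Parallel (multi-LRC) submission work queue helpers. The WQ lives in the
 * guc_submit_parallel_scratch page of the first LRC and is a circular buffer
 * of WQ_SIZE bytes; the driver owns the tail while the GuC advances the head
 * via wq_desc.head.
 */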
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static inline void relaxed_ms_sleep(unsigned int delay_ms)
{
	unsigned long min_us, max_us;

	if (!delay_ms)
		return;

	if (delay_ms > 20) {
		msleep(delay_ms);
		return;
	}

	min_us = mul_u32_u32(delay_ms, 1000);
	max_us = min_us + 500;

	usleep_range(min_us, max_us);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_total_ms += sleep_period_ms;
			if (sleep_period_ms < 64)
				sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

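/*
 * Append a WQ_TYPE_MULTI_LRC work item: a header, the first LRC descriptor,
 * the GuC ID plus ring tail of LRC 0, a zeroed dword, and the ring tails of
 * the remaining LRCs. Publish it by bumping wq_desc.tail after a write
 * barrier so the GuC never observes a partially written item.
 */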
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

#define RESUME_PENDING	~0x0ull
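
/*
 * First submission on a context enables scheduling with a
 * SCHED_CONTEXT_MODE_SET(ENABLE) H2G (expecting one G2H back); later
 * submissions only need a SCHED_CONTEXT doorbell. Multi-queue secondaries
 * always funnel through the primary queue's GuC context.
 */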
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else if (!exec_queue_idle_skip_suspend(q))
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	/*
	 * All queues in a multi-queue group will use the primary queue
	 * of the group to interface with GuC.
	 */
	q = xe_exec_queue_multi_queue_primary(q);

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (xe_exec_queue_is_multi_queue_secondary(q)) {
			struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);

			if (exec_queue_killed_or_banned_or_wedged(primary)) {
				killed_or_banned_or_wedged = true;
				goto run_job_out;
			}

			if (!exec_queue_registered(primary))
				register_exec_queue(primary, GUC_CONTEXT_NORMAL);
		}

		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

run_job_out:

	return job->fence;
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
						    struct xe_exec_queue *q,
						    u32 runnable_state);
static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q);

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
	u32 action[] = { \
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
		q->guc->id, \
		GUC_CONTEXT_##enable_disable, \
	}

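/*
 * Tear-down path: lower the preemption timeout, wait for any in-flight
 * enable/disable G2H, mark the queue destroyed and send a schedule-disable;
 * the G2H completion handler then deregisters the context, which is why
 * space for both G2Hs is reserved here.
 */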
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	if (!xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);

	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2Hs here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
			       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with the GuC by taking a ref to
 * each queue. Register a DRMM handler to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If the device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
			  "Although device is wedged.\n",
			  xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

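/*
 * Decide whether @job really exceeded its timeout by comparing LRC timestamps
 * sampled by the hardware: a stuck timestamp or a not-yet-started job is
 * handled specially, otherwise the accumulated run time (with a 5% allowance
 * for GuC scheduling latency) is checked against job_timeout_ms.
 */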
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_timestamp(q->lrc[0]));
	if (ctx_timestamp == job->sample_timestamp) {
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			xe_gt_notice(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				     xe_sched_job_seqno(job),
				     xe_sched_job_lrc_seqno(job), q->guc->id);
		else
			xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, timestamp stuck",
				   xe_sched_job_seqno(job),
				   xe_sched_job_lrc_seqno(job), q->guc->id);

		return xe_sched_invalidate_job(job, 0);
	}

	job->sample_timestamp = ctx_timestamp;
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * The counter wraps at ~223s at the usual 19.2MHz; be paranoid and
	 * catch possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure the timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

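/*
 * Send SCHED_CONTEXT_MODE_SET(ENABLE) (or the multi-queue secondary
 * equivalent) and wait up to 5s for the scheduling-done G2H; on timeout the
 * queue is banned and a GT reset plus an immediate TDR are scheduled.
 */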
static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 1);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate && !xe_exec_queue_is_multi_queue_secondary(q))
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	if (xe_exec_queue_is_multi_queue_secondary(q))
		handle_multi_queue_secondary_sched_done(guc, q, 0);
	else
		xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
			       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

1412 static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job * drm_job)1413 guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
1414 {
1415 struct xe_sched_job *job = to_xe_sched_job(drm_job);
1416 struct drm_sched_job *tmp_job;
1417 struct xe_exec_queue *q = job->q;
1418 struct xe_gpu_scheduler *sched = &q->guc->sched;
1419 struct xe_guc *guc = exec_queue_to_guc(q);
1420 const char *process_name = "no process";
1421 struct xe_device *xe = guc_to_xe(guc);
1422 int err = -ETIME;
1423 pid_t pid = -1;
1424 bool wedged = false, skip_timeout_check;
1425
1426 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1427
1428 /*
1429 * TDR has fired before free job worker. Common if exec queue
1430 * immediately closed after last fence signaled. Add back to pending
1431 * list so job can be freed and kick scheduler ensuring free job is not
1432 * lost.
1433 */
1434 if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
1435 vf_recovery(guc))
1436 return DRM_GPU_SCHED_STAT_NO_HANG;
1437
1438 /* Kill the run_job entry point */
1439 xe_sched_submission_stop(sched);
1440
1441 /* Must check all state after stopping scheduler */
1442 skip_timeout_check = exec_queue_reset(q) ||
1443 exec_queue_killed_or_banned_or_wedged(q);
1444
1445 /* Skip timeout check if multi-queue group is banned */
1446 if (xe_exec_queue_is_multi_queue(q) &&
1447 READ_ONCE(q->multi_queue.group->banned))
1448 skip_timeout_check = true;
1449
1450 /* LR jobs can only get here if queue has been killed or hit an error */
1451 if (xe_exec_queue_is_lr(q))
1452 xe_gt_assert(guc_to_gt(guc), skip_timeout_check);
1453
1454 /*
1455 * FIXME: In multi-queue scenario, the TDR must ensure that the whole
1456 * multi-queue group is off the HW before signaling the fences to avoid
1457 * possible memory corruptions. This means disabling scheduling on the
1458 * primary queue before or during the secondary queue's TDR. Need to
1459 * implement this in least obtrusive way.
1460 */
1461
1462 /*
1463 * If devcoredump not captured and GuC capture for the job is not ready
1464 * do manual capture first and decide later if we need to use it
1465 */
1466 if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
1467 !xe_guc_capture_get_matching_and_lock(q)) {
1468 /* take force wake before engine register manual capture */
1469 CLASS(xe_force_wake, fw_ref)(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
1470 if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
1471 xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");
1472
1473 xe_engine_snapshot_capture_for_queue(q);
1474 }
1475
1476 /*
1477 * Check if job is actually timed out, if so restart job execution and TDR
1478 */
1479 if (!skip_timeout_check && !check_timeout(q, job))
1480 goto rearm;
1481
1482 if (!exec_queue_killed(q))
1483 wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
1484
1485 set_exec_queue_banned(q);
1486
1487 /* Kick job / queue off hardware */
1488 if (!wedged && (exec_queue_enabled(q) || exec_queue_pending_disable(q))) {
1489 int ret;
1490
1491 if (exec_queue_reset(q))
1492 err = -EIO;
1493
1494 if (xe_uc_fw_is_running(&guc->fw)) {
1495 /*
1496 * Wait for any pending G2H to flush out before
1497 * modifying state
1498 */
1499 ret = wait_event_timeout(guc->ct.wq,
1500 (!exec_queue_pending_enable(q) &&
1501 !exec_queue_pending_disable(q)) ||
1502 xe_guc_read_stopped(guc) ||
1503 vf_recovery(guc), HZ * 5);
1504 if (vf_recovery(guc))
1505 goto handle_vf_resume;
1506 if (!ret || xe_guc_read_stopped(guc))
1507 goto trigger_reset;
1508
1509 disable_scheduling(q, skip_timeout_check);
1510 }
1511
1512 /*
1513 * Must wait for scheduling to be disabled before signalling
1514 * any fences, if GT broken the GT reset code should signal us.
1515 *
1516 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
1517 * error) messages which can cause the schedule disable to get
1518 * lost. If this occurs, trigger a GT reset to recover.
1519 */
1520 smp_rmb();
1521 ret = wait_event_timeout(guc->ct.wq,
1522 !xe_uc_fw_is_running(&guc->fw) ||
1523 !exec_queue_pending_disable(q) ||
1524 xe_guc_read_stopped(guc) ||
1525 vf_recovery(guc), HZ * 5);
1526 if (vf_recovery(guc))
1527 goto handle_vf_resume;
1528 if (!ret || xe_guc_read_stopped(guc)) {
1529 trigger_reset:
1530 if (!ret)
1531 xe_gt_warn(guc_to_gt(guc),
1532 "Schedule disable failed to respond, guc_id=%d",
1533 q->guc->id);
1534 xe_devcoredump(q, job,
1535 "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
1536 q->guc->id, ret, xe_guc_read_stopped(guc));
1537 xe_gt_reset_async(q->gt);
1538 xe_sched_tdr_queue_imm(sched);
1539 goto rearm;
1540 }
1541 }
1542
1543 if (q->vm && q->vm->xef) {
1544 process_name = q->vm->xef->process_name;
1545 pid = q->vm->xef->pid;
1546 }
1547
1548 if (!exec_queue_killed(q))
1549 xe_gt_notice(guc_to_gt(guc),
1550 "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
1551 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1552 q->guc->id, q->flags, process_name, pid);
1553
1554 trace_xe_sched_job_timedout(job);
1555
1556 if (!exec_queue_killed(q))
1557 xe_devcoredump(q, job,
1558 "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
1559 xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
1560 q->guc->id, q->flags);
1561
1562 /*
1563 * Kernel jobs should never fail, nor should VM jobs if they do
1564 * somethings has gone wrong and the GT needs a reset
1565 */
1566 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
1567 "Kernel-submitted job timed out\n");
1568 xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
1569 "VM job timed out on non-killed execqueue\n");
1570 if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
1571 (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
1572 if (!xe_sched_invalidate_job(job, 2)) {
1573 xe_gt_reset_async(q->gt);
1574 goto rearm;
1575 }
1576 }
1577
1578 /* Mark all outstanding jobs as bad, thus completing them */
1579 xe_sched_job_set_error(job, err);
1580 drm_sched_for_each_pending_job(tmp_job, &sched->base, NULL)
1581 xe_sched_job_set_error(to_xe_sched_job(tmp_job), -ECANCELED);
1582
1583 xe_sched_submission_start(sched);
1584
1585 if (xe_exec_queue_is_multi_queue(q))
1586 xe_guc_exec_queue_group_trigger_cleanup(q);
1587 else
1588 xe_guc_exec_queue_trigger_cleanup(q);
1589
1590 /*
1591 * We want the job added back to the pending list so it gets freed; this
1592 * is what DRM_GPU_SCHED_STAT_NO_HANG does.
1593 */
1594 return DRM_GPU_SCHED_STAT_NO_HANG;
1595
1596 rearm:
1597 /*
1598	 * XXX: Ideally we want to adjust the timeout based on the current execution
1599	 * time, but there is not currently an easy way to do that in the DRM
1600	 * scheduler. With some thought, do this in a follow up.
1601 */
1602 xe_sched_submission_start(sched);
1603 handle_vf_resume:
1604 return DRM_GPU_SCHED_STAT_NO_HANG;
1605 }
1606
1607 static void guc_exec_queue_fini(struct xe_exec_queue *q)
1608 {
1609 struct xe_guc_exec_queue *ge = q->guc;
1610 struct xe_guc *guc = exec_queue_to_guc(q);
1611
1612 release_guc_id(guc, q);
1613 xe_sched_entity_fini(&ge->entity);
1614 xe_sched_fini(&ge->sched);
1615
1616 /*
1617	 * RCU free due to the sched being exported via DRM scheduler fences
1618 * (timeline name).
1619 */
1620 kfree_rcu(ge, rcu);
1621 }
1622
1623 static void __guc_exec_queue_destroy_async(struct work_struct *w)
1624 {
1625 struct xe_guc_exec_queue *ge =
1626 container_of(w, struct xe_guc_exec_queue, destroy_async);
1627 struct xe_exec_queue *q = ge->q;
1628 struct xe_guc *guc = exec_queue_to_guc(q);
1629
1630 guard(xe_pm_runtime)(guc_to_xe(guc));
1631 trace_xe_exec_queue_destroy(q);
1632
1633 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1634 struct xe_exec_queue_group *group = q->multi_queue.group;
1635
1636 mutex_lock(&group->list_lock);
1637 list_del(&q->multi_queue.link);
1638 mutex_unlock(&group->list_lock);
1639 }
1640
1641 /* Confirm no work left behind accessing device structures */
1642 cancel_delayed_work_sync(&ge->sched.base.work_tdr);
1643
1644 xe_exec_queue_fini(q);
1645 }
1646
1647 static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
1648 {
1649 struct xe_guc *guc = exec_queue_to_guc(q);
1650 struct xe_device *xe = guc_to_xe(guc);
1651
1652 INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
1653
1654 /* We must block on kernel engines so slabs are empty on driver unload */
1655 if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
1656 __guc_exec_queue_destroy_async(&q->guc->destroy_async);
1657 else
1658 queue_work(xe->destroy_wq, &q->guc->destroy_async);
1659 }
1660
1661 static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
1662 {
1663 /*
1664	 * Might be done from within the GPU scheduler; we need to do this async as
1665	 * we fini the scheduler when the engine is fini'd and the scheduler can't
1666	 * complete fini within itself (circular dependency). Async resolves
1667	 * this and we don't really care when everything is fini'd, just that it
1668	 * is.
1669 */
1670 guc_exec_queue_destroy_async(q);
1671 }
1672
1673 static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
1674 {
1675 struct xe_exec_queue *q = msg->private_data;
1676 struct xe_guc *guc = exec_queue_to_guc(q);
1677
1678 xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
1679 trace_xe_exec_queue_cleanup_entity(q);
1680
1681 /*
1682 * Expected state transitions for cleanup:
1683 * - If the exec queue is registered and GuC firmware is running, we must first
1684 * disable scheduling and deregister the queue to ensure proper teardown and
1685 * resource release in the GuC, then destroy the exec queue on driver side.
1686 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
1687 * we cannot expect a response for the deregister request. In this case,
1688 * it is safe to directly destroy the exec queue on driver side, as the GuC
1689 * will not process further requests and all resources must be cleaned up locally.
1690 */
1691 if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
1692 disable_scheduling_deregister(guc, q);
1693 else
1694 __guc_exec_queue_destroy(guc, q);
1695 }
1696
1697 static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
1698 {
1699 return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
1700 }
1701
1702 static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
1703 {
1704 struct xe_exec_queue *q = msg->private_data;
1705 struct xe_guc *guc = exec_queue_to_guc(q);
1706
1707 if (guc_exec_queue_allowed_to_change_state(q))
1708 init_policies(guc, q);
1709 kfree(msg);
1710 }
1711
1712 static void __suspend_fence_signal(struct xe_exec_queue *q)
1713 {
1714 struct xe_guc *guc = exec_queue_to_guc(q);
1715 struct xe_device *xe = guc_to_xe(guc);
1716
1717 if (!q->guc->suspend_pending)
1718 return;
1719
1720 WRITE_ONCE(q->guc->suspend_pending, false);
1721
1722 /*
1723 * We use a GuC shared wait queue for VFs because the VF resfix start
1724 * interrupt must be able to wake all instances of suspend_wait. This
1725 * prevents the VF migration worker from being starved during
1726 * scheduling.
1727 */
1728 if (IS_SRIOV_VF(xe))
1729 wake_up_all(&guc->ct.wq);
1730 else
1731 wake_up(&q->guc->suspend_wait);
1732 }
1733
1734 static void suspend_fence_signal(struct xe_exec_queue *q)
1735 {
1736 struct xe_guc *guc = exec_queue_to_guc(q);
1737
1738 xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
1739 xe_guc_read_stopped(guc));
1740 xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);
1741
1742 __suspend_fence_signal(q);
1743 }
1744
1745 static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
1746 {
1747 struct xe_exec_queue *q = msg->private_data;
1748 struct xe_guc *guc = exec_queue_to_guc(q);
1749 bool idle_skip_suspend = xe_exec_queue_idle_skip_suspend(q);
1750
1751 if (!idle_skip_suspend && guc_exec_queue_allowed_to_change_state(q) &&
1752 !exec_queue_suspended(q) && exec_queue_enabled(q)) {
1753 wait_event(guc->ct.wq, vf_recovery(guc) ||
1754 ((q->guc->resume_time != RESUME_PENDING ||
1755 xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));
1756
1757 if (!xe_guc_read_stopped(guc)) {
1758 s64 since_resume_ms =
1759 ktime_ms_delta(ktime_get(),
1760 q->guc->resume_time);
1761 s64 wait_ms = q->vm->preempt.min_run_period_ms -
1762 since_resume_ms;
1763
1764 if (wait_ms > 0 && q->guc->resume_time)
1765 relaxed_ms_sleep(wait_ms);
1766
1767 set_exec_queue_suspended(q);
1768 disable_scheduling(q, false);
1769 }
1770 } else if (q->guc->suspend_pending) {
1771 if (idle_skip_suspend)
1772 set_exec_queue_idle_skip_suspend(q);
1773 set_exec_queue_suspended(q);
1774 suspend_fence_signal(q);
1775 }
1776 }
1777
1778 static void sched_context(struct xe_exec_queue *q)
1779 {
1780 struct xe_guc *guc = exec_queue_to_guc(q);
1781 struct xe_lrc *lrc = q->lrc[0];
1782 u32 action[] = {
1783 XE_GUC_ACTION_SCHED_CONTEXT,
1784 q->guc->id,
1785 };
1786
1787 xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_parallel(q));
1788 xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
1789 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
1790 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
1791
1792 trace_xe_exec_queue_submit(q);
1793
1794 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
1795 xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
1796 }
1797
1798 static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
1799 {
1800 struct xe_exec_queue *q = msg->private_data;
1801
1802 if (guc_exec_queue_allowed_to_change_state(q)) {
1803 clear_exec_queue_suspended(q);
1804 if (!exec_queue_enabled(q)) {
1805 if (exec_queue_idle_skip_suspend(q)) {
1806 struct xe_lrc *lrc = q->lrc[0];
1807
1808 clear_exec_queue_idle_skip_suspend(q);
1809 xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
1810 }
1811 q->guc->resume_time = RESUME_PENDING;
1812 set_exec_queue_pending_resume(q);
1813 enable_scheduling(q);
1814 } else if (exec_queue_idle_skip_suspend(q)) {
1815 clear_exec_queue_idle_skip_suspend(q);
1816 sched_context(q);
1817 }
1818 } else {
1819 clear_exec_queue_suspended(q);
1820 clear_exec_queue_idle_skip_suspend(q);
1821 }
1822 }
1823
1824 static void __guc_exec_queue_process_msg_set_multi_queue_priority(struct xe_sched_msg *msg)
1825 {
1826 struct xe_exec_queue *q = msg->private_data;
1827
1828 if (guc_exec_queue_allowed_to_change_state(q)) {
1829 #define MAX_MULTI_QUEUE_CGP_SYNC_SIZE (2)
1830 struct xe_guc *guc = exec_queue_to_guc(q);
1831 struct xe_exec_queue_group *group = q->multi_queue.group;
1832 u32 action[MAX_MULTI_QUEUE_CGP_SYNC_SIZE];
1833 int len = 0;
1834
1835 action[len++] = XE_GUC_ACTION_MULTI_QUEUE_CONTEXT_CGP_SYNC;
1836 action[len++] = group->primary->guc->id;
1837
1838 xe_gt_assert(guc_to_gt(guc), len <= MAX_MULTI_QUEUE_CGP_SYNC_SIZE);
1839 #undef MAX_MULTI_QUEUE_CGP_SYNC_SIZE
1840
1841 xe_guc_exec_queue_group_cgp_sync(guc, q, action, len);
1842 }
1843
1844 kfree(msg);
1845 }
1846
1847 #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */
1848 #define SET_SCHED_PROPS 2
1849 #define SUSPEND 3
1850 #define RESUME 4
1851 #define SET_MULTI_QUEUE_PRIORITY 5
1852 #define OPCODE_MASK 0xf
1853 #define MSG_LOCKED BIT(8)
1854 #define MSG_HEAD BIT(9)
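
/*
 * A minimal sketch of how the opcode word built from the defines above is
 * consumed (see guc_exec_queue_add_msg() below): callers may OR routing flags
 * into the opcode, the low bits are kept via OPCODE_MASK and the flags select
 * how the message is queued, e.g.
 *
 *	u32 opcode = SUSPEND | MSG_LOCKED | MSG_HEAD;
 *
 *	opcode & OPCODE_MASK	-> SUSPEND, stored in msg->opcode
 *	opcode & MSG_HEAD	-> xe_sched_add_msg_head()
 *	opcode & MSG_LOCKED	-> xe_sched_add_msg_locked() (when MSG_HEAD unset)
 *	neither flag		-> xe_sched_add_msg()
 */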
1855
1856 static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
1857 {
1858 struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));
1859
1860 trace_xe_sched_msg_recv(msg);
1861
1862 switch (msg->opcode) {
1863 case CLEANUP:
1864 __guc_exec_queue_process_msg_cleanup(msg);
1865 break;
1866 case SET_SCHED_PROPS:
1867 __guc_exec_queue_process_msg_set_sched_props(msg);
1868 break;
1869 case SUSPEND:
1870 __guc_exec_queue_process_msg_suspend(msg);
1871 break;
1872 case RESUME:
1873 __guc_exec_queue_process_msg_resume(msg);
1874 break;
1875 case SET_MULTI_QUEUE_PRIORITY:
1876 __guc_exec_queue_process_msg_set_multi_queue_priority(msg);
1877 break;
1878 default:
1879 XE_WARN_ON("Unknown message type");
1880 }
1881
1882 xe_pm_runtime_put(xe);
1883 }
1884
1885 static const struct drm_sched_backend_ops drm_sched_ops = {
1886 .run_job = guc_exec_queue_run_job,
1887 .free_job = guc_exec_queue_free_job,
1888 .timedout_job = guc_exec_queue_timedout_job,
1889 };
1890
1891 static const struct xe_sched_backend_ops xe_sched_ops = {
1892 .process_msg = guc_exec_queue_process_msg,
1893 };
1894
1895 static int guc_exec_queue_init(struct xe_exec_queue *q)
1896 {
1897 struct xe_gpu_scheduler *sched;
1898 struct xe_guc *guc = exec_queue_to_guc(q);
1899 struct workqueue_struct *submit_wq = NULL;
1900 struct xe_guc_exec_queue *ge;
1901 long timeout;
1902 int err, i;
1903
1904 xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));
1905
1906 ge = kzalloc_obj(*ge, GFP_KERNEL);
1907 if (!ge)
1908 return -ENOMEM;
1909
1910 q->guc = ge;
1911 ge->q = q;
1912 init_rcu_head(&ge->rcu);
1913 init_waitqueue_head(&ge->suspend_wait);
1914
1915 for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
1916 INIT_LIST_HEAD(&ge->static_msgs[i].link);
1917
1918 timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
1919 msecs_to_jiffies(q->sched_props.job_timeout_ms);
1920
1921 /*
1922 * Use primary queue's submit_wq for all secondary queues of a
1923 * multi queue group. This serialization avoids any locking around
1924 * CGP synchronization with GuC.
1925 */
1926 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1927 struct xe_exec_queue *primary = xe_exec_queue_multi_queue_primary(q);
1928
1929 submit_wq = primary->guc->sched.base.submit_wq;
1930 }
1931
1932 err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
1933 submit_wq, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
1934 timeout, guc_to_gt(guc)->ordered_wq, NULL,
1935 q->name, gt_to_xe(q->gt)->drm.dev);
1936 if (err)
1937 goto err_free;
1938
1939 sched = &ge->sched;
1940 err = xe_sched_entity_init(&ge->entity, sched);
1941 if (err)
1942 goto err_sched;
1943
1944 mutex_lock(&guc->submission_state.lock);
1945
1946 err = alloc_guc_id(guc, q);
1947 if (err)
1948 goto err_entity;
1949
1950 q->entity = &ge->entity;
1951
1952 if (xe_guc_read_stopped(guc) || vf_recovery(guc))
1953 xe_sched_stop(sched);
1954
1955 mutex_unlock(&guc->submission_state.lock);
1956
1957 xe_exec_queue_assign_name(q, q->guc->id);
1958
1959 /*
1960 * Maintain secondary queues of the multi queue group in a list
1961 * for handling dependencies across the queues in the group.
1962 */
1963 if (xe_exec_queue_is_multi_queue_secondary(q)) {
1964 struct xe_exec_queue_group *group = q->multi_queue.group;
1965
1966 INIT_LIST_HEAD(&q->multi_queue.link);
1967 mutex_lock(&group->list_lock);
1968 list_add_tail(&q->multi_queue.link, &group->list);
1969 mutex_unlock(&group->list_lock);
1970 }
1971
1972 if (xe_exec_queue_is_multi_queue(q))
1973 trace_xe_exec_queue_create_multi_queue(q);
1974 else
1975 trace_xe_exec_queue_create(q);
1976
1977 return 0;
1978
1979 err_entity:
1980 mutex_unlock(&guc->submission_state.lock);
1981 xe_sched_entity_fini(&ge->entity);
1982 err_sched:
1983 xe_sched_fini(&ge->sched);
1984 err_free:
1985 kfree(ge);
1986
1987 return err;
1988 }
1989
1990 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1991 {
1992 trace_xe_exec_queue_kill(q);
1993 set_exec_queue_killed(q);
1994 __suspend_fence_signal(q);
1995 xe_guc_exec_queue_trigger_cleanup(q);
1996 }
1997
1998 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1999 u32 opcode)
2000 {
2001 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
2002
2003 INIT_LIST_HEAD(&msg->link);
2004 msg->opcode = opcode & OPCODE_MASK;
2005 msg->private_data = q;
2006
2007 trace_xe_sched_msg_add(msg);
2008 if (opcode & MSG_HEAD)
2009 xe_sched_add_msg_head(&q->guc->sched, msg);
2010 else if (opcode & MSG_LOCKED)
2011 xe_sched_add_msg_locked(&q->guc->sched, msg);
2012 else
2013 xe_sched_add_msg(&q->guc->sched, msg);
2014 }
2015
2016 static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
2017 struct xe_sched_msg *msg,
2018 u32 opcode)
2019 {
2020 if (!list_empty(&msg->link))
2021 return;
2022
2023 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
2024 }
2025
2026 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
2027 struct xe_sched_msg *msg,
2028 u32 opcode)
2029 {
2030 if (!list_empty(&msg->link))
2031 return false;
2032
2033 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
2034
2035 return true;
2036 }
2037
2038 #define STATIC_MSG_CLEANUP 0
2039 #define STATIC_MSG_SUSPEND 1
2040 #define STATIC_MSG_RESUME 2
2041 static void guc_exec_queue_destroy(struct xe_exec_queue *q)
2042 {
2043 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2044
2045 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
2046 guc_exec_queue_add_msg(q, msg, CLEANUP);
2047 else
2048 __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
2049 }
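
/*
 * A minimal sketch of the teardown flow driven by the CLEANUP message added
 * above (non-permanent, non-wedged queue, GuC firmware running):
 *
 *	guc_exec_queue_destroy(q)
 *	  -> CLEANUP msg -> __guc_exec_queue_process_msg_cleanup()
 *	    -> disable_scheduling_deregister()
 *	      -> SCHED_DONE G2H -> handle_sched_done() -> deregister_exec_queue()
 *	        -> DEREGISTER_DONE G2H -> handle_deregister_done()
 *	          -> __guc_exec_queue_destroy() -> guc_exec_queue_destroy_async()
 *	            -> __guc_exec_queue_destroy_async() -> xe_exec_queue_fini()
 */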
2050
2051 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
2052 enum xe_exec_queue_priority priority)
2053 {
2054 struct xe_sched_msg *msg;
2055
2056 if (q->sched_props.priority == priority ||
2057 exec_queue_killed_or_banned_or_wedged(q))
2058 return 0;
2059
2060 msg = kmalloc_obj(*msg, GFP_KERNEL);
2061 if (!msg)
2062 return -ENOMEM;
2063
2064 q->sched_props.priority = priority;
2065 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2066
2067 return 0;
2068 }
2069
2070 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
2071 {
2072 struct xe_sched_msg *msg;
2073
2074 if (q->sched_props.timeslice_us == timeslice_us ||
2075 exec_queue_killed_or_banned_or_wedged(q))
2076 return 0;
2077
2078 msg = kmalloc_obj(*msg, GFP_KERNEL);
2079 if (!msg)
2080 return -ENOMEM;
2081
2082 q->sched_props.timeslice_us = timeslice_us;
2083 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2084
2085 return 0;
2086 }
2087
2088 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
2089 u32 preempt_timeout_us)
2090 {
2091 struct xe_sched_msg *msg;
2092
2093 if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
2094 exec_queue_killed_or_banned_or_wedged(q))
2095 return 0;
2096
2097 msg = kmalloc_obj(*msg, GFP_KERNEL);
2098 if (!msg)
2099 return -ENOMEM;
2100
2101 q->sched_props.preempt_timeout_us = preempt_timeout_us;
2102 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
2103
2104 return 0;
2105 }
2106
2107 static int guc_exec_queue_set_multi_queue_priority(struct xe_exec_queue *q,
2108 enum xe_multi_queue_priority priority)
2109 {
2110 struct xe_sched_msg *msg;
2111
2112 xe_gt_assert(guc_to_gt(exec_queue_to_guc(q)), xe_exec_queue_is_multi_queue(q));
2113
2114 if (q->multi_queue.priority == priority ||
2115 exec_queue_killed_or_banned_or_wedged(q))
2116 return 0;
2117
2118 msg = kmalloc_obj(*msg, GFP_KERNEL);
2119 if (!msg)
2120 return -ENOMEM;
2121
2122 q->multi_queue.priority = priority;
2123 guc_exec_queue_add_msg(q, msg, SET_MULTI_QUEUE_PRIORITY);
2124
2125 return 0;
2126 }
2127
2128 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
2129 {
2130 struct xe_gpu_scheduler *sched = &q->guc->sched;
2131 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2132
2133 if (exec_queue_killed_or_banned_or_wedged(q))
2134 return -EINVAL;
2135
2136 xe_sched_msg_lock(sched);
2137 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
2138 q->guc->suspend_pending = true;
2139 xe_sched_msg_unlock(sched);
2140
2141 return 0;
2142 }
2143
2144 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
2145 {
2146 struct xe_guc *guc = exec_queue_to_guc(q);
2147 struct xe_device *xe = guc_to_xe(guc);
2148 int ret;
2149
2150 /*
2151 * Likely don't need to check exec_queue_killed() as we clear
2152	 * suspend_pending upon kill, but to be paranoid about races in which
2153	 * suspend_pending is set after kill, also check kill here.
2154 */
2155 #define WAIT_COND \
2156 (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
2157 xe_guc_read_stopped(guc))
2158
2159 retry:
2160 if (IS_SRIOV_VF(xe))
2161 ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
2162 vf_recovery(guc),
2163 HZ * 5);
2164 else
2165 ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
2166 WAIT_COND, HZ * 5);
2167
2168 if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
2169 return -EAGAIN;
2170
2171 if (!ret) {
2172 xe_gt_warn(guc_to_gt(guc),
2173 "Suspend fence, guc_id=%d, failed to respond",
2174 q->guc->id);
2175 /* XXX: Trigger GT reset? */
2176 return -ETIME;
2177 } else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
2178 /* Corner case on RESFIX DONE where vf_recovery() changes */
2179 goto retry;
2180 }
2181
2182 #undef WAIT_COND
2183
2184 return ret < 0 ? ret : 0;
2185 }
2186
2187 static void guc_exec_queue_resume(struct xe_exec_queue *q)
2188 {
2189 struct xe_gpu_scheduler *sched = &q->guc->sched;
2190 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2191 struct xe_guc *guc = exec_queue_to_guc(q);
2192
2193 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
2194
2195 xe_sched_msg_lock(sched);
2196 guc_exec_queue_try_add_msg(q, msg, RESUME);
2197 xe_sched_msg_unlock(sched);
2198 }
2199
2200 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
2201 {
2202 if (xe_exec_queue_is_multi_queue_secondary(q) &&
2203 guc_exec_queue_reset_status(xe_exec_queue_multi_queue_primary(q)))
2204 return true;
2205
2206 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
2207 }
2208
2209 /*
2210 * All of these functions are an abstraction layer which other parts of Xe can
2211 * use to trap into the GuC backend. Aside from init, these functions
2212 * really shouldn't do much other than trap into the DRM scheduler, which
2213 * synchronizes these operations.
2214 */
2215 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
2216 .init = guc_exec_queue_init,
2217 .kill = guc_exec_queue_kill,
2218 .fini = guc_exec_queue_fini,
2219 .destroy = guc_exec_queue_destroy,
2220 .set_priority = guc_exec_queue_set_priority,
2221 .set_timeslice = guc_exec_queue_set_timeslice,
2222 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
2223 .set_multi_queue_priority = guc_exec_queue_set_multi_queue_priority,
2224 .suspend = guc_exec_queue_suspend,
2225 .suspend_wait = guc_exec_queue_suspend_wait,
2226 .resume = guc_exec_queue_resume,
2227 .reset_status = guc_exec_queue_reset_status,
2228 };
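
/*
 * A minimal usage sketch, assuming callers reach these through the exec
 * queue's ops pointer (q->ops) wired up to guc_exec_queue_ops at queue
 * creation:
 *
 *	err = q->ops->suspend(q);
 *	if (!err)
 *		err = q->ops->suspend_wait(q);
 *	...
 *	q->ops->resume(q);
 */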
2229
2230 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
2231 {
2232 struct xe_gpu_scheduler *sched = &q->guc->sched;
2233
2234 /* Stop scheduling + flush any DRM scheduler operations */
2235 xe_sched_submission_stop(sched);
2236
2237 /* Clean up lost G2H + reset engine state */
2238 if (exec_queue_registered(q)) {
2239 if (exec_queue_destroyed(q))
2240 __guc_exec_queue_destroy(guc, q);
2241 }
2242 if (q->guc->suspend_pending) {
2243 set_exec_queue_suspended(q);
2244 suspend_fence_signal(q);
2245 }
2246 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
2247 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
2248 EXEC_QUEUE_STATE_SUSPENDED,
2249 &q->guc->state);
2250 q->guc->resume_time = 0;
2251 trace_xe_exec_queue_stop(q);
2252
2253 /*
2254	 * Ban any engine (aside from kernel engines and engines used for VM ops)
2255	 * with a job that has started but not completed, or with a job that has
2256	 * gone through a GT reset more than twice.
2257 */
2258 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
2259 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2260 bool ban = false;
2261
2262 if (job) {
2263 if ((xe_sched_job_started(job) &&
2264 !xe_sched_job_completed(job)) ||
2265 xe_sched_invalidate_job(job, 2)) {
2266 trace_xe_sched_job_ban(job);
2267 ban = true;
2268 }
2269 }
2270
2271 if (ban) {
2272 set_exec_queue_banned(q);
2273 xe_guc_exec_queue_trigger_cleanup(q);
2274 }
2275 }
2276 }
2277
2278 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
2279 {
2280 int ret;
2281
2282 if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
2283 return 0;
2284
2285 if (!guc->submission_state.initialized)
2286 return 0;
2287
2288 /*
2289 * Using an atomic here rather than submission_state.lock as this
2290 * function can be called while holding the CT lock (engine reset
2291 * failure). submission_state.lock needs the CT lock to resubmit jobs.
2292	 * An atomic is not ideal, but it works to protect against concurrent resets
2293	 * and to release any TDRs waiting on guc->submission_state.stopped.
2294 */
2295 ret = atomic_fetch_or(1, &guc->submission_state.stopped);
2296 smp_wmb();
2297 wake_up_all(&guc->ct.wq);
2298
2299 return ret;
2300 }
2301
2302 void xe_guc_submit_reset_wait(struct xe_guc *guc)
2303 {
2304 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
2305 !xe_guc_read_stopped(guc));
2306 }
2307
2308 void xe_guc_submit_stop(struct xe_guc *guc)
2309 {
2310 struct xe_exec_queue *q;
2311 unsigned long index;
2312
2313 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2314
2315 mutex_lock(&guc->submission_state.lock);
2316
2317 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2318 /* Prevent redundant attempts to stop parallel queues */
2319 if (q->guc->id != index)
2320 continue;
2321
2322 guc_exec_queue_stop(guc, q);
2323 }
2324
2325 mutex_unlock(&guc->submission_state.lock);
2326
2327 /*
2328 * No one can enter the backend at this point, aside from new engine
2329 * creation which is protected by guc->submission_state.lock.
2330 */
2331
2332 }
2333
2334 static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
2335 struct xe_exec_queue *q)
2336 {
2337 bool pending_enable, pending_disable, pending_resume;
2338
2339 pending_enable = exec_queue_pending_enable(q);
2340 pending_resume = exec_queue_pending_resume(q);
2341
2342 if (pending_enable && pending_resume) {
2343 q->guc->needs_resume = true;
2344 xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
2345 q->guc->id);
2346 }
2347
2348 if (pending_enable && !pending_resume) {
2349 clear_exec_queue_registered(q);
2350 xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
2351 q->guc->id);
2352 }
2353
2354 if (pending_enable) {
2355 clear_exec_queue_enabled(q);
2356 clear_exec_queue_pending_resume(q);
2357 clear_exec_queue_pending_enable(q);
2358 xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
2359 q->guc->id);
2360 }
2361
2362 if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
2363 clear_exec_queue_destroyed(q);
2364 q->guc->needs_cleanup = true;
2365 xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
2366 q->guc->id);
2367 }
2368
2369 pending_disable = exec_queue_pending_disable(q);
2370
2371 if (pending_disable && exec_queue_suspended(q)) {
2372 clear_exec_queue_suspended(q);
2373 q->guc->needs_suspend = true;
2374 xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
2375 q->guc->id);
2376 }
2377
2378 if (pending_disable) {
2379 if (!pending_enable)
2380 set_exec_queue_enabled(q);
2381 clear_exec_queue_pending_disable(q);
2382 xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
2383 q->guc->id);
2384 }
2385
2386 q->guc->resume_time = 0;
2387 }
2388
2389 static void lrc_parallel_clear(struct xe_lrc *lrc)
2390 {
2391 struct xe_device *xe = gt_to_xe(lrc->gt);
2392 struct iosys_map map = xe_lrc_parallel_map(lrc);
2393 int i;
2394
2395 for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
2396 parallel_write(xe, map, wq[i],
2397 FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
2398 FIELD_PREP(WQ_LEN_MASK, 0));
2399 }
2400
2401 /*
2402 * This function is quite complex but it is the only real way to ensure no
2403 * state is lost during VF resume flows. The function scans the queue state,
2404 * makes adjustments as needed, and queues jobs / messages which are replayed upon unpause.
2405 */
2406 static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
2407 {
2408 struct xe_gpu_scheduler *sched = &q->guc->sched;
2409 struct xe_sched_job *job;
2410 int i;
2411
2412 lockdep_assert_held(&guc->submission_state.lock);
2413
2414 /* Stop scheduling + flush any DRM scheduler operations */
2415 xe_sched_submission_stop(sched);
2416 cancel_delayed_work_sync(&sched->base.work_tdr);
2417
2418 guc_exec_queue_revert_pending_state_change(guc, q);
2419
2420 if (xe_exec_queue_is_parallel(q)) {
2421 /* Pairs with WRITE_ONCE in __xe_exec_queue_init */
2422 struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);
2423
2424 /*
2425 * NOP existing WQ commands that may contain stale GGTT
2426 * addresses. These will be replayed upon unpause. The hardware
2427 * seems to get confused if the WQ head/tail pointers are
2428 * adjusted.
2429 */
2430 if (lrc)
2431 lrc_parallel_clear(lrc);
2432 }
2433
2434 job = xe_sched_first_pending_job(sched);
2435 if (job) {
2436 job->restore_replay = true;
2437
2438 /*
2439		 * Adjust the software tail so resubmitted jobs overwrite their previous
2440		 * position in the ring buffer with new GGTT addresses.
2441 */
2442 for (i = 0; i < q->width; ++i)
2443 q->lrc[i]->ring.tail = job->ptrs[i].head;
2444 }
2445 }
2446
2447 /**
2448 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2449 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2450 */
2451 void xe_guc_submit_pause(struct xe_guc *guc)
2452 {
2453 struct xe_exec_queue *q;
2454 unsigned long index;
2455
2456 mutex_lock(&guc->submission_state.lock);
2457 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2458 xe_sched_submission_stop(&q->guc->sched);
2459 mutex_unlock(&guc->submission_state.lock);
2460 }
2461
2462 /**
2463 * xe_guc_submit_pause_vf - Stop further runs of submission tasks for VF.
2464 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2465 */
2466 void xe_guc_submit_pause_vf(struct xe_guc *guc)
2467 {
2468 struct xe_exec_queue *q;
2469 unsigned long index;
2470
2471 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2472 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2473
2474 mutex_lock(&guc->submission_state.lock);
2475 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2476 /* Prevent redundant attempts to stop parallel queues */
2477 if (q->guc->id != index)
2478 continue;
2479
2480 guc_exec_queue_pause(guc, q);
2481 }
2482 mutex_unlock(&guc->submission_state.lock);
2483 }
2484
2485 static void guc_exec_queue_start(struct xe_exec_queue *q)
2486 {
2487 struct xe_gpu_scheduler *sched = &q->guc->sched;
2488
2489 if (!exec_queue_killed_or_banned_or_wedged(q)) {
2490 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
2491 int i;
2492
2493 trace_xe_exec_queue_resubmit(q);
2494 if (job) {
2495 for (i = 0; i < q->width; ++i) {
2496 /*
2497				 * The GuC context is unregistered at this point
2498				 * in time; adjusting the software ring tail ensures
2499				 * jobs are rewritten in their original placement, and
2500				 * adjusting the LRC tail ensures the newly loaded
2501 * GuC / contexts only view the LRC tail
2502 * increasing as jobs are written out.
2503 */
2504 q->lrc[i]->ring.tail = job->ptrs[i].head;
2505 xe_lrc_set_ring_tail(q->lrc[i],
2506 xe_lrc_ring_head(q->lrc[i]));
2507 }
2508 }
2509 xe_sched_resubmit_jobs(sched);
2510 }
2511
2512 xe_sched_submission_start(sched);
2513 xe_sched_submission_resume_tdr(sched);
2514 }
2515
2516 int xe_guc_submit_start(struct xe_guc *guc)
2517 {
2518 struct xe_exec_queue *q;
2519 unsigned long index;
2520
2521 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2522
2523 mutex_lock(&guc->submission_state.lock);
2524 atomic_dec(&guc->submission_state.stopped);
2525 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2526 /* Prevent redundant attempts to start parallel queues */
2527 if (q->guc->id != index)
2528 continue;
2529
2530 guc_exec_queue_start(q);
2531 }
2532 mutex_unlock(&guc->submission_state.lock);
2533
2534 wake_up_all(&guc->ct.wq);
2535
2536 return 0;
2537 }
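
/*
 * A rough sketch of how the reset entry points above are expected to be
 * sequenced by the GT reset path (the exact ordering outside this file is an
 * assumption here):
 *
 *	xe_guc_submit_reset_prepare(guc);	- mark stopped, wake waiters
 *	xe_guc_submit_stop(guc);		- clean up per-queue state
 *	(GT/GuC reset and firmware reload)
 *	xe_guc_submit_start(guc);		- resubmit jobs, restart schedulers
 *
 * Other threads call xe_guc_submit_reset_wait() to block until the reset
 * completes or the device is declared wedged.
 */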
2538
2539 static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
2540 struct xe_exec_queue *q)
2541 {
2542 struct xe_gpu_scheduler *sched = &q->guc->sched;
2543 struct xe_sched_job *job = NULL;
2544 struct drm_sched_job *s_job;
2545 bool restore_replay = false;
2546
2547 drm_sched_for_each_pending_job(s_job, &sched->base, NULL) {
2548 job = to_xe_sched_job(s_job);
2549 restore_replay |= job->restore_replay;
2550 if (restore_replay) {
2551 xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
2552 q->guc->id, xe_sched_job_seqno(job));
2553
2554 q->ring_ops->emit_job(job);
2555 job->restore_replay = true;
2556 }
2557 }
2558
2559 if (job)
2560 job->last_replay = true;
2561 }
2562
2563 /**
2564 * xe_guc_submit_unpause_prepare_vf - Prepare unpause submission tasks for VF.
2565 * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
2566 */
2567 void xe_guc_submit_unpause_prepare_vf(struct xe_guc *guc)
2568 {
2569 struct xe_exec_queue *q;
2570 unsigned long index;
2571
2572 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2573 xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
2574
2575 mutex_lock(&guc->submission_state.lock);
2576 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2577 /* Prevent redundant attempts to stop parallel queues */
2578 if (q->guc->id != index)
2579 continue;
2580
2581 guc_exec_queue_unpause_prepare(guc, q);
2582 }
2583 mutex_unlock(&guc->submission_state.lock);
2584 }
2585
2586 static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
2587 {
2588 struct xe_gpu_scheduler *sched = &q->guc->sched;
2589 struct xe_sched_msg *msg;
2590
2591 if (q->guc->needs_cleanup) {
2592 msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
2593
2594 guc_exec_queue_add_msg(q, msg, CLEANUP);
2595 q->guc->needs_cleanup = false;
2596 }
2597
2598 if (q->guc->needs_suspend) {
2599 msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
2600
2601 xe_sched_msg_lock(sched);
2602 guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
2603 xe_sched_msg_unlock(sched);
2604
2605 q->guc->needs_suspend = false;
2606 }
2607
2608 /*
2609 * The resume must be in the message queue before the suspend as it is
2610	 * not possible for a resume to be issued while a suspend is pending, but
2611 * the inverse is possible.
2612 */
2613 if (q->guc->needs_resume) {
2614 msg = q->guc->static_msgs + STATIC_MSG_RESUME;
2615
2616 xe_sched_msg_lock(sched);
2617 guc_exec_queue_try_add_msg_head(q, msg, RESUME);
2618 xe_sched_msg_unlock(sched);
2619
2620 q->guc->needs_resume = false;
2621 }
2622 }
2623
2624 static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
2625 {
2626 struct xe_gpu_scheduler *sched = &q->guc->sched;
2627 bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);
2628
2629 lockdep_assert_held(&guc->submission_state.lock);
2630
2631 xe_sched_resubmit_jobs(sched);
2632 guc_exec_queue_replay_pending_state_change(q);
2633 xe_sched_submission_start(sched);
2634 if (needs_tdr)
2635 xe_guc_exec_queue_trigger_cleanup(q);
2636 xe_sched_submission_resume_tdr(sched);
2637 }
2638
2639 /**
2640 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
2641 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2642 */
2643 void xe_guc_submit_unpause(struct xe_guc *guc)
2644 {
2645 struct xe_exec_queue *q;
2646 unsigned long index;
2647
2648 mutex_lock(&guc->submission_state.lock);
2649 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2650 xe_sched_submission_start(&q->guc->sched);
2651 mutex_unlock(&guc->submission_state.lock);
2652 }
2653
2654 /**
2655 * xe_guc_submit_unpause_vf - Allow further runs of submission tasks for VF.
2656 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2657 */
2658 void xe_guc_submit_unpause_vf(struct xe_guc *guc)
2659 {
2660 struct xe_exec_queue *q;
2661 unsigned long index;
2662
2663 xe_gt_assert(guc_to_gt(guc), IS_SRIOV_VF(guc_to_xe(guc)));
2664
2665 mutex_lock(&guc->submission_state.lock);
2666 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2667 /*
2668 * Prevent redundant attempts to stop parallel queues, or queues
2669 * created after resfix done.
2670 */
2671 if (q->guc->id != index ||
2672 !drm_sched_is_stopped(&q->guc->sched.base))
2673 continue;
2674
2675 guc_exec_queue_unpause(guc, q);
2676 }
2677 mutex_unlock(&guc->submission_state.lock);
2678 }
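
/*
 * A rough sketch of the VF post-migration recovery flow these helpers are
 * intended for (the ordering is driven outside this file and is an assumption
 * here):
 *
 *	xe_guc_submit_pause_vf(guc);		- freeze schedulers, revert pending state
 *	xe_guc_contexts_hwsp_rebase(guc, scratch);	- fix up GGTT references
 *	xe_guc_submit_unpause_prepare_vf(guc);	- re-emit jobs with new addresses
 *	xe_guc_submit_unpause_vf(guc);		- replay messages, restart schedulers
 *
 * On a failed recovery, xe_guc_submit_pause_abort() restarts the schedulers
 * without replaying.
 */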
2679
2680 /**
2681 * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
2682 * @guc: the &xe_guc struct instance whose scheduler is to be aborted
2683 */
2684 void xe_guc_submit_pause_abort(struct xe_guc *guc)
2685 {
2686 struct xe_exec_queue *q;
2687 unsigned long index;
2688
2689 mutex_lock(&guc->submission_state.lock);
2690 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2691 struct xe_gpu_scheduler *sched = &q->guc->sched;
2692
2693 /* Prevent redundant attempts to stop parallel queues */
2694 if (q->guc->id != index)
2695 continue;
2696
2697 xe_sched_submission_start(sched);
2698 if (exec_queue_killed_or_banned_or_wedged(q))
2699 xe_guc_exec_queue_trigger_cleanup(q);
2700 }
2701 mutex_unlock(&guc->submission_state.lock);
2702 }
2703
2704 static struct xe_exec_queue *
2705 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
2706 {
2707 struct xe_gt *gt = guc_to_gt(guc);
2708 struct xe_exec_queue *q;
2709
2710 if (unlikely(guc_id >= GUC_ID_MAX)) {
2711 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
2712 return NULL;
2713 }
2714
2715 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
2716 if (unlikely(!q)) {
2717 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
2718 return NULL;
2719 }
2720
2721 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
2722 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
2723
2724 return q;
2725 }
2726
2727 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
2728 {
2729 u32 action[] = {
2730 XE_GUC_ACTION_DEREGISTER_CONTEXT,
2731 q->guc->id,
2732 };
2733
2734 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
2735 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
2736 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
2737 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
2738
2739 trace_xe_exec_queue_deregister(q);
2740
2741 if (xe_exec_queue_is_multi_queue_secondary(q))
2742 handle_deregister_done(guc, q);
2743 else
2744 xe_guc_ct_send_g2h_handler(&guc->ct, action,
2745 ARRAY_SIZE(action));
2746 }
2747
2748 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
2749 u32 runnable_state)
2750 {
2751 trace_xe_exec_queue_scheduling_done(q);
2752
2753 if (runnable_state == 1) {
2754 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
2755
2756 q->guc->resume_time = ktime_get();
2757 clear_exec_queue_pending_resume(q);
2758 clear_exec_queue_pending_enable(q);
2759 smp_wmb();
2760 wake_up_all(&guc->ct.wq);
2761 } else {
2762 xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
2763 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
2764
2765 if (q->guc->suspend_pending) {
2766 suspend_fence_signal(q);
2767 clear_exec_queue_pending_disable(q);
2768 } else {
2769 if (exec_queue_banned(q)) {
2770 smp_wmb();
2771 wake_up_all(&guc->ct.wq);
2772 }
2773 if (exec_queue_destroyed(q)) {
2774 /*
2775 * Make sure to clear the pending_disable only
2776 * after sampling the destroyed state. We want
2777 * to ensure we don't trigger the unregister too
2778 * early with something intending to only
2779 * disable scheduling. The caller doing the
2780 * destroy must wait for an ongoing
2781 * pending_disable before marking as destroyed.
2782 */
2783 clear_exec_queue_pending_disable(q);
2784 deregister_exec_queue(guc, q);
2785 } else {
2786 clear_exec_queue_pending_disable(q);
2787 }
2788 }
2789 }
2790 }
2791
2792 static void handle_multi_queue_secondary_sched_done(struct xe_guc *guc,
2793 struct xe_exec_queue *q,
2794 u32 runnable_state)
2795 {
2796	/* Take the CT lock here as handle_sched_done() does send an H2G message */
2797 mutex_lock(&guc->ct.lock);
2798 handle_sched_done(guc, q, runnable_state);
2799 mutex_unlock(&guc->ct.lock);
2800 }
2801
2802 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2803 {
2804 struct xe_exec_queue *q;
2805 u32 guc_id, runnable_state;
2806
2807 if (unlikely(len < 2))
2808 return -EPROTO;
2809
2810 guc_id = msg[0];
2811 runnable_state = msg[1];
2812
2813 q = g2h_exec_queue_lookup(guc, guc_id);
2814 if (unlikely(!q))
2815 return -EPROTO;
2816
2817 if (unlikely(!exec_queue_pending_enable(q) &&
2818 !exec_queue_pending_disable(q))) {
2819 xe_gt_err(guc_to_gt(guc),
2820 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
2821 atomic_read(&q->guc->state), q->guc->id,
2822 runnable_state);
2823 return -EPROTO;
2824 }
2825
2826 handle_sched_done(guc, q, runnable_state);
2827
2828 return 0;
2829 }
2830
2831 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
2832 {
2833 trace_xe_exec_queue_deregister_done(q);
2834
2835 clear_exec_queue_registered(q);
2836 __guc_exec_queue_destroy(guc, q);
2837 }
2838
2839 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2840 {
2841 struct xe_exec_queue *q;
2842 u32 guc_id;
2843
2844 if (unlikely(len < 1))
2845 return -EPROTO;
2846
2847 guc_id = msg[0];
2848
2849 q = g2h_exec_queue_lookup(guc, guc_id);
2850 if (unlikely(!q))
2851 return -EPROTO;
2852
2853 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
2854 exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
2855 xe_gt_err(guc_to_gt(guc),
2856 "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
2857 atomic_read(&q->guc->state), q->guc->id);
2858 return -EPROTO;
2859 }
2860
2861 handle_deregister_done(guc, q);
2862
2863 return 0;
2864 }
2865
2866 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
2867 {
2868 struct xe_gt *gt = guc_to_gt(guc);
2869 struct xe_exec_queue *q;
2870 u32 guc_id;
2871
2872 if (unlikely(len < 1))
2873 return -EPROTO;
2874
2875 guc_id = msg[0];
2876
2877 q = g2h_exec_queue_lookup(guc, guc_id);
2878 if (unlikely(!q))
2879 return -EPROTO;
2880
2881 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d, state=0x%0x",
2882 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id,
2883 atomic_read(&q->guc->state));
2884
2885 trace_xe_exec_queue_reset(q);
2886
2887 /*
2888 * A banned engine is a NOP at this point (came from
2889	 * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
2890	 * cancel jobs by setting the job's timeout to the minimum value, which
2891	 * kicks guc_exec_queue_timedout_job.
2892 */
2893 xe_guc_exec_queue_reset_trigger_cleanup(q);
2894
2895 return 0;
2896 }
2897
2898 /*
2899 * xe_guc_error_capture_handler - Handler for the GuC error-capture message
2900 * @guc: The GuC object
2901 * @msg: Pointer to the message
2902 * @len: The message length
2903 *
2904 * When GuC captured data is ready, the GuC sends the
2905 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host; this function
2906 * is called first to check the status before processing the data that comes with the message.
2907 *
2908 * Returns: 0 on success, negative error code on failure.
2909 */
2910 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
2911 {
2912 u32 status;
2913
2914 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
2915 return -EPROTO;
2916
2917 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
2918 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
2919 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
2920
2921 xe_guc_capture_process(guc);
2922
2923 return 0;
2924 }
2925
2926 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2927 u32 len)
2928 {
2929 struct xe_gt *gt = guc_to_gt(guc);
2930 struct xe_exec_queue *q;
2931 u32 guc_id;
2932 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
2933
2934 if (unlikely(!len || len > 2))
2935 return -EPROTO;
2936
2937 guc_id = msg[0];
2938
2939 if (len == 2)
2940 type = msg[1];
2941
2942 if (guc_id == GUC_ID_UNKNOWN) {
2943 /*
2944		 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any PF/VF
2945		 * context. In such a case only the PF will be notified about the fault.
2946 */
2947 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
2948 return 0;
2949 }
2950
2951 q = g2h_exec_queue_lookup(guc, guc_id);
2952 if (unlikely(!q))
2953 return -EPROTO;
2954
2955 /*
2956 * The type is HW-defined and changes based on platform, so we don't
2957 * decode it in the kernel and only check if it is valid.
2958 * See bspec 54047 and 72187 for details.
2959 */
2960 if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
2961 xe_gt_info(gt,
2962 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
2963 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2964 else
2965 xe_gt_info(gt,
2966 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
2967 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2968
2969 trace_xe_exec_queue_memory_cat_error(q);
2970
2971 /* Treat the same as engine reset */
2972 xe_guc_exec_queue_reset_trigger_cleanup(q);
2973
2974 return 0;
2975 }
2976
2977 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2978 {
2979 struct xe_gt *gt = guc_to_gt(guc);
2980 u8 guc_class, instance;
2981 u32 reason;
2982
2983 if (unlikely(len != 3))
2984 return -EPROTO;
2985
2986 guc_class = msg[0];
2987 instance = msg[1];
2988 reason = msg[2];
2989
2990 /* Unexpected failure of a hardware feature, log an actual error */
2991 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
2992 guc_class, instance, reason);
2993
2994 xe_gt_reset_async(gt);
2995
2996 return 0;
2997 }
2998
2999 int xe_guc_exec_queue_cgp_context_error_handler(struct xe_guc *guc, u32 *msg,
3000 u32 len)
3001 {
3002 struct xe_gt *gt = guc_to_gt(guc);
3003 struct xe_device *xe = guc_to_xe(guc);
3004 struct xe_exec_queue *q;
3005 u32 guc_id = msg[2];
3006
3007 if (unlikely(len != XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN)) {
3008 drm_err(&xe->drm, "Invalid length %u", len);
3009 return -EPROTO;
3010 }
3011
3012 q = g2h_exec_queue_lookup(guc, guc_id);
3013 if (unlikely(!q))
3014 return -EPROTO;
3015
3016 xe_gt_dbg(gt,
3017 "CGP context error: [%s] err=0x%x, q0_id=0x%x LRCA=0x%x guc_id=0x%x",
3018 msg[0] & 1 ? "uc" : "kmd", msg[1], msg[2], msg[3], msg[4]);
3019
3020 trace_xe_exec_queue_cgp_context_error(q);
3021
3022 /* Treat the same as engine reset */
3023 xe_guc_exec_queue_reset_trigger_cleanup(q);
3024
3025 return 0;
3026 }
3027
3028 /**
3029 * xe_guc_exec_queue_cgp_sync_done_handler - CGP synchronization done handler
3030 * @guc: guc
3031 * @msg: message indicating CGP sync done
3032 * @len: length of message
3033 *
3034 * Set the multi queue group's sync_pending flag to false and wake up anyone
3035 * waiting for CGP synchronization to complete.
3036 *
3037 * Return: 0 on success, -EPROTO for malformed messages.
3038 */
3039 int xe_guc_exec_queue_cgp_sync_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
3040 {
3041 struct xe_device *xe = guc_to_xe(guc);
3042 struct xe_exec_queue *q;
3043 u32 guc_id = msg[0];
3044
3045 if (unlikely(len < 1)) {
3046 drm_err(&xe->drm, "Invalid CGP_SYNC_DONE length %u", len);
3047 return -EPROTO;
3048 }
3049
3050 q = g2h_exec_queue_lookup(guc, guc_id);
3051 if (unlikely(!q))
3052 return -EPROTO;
3053
3054 if (!xe_exec_queue_is_multi_queue_primary(q)) {
3055 drm_err(&xe->drm, "Unexpected CGP_SYNC_DONE response");
3056 return -EPROTO;
3057 }
3058
3059 /* Wakeup the serialized cgp update wait */
3060 WRITE_ONCE(q->multi_queue.group->sync_pending, false);
3061 xe_guc_ct_wake_waiters(&guc->ct);
3062
3063 return 0;
3064 }
3065
3066 static void
3067 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
3068 struct xe_guc_submit_exec_queue_snapshot *snapshot)
3069 {
3070 struct xe_guc *guc = exec_queue_to_guc(q);
3071 struct xe_device *xe = guc_to_xe(guc);
3072 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
3073 int i;
3074
3075 snapshot->guc.wqi_head = q->guc->wqi_head;
3076 snapshot->guc.wqi_tail = q->guc->wqi_tail;
3077 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
3078 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
3079 snapshot->parallel.wq_desc.status = parallel_read(xe, map,
3080 wq_desc.wq_status);
3081
3082 if (snapshot->parallel.wq_desc.head !=
3083 snapshot->parallel.wq_desc.tail) {
3084 for (i = snapshot->parallel.wq_desc.head;
3085 i != snapshot->parallel.wq_desc.tail;
3086 i = (i + sizeof(u32)) % WQ_SIZE)
3087 snapshot->parallel.wq[i / sizeof(u32)] =
3088 parallel_read(xe, map, wq[i / sizeof(u32)]);
3089 }
3090 }
3091
3092 static void
3093 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3094 struct drm_printer *p)
3095 {
3096 int i;
3097
3098 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
3099 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
3100 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
3101 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
3102 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
3103
3104 if (snapshot->parallel.wq_desc.head !=
3105 snapshot->parallel.wq_desc.tail) {
3106 for (i = snapshot->parallel.wq_desc.head;
3107 i != snapshot->parallel.wq_desc.tail;
3108 i = (i + sizeof(u32)) % WQ_SIZE)
3109 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
3110 snapshot->parallel.wq[i / sizeof(u32)]);
3111 }
3112 }
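
/*
 * Worked example of the WQ ring walk above, assuming an illustrative
 * WQ_SIZE of 0x1000 bytes: with head = 0xff8 and tail = 0x008 the loop
 * visits byte offsets 0xff8, 0xffc, 0x000, 0x004 (dwords wq[0x3fe],
 * wq[0x3ff], wq[0], wq[1]) and stops once the index wraps around to tail.
 */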
3113
3114 /**
3115 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
3116 * @q: faulty exec queue
3117 *
3118 * This can be printed out at a later stage, for example during devcoredump
3119 * analysis.
3120 *
3121 * Returns: a GuC Submit Engine snapshot object that must be freed by the
3122 * caller, using `xe_guc_exec_queue_snapshot_free`.
3123 */
3124 struct xe_guc_submit_exec_queue_snapshot *
3125 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
3126 {
3127 struct xe_gpu_scheduler *sched = &q->guc->sched;
3128 struct xe_guc_submit_exec_queue_snapshot *snapshot;
3129 int i;
3130
3131 snapshot = kzalloc_obj(*snapshot, GFP_ATOMIC);
3132
3133 if (!snapshot)
3134 return NULL;
3135
3136 snapshot->guc.id = q->guc->id;
3137 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
3138 snapshot->class = q->class;
3139 snapshot->logical_mask = q->logical_mask;
3140 snapshot->width = q->width;
3141 snapshot->refcount = kref_read(&q->refcount);
3142 snapshot->sched_timeout = sched->base.timeout;
3143 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
3144 snapshot->sched_props.preempt_timeout_us =
3145 q->sched_props.preempt_timeout_us;
3146
3147 snapshot->lrc = kmalloc_objs(struct xe_lrc_snapshot *, q->width,
3148 GFP_ATOMIC);
3149
3150 if (snapshot->lrc) {
3151 for (i = 0; i < q->width; ++i) {
3152 struct xe_lrc *lrc = q->lrc[i];
3153
3154 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
3155 }
3156 }
3157
3158 snapshot->schedule_state = atomic_read(&q->guc->state);
3159 snapshot->exec_queue_flags = q->flags;
3160
3161 snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
3162 if (snapshot->parallel_execution)
3163 guc_exec_queue_wq_snapshot_capture(q, snapshot);
3164
3165 if (xe_exec_queue_is_multi_queue(q)) {
3166 snapshot->multi_queue.valid = true;
3167 snapshot->multi_queue.primary = xe_exec_queue_multi_queue_primary(q)->guc->id;
3168 snapshot->multi_queue.pos = q->multi_queue.pos;
3169 }
3170
3171 return snapshot;
3172 }
3173
3174 /**
3175 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
3176 * @snapshot: Previously captured snapshot of job.
3177 *
3178 * This captures some data that requires taking some locks, so it cannot be done in the signaling path.
3179 */
3180 void
3181 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3182 {
3183 int i;
3184
3185 if (!snapshot || !snapshot->lrc)
3186 return;
3187
3188 for (i = 0; i < snapshot->width; ++i)
3189 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
3190 }
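
/*
 * A minimal usage sketch of the snapshot lifecycle: capture in the signaling
 * path, complete the delayed part from a context that may take locks, then
 * print and free:
 *
 *	snapshot = xe_guc_exec_queue_snapshot_capture(q);
 *	...
 *	xe_guc_exec_queue_snapshot_capture_delayed(snapshot);
 *	xe_guc_exec_queue_snapshot_print(snapshot, p);
 *	xe_guc_exec_queue_snapshot_free(snapshot);
 */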
3191
3192 /**
3193 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
3194 * @snapshot: GuC Submit Engine snapshot object.
3195 * @p: drm_printer where it will be printed out.
3196 *
3197 * This function prints out a given GuC Submit Engine snapshot object.
3198 */
3199 void
3200 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
3201 struct drm_printer *p)
3202 {
3203 int i;
3204
3205 if (!snapshot)
3206 return;
3207
3208 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
3209 drm_printf(p, "\tName: %s\n", snapshot->name);
3210 drm_printf(p, "\tClass: %d\n", snapshot->class);
3211 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
3212 drm_printf(p, "\tWidth: %d\n", snapshot->width);
3213 drm_printf(p, "\tRef: %d\n", snapshot->refcount);
3214 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
3215 drm_printf(p, "\tTimeslice: %u (us)\n",
3216 snapshot->sched_props.timeslice_us);
3217 drm_printf(p, "\tPreempt timeout: %u (us)\n",
3218 snapshot->sched_props.preempt_timeout_us);
3219
3220 for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
3221 xe_lrc_snapshot_print(snapshot->lrc[i], p);
3222
3223 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
3224 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
3225
3226 if (snapshot->parallel_execution)
3227 guc_exec_queue_wq_snapshot_print(snapshot, p);
3228
3229 if (snapshot->multi_queue.valid) {
3230 drm_printf(p, "\tMulti queue primary GuC ID: %d\n", snapshot->multi_queue.primary);
3231 drm_printf(p, "\tMulti queue position: %d\n", snapshot->multi_queue.pos);
3232 }
3233 }
3234
3235 /**
3236 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
3237 * snapshot.
3238 * @snapshot: GuC Submit Engine snapshot object.
3239 *
3240 * This function free all the memory that needed to be allocated at capture
3241 * time.
3242 */
3243 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
3244 {
3245 int i;
3246
3247 if (!snapshot)
3248 return;
3249
3250 if (snapshot->lrc) {
3251 for (i = 0; i < snapshot->width; i++)
3252 xe_lrc_snapshot_free(snapshot->lrc[i]);
3253 kfree(snapshot->lrc);
3254 }
3255 kfree(snapshot);
3256 }
3257
3258 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
3259 {
3260 struct xe_guc_submit_exec_queue_snapshot *snapshot;
3261
3262 snapshot = xe_guc_exec_queue_snapshot_capture(q);
3263 xe_guc_exec_queue_snapshot_print(snapshot, p);
3264 xe_guc_exec_queue_snapshot_free(snapshot);
3265 }
3266
3267 /**
3268 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
3269 * @q: Execution queue
3270 * @ctx_type: Type of the context
3271 *
3272 * This function registers the execution queue with the GuC. Special context
3273 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
3274 * are only applicable for iGPU and only in a VF.
3275 * The execution queue is submitted to the GuC after registering it.
3276 *
3277 * Return: none.
3278 */
3279 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
3280 {
3281 struct xe_guc *guc = exec_queue_to_guc(q);
3282 struct xe_device *xe = guc_to_xe(guc);
3283 struct xe_gt *gt = guc_to_gt(guc);
3284
3285 xe_gt_assert(gt, IS_SRIOV_VF(xe));
3286 xe_gt_assert(gt, !IS_DGFX(xe));
3287 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
3288 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
3289 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
3290
3291 register_exec_queue(q, ctx_type);
3292 enable_scheduling(q);
3293 }
3294
3295 /**
3296 * xe_guc_submit_print - GuC Submit Print.
3297 * @guc: GuC.
3298 * @p: drm_printer where it will be printed out.
3299 *
3300 * This function captures and prints snapshots of **all** GuC Engines.
3301 */
3302 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
3303 {
3304 struct xe_exec_queue *q;
3305 unsigned long index;
3306
3307 if (!xe_device_uc_enabled(guc_to_xe(guc)))
3308 return;
3309
3310 mutex_lock(&guc->submission_state.lock);
3311 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3312 guc_exec_queue_print(q, p);
3313 mutex_unlock(&guc->submission_state.lock);
3314 }
3315
3316 /**
3317 * xe_guc_has_registered_mlrc_queues - check whether there are any MLRC queues
3318 * registered with the GuC
3319 * @guc: GuC.
3320 *
3321 * Return: true if any MLRC queue is registered with the GuC, false otherwise.
3322 */
3323 bool xe_guc_has_registered_mlrc_queues(struct xe_guc *guc)
3324 {
3325 struct xe_exec_queue *q;
3326 unsigned long index;
3327
3328 guard(mutex)(&guc->submission_state.lock);
3329
3330 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
3331 if (q->width > 1)
3332 return true;
3333
3334 return false;
3335 }
3336
3337 /**
3338 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
3339 * exec queues registered to given GuC.
3340 * @guc: the &xe_guc struct instance
3341 * @scratch: scratch buffer to be used as temporary storage
3342 *
3343 * Returns: zero on success, negative error code on failure.
3344 */
3345 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
3346 {
3347 struct xe_exec_queue *q;
3348 unsigned long index;
3349 int err = 0;
3350
3351 mutex_lock(&guc->submission_state.lock);
3352 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
3353 /* Prevent redundant attempts to stop parallel queues */
3354 if (q->guc->id != index)
3355 continue;
3356
3357 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
3358 if (err)
3359 break;
3360 }
3361 mutex_unlock(&guc->submission_state.lock);
3362
3363 return err;
3364 }
3365