// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}

/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)

static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_check_timeout(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
}

static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static bool exec_queue_extra_ref(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
}

static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

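/*
 * Driver-managed teardown of the submission state: wait (up to 5s) for all
 * GuC context IDs to be released, drain the queue-destroy workqueue and only
 * then destroy the exec queue lookup xarray.
 */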
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

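/*
 * Take the submission lock once under fs_reclaim so lockdep records that this
 * lock nests inside the reclaim / dma-fence signalling path and can flag any
 * sleeping allocation done while the lock is held.
 */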
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION	80	/* in ms */
#define RC_YIELD_RATIO		20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_MASK(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path,
	 * worst case the user gets -ENOMEM on engine create and has to try
	 * again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

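/*
 * The helpers below build the KLV-based H2G UPDATE_CONTEXT_POLICIES message:
 * a header identifying the context followed by one (key, value) pair per
 * policy being updated.
 */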
static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

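/*
 * Push the queue's scheduling properties to the GuC: priority, execution
 * quantum (timeslice), preemption timeout and, for low-latency queues, the
 * SLPC frequency request hint.
 */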
static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE      (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

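/*
 * Build the context registration info from the first LRC, set up the software
 * work queue in the parallel scratch page for multi-LRC queues, then register
 * the context with the GuC and send its scheduling policies.
 */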
static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines when the engine is registered
	 * with the GuC, as their jobs signal immediately and we can't destroy
	 * an engine while the GuC still has a reference to it.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else
		__register_exec_queue(guc, &info);
	init_policies(guc, q);
}

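/*
 * Parallel (multi-LRC) submission uses a software work queue in the parallel
 * scratch page: the driver appends work items at wqi_tail and the GuC consumes
 * them, publishing its progress through wq_desc.head.
 */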
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE) {
			if (sleep_period_ms == 1024) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

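/*
 * A multi-LRC work item is laid out as: a header dword (type | len), the low
 * 32 bits of the context descriptor, a dword carrying the GuC id and the
 * parent ring tail, a fence id dword (written as 0 here) and one ring tail
 * dword per child LRC.
 */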
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN);
	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	xe_gt_assert(guc_to_gt(guc), i ==
		     XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID);
	wqi[i++] = 0;
	xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL);
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

static int wq_items_rebase(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	int i = q->guc->wqi_head;

	/* the ring starts after a header struct */
	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0]));

	while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
		u32 len_dw, type, val;

		if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE))
			break;

		val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) +
					 XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN,
					 WQ_SIZE / sizeof(u32));
		len_dw = FIELD_GET(WQ_LEN_MASK, val);
		type = FIELD_GET(WQ_TYPE_MASK, val);

		if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32)))
			break;

		if (type == WQ_TYPE_MULTI_LRC) {
			val = xe_lrc_descriptor(q->lrc[0]);
			xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) +
					   XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW,
					   WQ_SIZE / sizeof(u32), val);
		} else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) {
			break;
		}

		i += (len_dw + 1) * sizeof(u32);
	}

	if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
		xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n");
		return -EBADMSG;
	}
	return 0;
}

#define RESUME_PENDING	~0x0ull
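/*
 * Submission either bumps the ring tail (single LRC) or appends a work item
 * (parallel). The first submission additionally enables scheduling via
 * SCHED_CONTEXT_MODE_SET; later ones only send SCHED_CONTEXT, with parallel
 * queues needing an extra SCHED_CONTEXT right after the enable.
 */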
static void submit_exec_queue(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (xe_exec_queue_is_parallel(q))
		wq_item_append(q);
	else
		xe_lrc_set_ring_tail(lrc, lrc->ring.tail);

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

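/*
 * drm_sched run_job backend: register the context on first use, emit the ring
 * commands (LR jobs are emitted in the exec IOCTL instead) and submit. LR jobs
 * return no fence as their job fences are signalled immediately.
 */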
static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct dma_fence *fence = NULL;
	bool lr = xe_exec_queue_is_lr(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
			q->ring_ops->emit_job(job);
		submit_exec_queue(q);
	}

	if (lr) {
		xe_sched_job_set_error(job, -EOPNOTSUPP);
		dma_fence_put(job->fence);	/* Drop ref from xe_sched_job_arm */
	} else {
		fence = job->fence;
	}

	return fence;
}

/**
 * xe_guc_jobs_ring_rebase - Re-emit ring commands of requests pending
 * on all queues under a guc.
 * @guc: the &xe_guc struct instance
 */
void xe_guc_jobs_ring_rebase(struct xe_guc *guc)
{
	struct xe_exec_queue *q;
	unsigned long index;

	/*
	 * This routine is used within VF migration recovery. This means
	 * using the lock here introduces a restriction: we cannot wait
	 * for any GFX HW response while the lock is taken.
	 */
	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_killed_or_banned_or_wedged(q))
			continue;
		xe_exec_queue_jobs_ring_restore(q);
	}
	mutex_unlock(&guc->submission_state.lock);
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable)			\
	u32 action[] = {						\
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,			\
		q->guc->id,						\
		GUC_CONTEXT_##enable_disable,				\
	}

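/*
 * For reference, MAKE_SCHED_CONTEXT_ACTION(q, DISABLE) above expands to:
 *
 *	u32 action[] = {
 *		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
 *		q->guc->id,
 *		GUC_CONTEXT_DISABLE,
 *	};
 */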
static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc),
				 HZ * 5);
	if (!ret) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* to wake up xe_wait_user_fence ioctl if exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with the GuC by taking a ref to
 * each queue. Register a DRMM handler to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2, "
			  "although device is wedged\n");
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != 2)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gpu_scheduler *sched = &ge->sched;
	bool wedged = false;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
	trace_xe_exec_queue_lr_cleanup(q);

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc), HZ * 5);
		if (!ret) {
			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
				   q->guc->id);
			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
				       q->guc->id);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);

	xe_sched_submission_start(sched);
}

#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

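/*
 * check_timeout() compares the LRC context timestamp against the per-job
 * timestamp stored in the LRC and converts the delta to ms. As a rough
 * example, at the usual 19.2 MHz reference clock a delta of 0x0124F800
 * (19,200,000 ticks) corresponds to ~1000 ms of running time.
 */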
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * Counter wraps at ~223s at the usual 19.2MHz, be paranoid and catch
	 * possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Ensure timeout is within 5% to account for GuC scheduling latency
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc), HZ * 5);
	if (!ret || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate)
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}

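/*
 * TDR entry point. The flow is roughly: stop the scheduler, capture engine
 * state if nothing has been captured yet, disable scheduling so the LRC
 * timestamps can be sampled, and re-enable/rearm if the job has not actually
 * exceeded its timeout. Otherwise ban the queue, deregister it from the GuC
 * and signal all outstanding fences with an error.
 */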
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	unsigned int fw_ref;
	int err = -ETIME;
	pid_t pid = -1;
	int i = 0;
	bool wedged = false, skip_timeout_check;

	/*
	 * TDR has fired before the free job worker. Common if the exec queue is
	 * immediately closed after the last fence signaled. Add back to the
	 * pending list so the job can be freed and kick the scheduler, ensuring
	 * the free job is not lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/*
	 * If devcoredump not captured and GuC capture for the job is not ready,
	 * do manual capture first and decide later if we need to use it
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);

		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
	}

	/*
	 * XXX: Sampling timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read timestamp. We could read the
	 * timestamp from a register to accumulate current running time but this
	 * doesn't work for SRIOV. For now assuming timeouts in wedged mode are
	 * genuine timeouts.
	 */
	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Engine state now stable, disable scheduling to check timestamp */
	if (!wedged && exec_queue_registered(q)) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (!exec_queue_destroyed(q)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc), HZ * 5);
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			/*
			 * Flag communicates to the G2H handler that the
			 * schedule disable originated from a timeout check.
			 * The G2H handler then avoids triggering cleanup or
			 * deregistering the exec queue.
			 */
			set_exec_queue_check_timeout(q);
			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken, the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc), HZ * 5);
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			set_exec_queue_extra_ref(q);
			xe_exec_queue_get(q);	/* GT reset owns this */
			set_exec_queue_banned(q);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/*
	 * Check if job is actually timed out, if so restart job execution and TDR
	 */
	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
	    !exec_queue_reset(q) && exec_queue_registered(q)) {
		clear_exec_queue_check_timeout(q);
		goto sched_enable;
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			clear_exec_queue_check_timeout(q);
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Finish cleaning up exec queue via deregister */
	set_exec_queue_banned(q);
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		set_exec_queue_extra_ref(q);
		xe_exec_queue_get(q);
		__deregister_exec_queue(guc, q);
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);

	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

	return DRM_GPU_SCHED_STAT_RESET;

sched_enable:
	enable_scheduling(q);
rearm:
	/*
	 * XXX: Ideally want to adjust timeout based on current execution time
	 * but there is not currently an easy way to do so in the DRM scheduler.
	 * With some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to the sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_pm_runtime_get(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);

	xe_pm_runtime_put(guc_to_xe(guc));
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
	/*
	 * Might be done from within the GPU scheduler, need to do async as we
	 * fini the scheduler when the engine is fini'd, the scheduler can't
	 * complete fini within itself (circular dependency). Async resolves
	 * this and we don't really care when everything is fini'd, just that it
	 * is.
	 */
	guc_exec_queue_destroy_async(q);
}

static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
	trace_xe_exec_queue_cleanup_entity(q);

	if (exec_queue_registered(q))
		disable_scheduling_deregister(guc, q);
	else
		__guc_exec_queue_destroy(guc, q);
}

static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
{
	return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q);
}

static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q))
		init_policies(guc, q);
	kfree(msg);
}

static void __suspend_fence_signal(struct xe_exec_queue *q)
{
	if (!q->guc->suspend_pending)
		return;

	WRITE_ONCE(q->guc->suspend_pending, false);
	wake_up(&q->guc->suspend_wait);
}

static void suspend_fence_signal(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) ||
		     xe_guc_read_stopped(guc));
	xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending);

	__suspend_fence_signal(q);
}

static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;
	struct xe_guc *guc = exec_queue_to_guc(q);

	if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
	    exec_queue_enabled(q)) {
		wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
			   xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));

		if (!xe_guc_read_stopped(guc)) {
			s64 since_resume_ms =
				ktime_ms_delta(ktime_get(),
					       q->guc->resume_time);
			s64 wait_ms = q->vm->preempt.min_run_period_ms -
				since_resume_ms;

			if (wait_ms > 0 && q->guc->resume_time)
				msleep(wait_ms);

			set_exec_queue_suspended(q);
			disable_scheduling(q, false);
		}
	} else if (q->guc->suspend_pending) {
		set_exec_queue_suspended(q);
		suspend_fence_signal(q);
	}
}

static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
{
	struct xe_exec_queue *q = msg->private_data;

	if (guc_exec_queue_allowed_to_change_state(q)) {
		clear_exec_queue_suspended(q);
		if (!exec_queue_enabled(q)) {
			q->guc->resume_time = RESUME_PENDING;
			enable_scheduling(q);
		}
	} else {
		clear_exec_queue_suspended(q);
	}
}

#define CLEANUP		1	/* Non-zero values to catch uninitialized msg */
#define SET_SCHED_PROPS	2
#define SUSPEND		3
#define RESUME		4
#define OPCODE_MASK	0xf
#define MSG_LOCKED	BIT(8)

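/*
 * Scheduler messages carry one of the opcodes above in the low bits; the
 * MSG_LOCKED flag only tells guc_exec_queue_add_msg() that the caller already
 * holds the scheduler message lock and is masked off before processing.
 */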
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
	struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data));

	trace_xe_sched_msg_recv(msg);

	switch (msg->opcode) {
	case CLEANUP:
		__guc_exec_queue_process_msg_cleanup(msg);
		break;
	case SET_SCHED_PROPS:
		__guc_exec_queue_process_msg_set_sched_props(msg);
		break;
	case SUSPEND:
		__guc_exec_queue_process_msg_suspend(msg);
		break;
	case RESUME:
		__guc_exec_queue_process_msg_resume(msg);
		break;
	default:
		XE_WARN_ON("Unknown message type");
	}

	xe_pm_runtime_put(xe);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = guc_exec_queue_run_job,
	.free_job = guc_exec_queue_free_job,
	.timedout_job = guc_exec_queue_timedout_job,
};

static const struct xe_sched_backend_ops xe_sched_ops = {
	.process_msg = guc_exec_queue_process_msg,
};

static int guc_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_gpu_scheduler *sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_guc_exec_queue *ge;
	long timeout;
	int err, i;

	xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc)));

	ge = kzalloc(sizeof(*ge), GFP_KERNEL);
	if (!ge)
		return -ENOMEM;

	q->guc = ge;
	ge->q = q;
	init_rcu_head(&ge->rcu);
	init_waitqueue_head(&ge->suspend_wait);

	for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
		INIT_LIST_HEAD(&ge->static_msgs[i].link);

	timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
		  msecs_to_jiffies(q->sched_props.job_timeout_ms);
	err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
			    NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
			    timeout, guc_to_gt(guc)->ordered_wq, NULL,
			    q->name, gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &ge->sched;
	err = xe_sched_entity_init(&ge->entity, sched);
	if (err)
		goto err_sched;

	if (xe_exec_queue_is_lr(q))
		INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup);

	mutex_lock(&guc->submission_state.lock);

	err = alloc_guc_id(guc, q);
	if (err)
		goto err_entity;

	q->entity = &ge->entity;

	if (xe_guc_read_stopped(guc))
		xe_sched_stop(sched);

	mutex_unlock(&guc->submission_state.lock);

	xe_exec_queue_assign_name(q, q->guc->id);

	trace_xe_exec_queue_create(q);

	return 0;

err_entity:
	mutex_unlock(&guc->submission_state.lock);
	xe_sched_entity_fini(&ge->entity);
err_sched:
	xe_sched_fini(&ge->sched);
err_free:
	kfree(ge);

	return err;
}
1688
1689 static void guc_exec_queue_kill(struct xe_exec_queue *q)
1690 {
1691 trace_xe_exec_queue_kill(q);
1692 set_exec_queue_killed(q);
1693 __suspend_fence_signal(q);
1694 xe_guc_exec_queue_trigger_cleanup(q);
1695 }
1696
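/*
 * Queue a message on the exec queue's scheduler. A runtime PM reference is
 * taken here and released once the message has been processed by
 * guc_exec_queue_process_msg().
 */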
1697 static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg,
1698 u32 opcode)
1699 {
1700 xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q)));
1701
1702 INIT_LIST_HEAD(&msg->link);
1703 msg->opcode = opcode & OPCODE_MASK;
1704 msg->private_data = q;
1705
1706 trace_xe_sched_msg_add(msg);
1707 if (opcode & MSG_LOCKED)
1708 xe_sched_add_msg_locked(&q->guc->sched, msg);
1709 else
1710 xe_sched_add_msg(&q->guc->sched, msg);
1711 }
1712
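/*
 * Add a static message only if it is not already queued. Called with the
 * scheduler message lock held, hence MSG_LOCKED. Returns true if the message
 * was added.
 */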
1713 static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
1714 struct xe_sched_msg *msg,
1715 u32 opcode)
1716 {
1717 if (!list_empty(&msg->link))
1718 return false;
1719
1720 guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED);
1721
1722 return true;
1723 }
1724
1725 #define STATIC_MSG_CLEANUP 0
1726 #define STATIC_MSG_SUSPEND 1
1727 #define STATIC_MSG_RESUME 2
1728 static void guc_exec_queue_destroy(struct xe_exec_queue *q)
1729 {
1730 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
1731
1732 if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
1733 guc_exec_queue_add_msg(q, msg, CLEANUP);
1734 else
1735 __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
1736 }
1737
1738 static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
1739 enum xe_exec_queue_priority priority)
1740 {
1741 struct xe_sched_msg *msg;
1742
1743 if (q->sched_props.priority == priority ||
1744 exec_queue_killed_or_banned_or_wedged(q))
1745 return 0;
1746
1747 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1748 if (!msg)
1749 return -ENOMEM;
1750
1751 q->sched_props.priority = priority;
1752 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1753
1754 return 0;
1755 }
1756
1757 static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
1758 {
1759 struct xe_sched_msg *msg;
1760
1761 if (q->sched_props.timeslice_us == timeslice_us ||
1762 exec_queue_killed_or_banned_or_wedged(q))
1763 return 0;
1764
1765 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1766 if (!msg)
1767 return -ENOMEM;
1768
1769 q->sched_props.timeslice_us = timeslice_us;
1770 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1771
1772 return 0;
1773 }
1774
1775 static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
1776 u32 preempt_timeout_us)
1777 {
1778 struct xe_sched_msg *msg;
1779
1780 if (q->sched_props.preempt_timeout_us == preempt_timeout_us ||
1781 exec_queue_killed_or_banned_or_wedged(q))
1782 return 0;
1783
1784 msg = kmalloc(sizeof(*msg), GFP_KERNEL);
1785 if (!msg)
1786 return -ENOMEM;
1787
1788 q->sched_props.preempt_timeout_us = preempt_timeout_us;
1789 guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS);
1790
1791 return 0;
1792 }
1793
1794 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
1795 {
1796 struct xe_gpu_scheduler *sched = &q->guc->sched;
1797 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
1798
1799 if (exec_queue_killed_or_banned_or_wedged(q))
1800 return -EINVAL;
1801
1802 xe_sched_msg_lock(sched);
1803 if (guc_exec_queue_try_add_msg(q, msg, SUSPEND))
1804 q->guc->suspend_pending = true;
1805 xe_sched_msg_unlock(sched);
1806
1807 return 0;
1808 }
1809
1810 static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
1811 {
1812 struct xe_guc *guc = exec_queue_to_guc(q);
1813 int ret;
1814
1815 /*
1816 * Likely don't need to check exec_queue_killed() as we clear
1817 * suspend_pending upon kill, but to be paranoid about races in which
1818 * suspend_pending is set after kill, also check for kill here.
1819 */
1820 ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
1821 !READ_ONCE(q->guc->suspend_pending) ||
1822 exec_queue_killed(q) ||
1823 xe_guc_read_stopped(guc),
1824 HZ * 5);
1825
1826 if (!ret) {
1827 xe_gt_warn(guc_to_gt(guc),
1828 "Suspend fence, guc_id=%d, failed to respond",
1829 q->guc->id);
1830 /* XXX: Trigger GT reset? */
1831 return -ETIME;
1832 }
1833
1834 return ret < 0 ? ret : 0;
1835 }
1836
1837 static void guc_exec_queue_resume(struct xe_exec_queue *q)
1838 {
1839 struct xe_gpu_scheduler *sched = &q->guc->sched;
1840 struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME;
1841 struct xe_guc *guc = exec_queue_to_guc(q);
1842
1843 xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending);
1844
1845 xe_sched_msg_lock(sched);
1846 guc_exec_queue_try_add_msg(q, msg, RESUME);
1847 xe_sched_msg_unlock(sched);
1848 }
1849
1850 static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
1851 {
1852 return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q);
1853 }
1854
1855 /*
1856 * These functions form an abstraction layer which other parts of Xe can
1857 * use to trap into the GuC backend. Aside from init, these functions
1858 * really shouldn't do much other than trap into the DRM scheduler, which
1859 * synchronizes these operations.
1860 */
1861 static const struct xe_exec_queue_ops guc_exec_queue_ops = {
1862 .init = guc_exec_queue_init,
1863 .kill = guc_exec_queue_kill,
1864 .fini = guc_exec_queue_fini,
1865 .destroy = guc_exec_queue_destroy,
1866 .set_priority = guc_exec_queue_set_priority,
1867 .set_timeslice = guc_exec_queue_set_timeslice,
1868 .set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
1869 .suspend = guc_exec_queue_suspend,
1870 .suspend_wait = guc_exec_queue_suspend_wait,
1871 .resume = guc_exec_queue_resume,
1872 .reset_status = guc_exec_queue_reset_status,
1873 };
1874
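/*
 * Stop submission on a single exec queue during a GuC/GT reset: flush the
 * DRM scheduler, drop references for any lost G2H, clear transient state
 * bits, and ban queues whose jobs were caught mid-execution.
 */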
1875 static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
1876 {
1877 struct xe_gpu_scheduler *sched = &q->guc->sched;
1878
1879 /* Stop scheduling + flush any DRM scheduler operations */
1880 xe_sched_submission_stop(sched);
1881
1882 /* Clean up lost G2H + reset engine state */
1883 if (exec_queue_registered(q)) {
1884 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
1885 xe_exec_queue_put(q);
1886 else if (exec_queue_destroyed(q))
1887 __guc_exec_queue_destroy(guc, q);
1888 }
1889 if (q->guc->suspend_pending) {
1890 set_exec_queue_suspended(q);
1891 suspend_fence_signal(q);
1892 }
1893 atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED |
1894 EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED |
1895 EXEC_QUEUE_STATE_SUSPENDED,
1896 &q->guc->state);
1897 q->guc->resume_time = 0;
1898 trace_xe_exec_queue_stop(q);
1899
1900 /*
1901 * Ban any engine (aside from kernel and engines used for VM ops) that has
1902 * a started but not completed job, or whose job has gone through a GT
1903 * reset more than twice.
1904 */
1905 if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
1906 struct xe_sched_job *job = xe_sched_first_pending_job(sched);
1907 bool ban = false;
1908
1909 if (job) {
1910 if ((xe_sched_job_started(job) &&
1911 !xe_sched_job_completed(job)) ||
1912 xe_sched_invalidate_job(job, 2)) {
1913 trace_xe_sched_job_ban(job);
1914 ban = true;
1915 }
1916 } else if (xe_exec_queue_is_lr(q) &&
1917 !xe_lrc_ring_is_idle(q->lrc[0])) {
1918 ban = true;
1919 }
1920
1921 if (ban) {
1922 set_exec_queue_banned(q);
1923 xe_guc_exec_queue_trigger_cleanup(q);
1924 }
1925 }
1926 }
1927
1928 /**
1929 * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
1930 * @guc: the &xe_guc struct instance
1931 *
 * Return: non-zero if reset calls were already blocked, 0 otherwise.
 */
1932 int xe_guc_submit_reset_block(struct xe_guc *guc)
1933 {
1934 return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
1935 }
1936
1937 /**
1938 * xe_guc_submit_reset_unblock - Allow reset calls on given GuC again.
1939 * @guc: the &xe_guc struct instance
1940 */
1941 void xe_guc_submit_reset_unblock(struct xe_guc *guc)
1942 {
1943 atomic_set_release(&guc->submission_state.reset_blocked, 0);
1944 wake_up_all(&guc->ct.wq);
1945 }
1946
1947 static int guc_submit_reset_is_blocked(struct xe_guc *guc)
1948 {
1949 return atomic_read_acquire(&guc->submission_state.reset_blocked);
1950 }
1951
1952 /* Maximum time a reset can remain blocked, in jiffies */
1953 #define RESET_BLOCK_PERIOD_MAX (HZ * 5)
1954
1955 /**
1956 * xe_guc_wait_reset_unblock - Wait until reset blocking flag is lifted, or timeout.
1957 * @guc: the &xe_guc struct instance
1958 *
 * Return: non-zero if the reset block was lifted, 0 if the timeout elapsed
 * without the block being lifted.
 */
1959 int xe_guc_wait_reset_unblock(struct xe_guc *guc)
1960 {
1961 return wait_event_timeout(guc->ct.wq,
1962 !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
1963 }
1964
1965 int xe_guc_submit_reset_prepare(struct xe_guc *guc)
1966 {
1967 int ret;
1968
1969 if (!guc->submission_state.initialized)
1970 return 0;
1971
1972 /*
1973 * Using an atomic here rather than submission_state.lock as this
1974 * function can be called while holding the CT lock (engine reset
1975 * failure). submission_state.lock needs the CT lock to resubmit jobs.
1976 * The atomic is not ideal, but it works to protect against concurrent
1977 * resets and to release any TDRs waiting on guc->submission_state.stopped.
1978 */
1979 ret = atomic_fetch_or(1, &guc->submission_state.stopped);
1980 smp_wmb();
1981 wake_up_all(&guc->ct.wq);
1982
1983 return ret;
1984 }
1985
1986 void xe_guc_submit_reset_wait(struct xe_guc *guc)
1987 {
1988 wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
1989 !xe_guc_read_stopped(guc));
1990 }
1991
1992 void xe_guc_submit_stop(struct xe_guc *guc)
1993 {
1994 struct xe_exec_queue *q;
1995 unsigned long index;
1996
1997 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
1998
1999 mutex_lock(&guc->submission_state.lock);
2000
2001 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2002 /* Prevent redundant attempts to stop parallel queues */
2003 if (q->guc->id != index)
2004 continue;
2005
2006 guc_exec_queue_stop(guc, q);
2007 }
2008
2009 mutex_unlock(&guc->submission_state.lock);
2010
2011 /*
2012 * No one can enter the backend at this point, aside from new engine
2013 * creation, which is protected by guc->submission_state.lock.
2014 */
2016 }
2017
2018 /**
2019 * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
2020 * @guc: the &xe_guc struct instance whose scheduler is to be disabled
2021 */
2022 void xe_guc_submit_pause(struct xe_guc *guc)
2023 {
2024 struct xe_exec_queue *q;
2025 unsigned long index;
2026
2027 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2028 xe_sched_submission_stop_async(&q->guc->sched);
2029 }
2030
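/*
 * Restart a single exec queue after a GuC/GT reset: reset each LRC ring head
 * to its software tail and resubmit pending jobs (unless the queue is killed,
 * banned or wedged), then restart submission and the TDR.
 */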
2031 static void guc_exec_queue_start(struct xe_exec_queue *q)
2032 {
2033 struct xe_gpu_scheduler *sched = &q->guc->sched;
2034
2035 if (!exec_queue_killed_or_banned_or_wedged(q)) {
2036 int i;
2037
2038 trace_xe_exec_queue_resubmit(q);
2039 for (i = 0; i < q->width; ++i)
2040 xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
2041 xe_sched_resubmit_jobs(sched);
2042 }
2043
2044 xe_sched_submission_start(sched);
2045 xe_sched_submission_resume_tdr(sched);
2046 }
2047
2048 int xe_guc_submit_start(struct xe_guc *guc)
2049 {
2050 struct xe_exec_queue *q;
2051 unsigned long index;
2052
2053 xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1);
2054
2055 mutex_lock(&guc->submission_state.lock);
2056 atomic_dec(&guc->submission_state.stopped);
2057 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2058 /* Prevent redundant attempts to start parallel queues */
2059 if (q->guc->id != index)
2060 continue;
2061
2062 guc_exec_queue_start(q);
2063 }
2064 mutex_unlock(&guc->submission_state.lock);
2065
2066 wake_up_all(&guc->ct.wq);
2067
2068 return 0;
2069 }
2070
2071 static void guc_exec_queue_unpause(struct xe_exec_queue *q)
2072 {
2073 struct xe_gpu_scheduler *sched = &q->guc->sched;
2074
2075 xe_sched_submission_start(sched);
2076 }
2077
2078 /**
2079 * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
2080 * @guc: the &xe_guc struct instance whose scheduler is to be enabled
2081 */
2082 void xe_guc_submit_unpause(struct xe_guc *guc)
2083 {
2084 struct xe_exec_queue *q;
2085 unsigned long index;
2086
2087 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2088 guc_exec_queue_unpause(q);
2089
2090 wake_up_all(&guc->ct.wq);
2091 }
2092
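/*
 * Resolve the guc_id from a G2H message to its exec queue, validating that
 * the ID is in range and that a queue is actually registered at that index.
 */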
2093 static struct xe_exec_queue *
2094 g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
2095 {
2096 struct xe_gt *gt = guc_to_gt(guc);
2097 struct xe_exec_queue *q;
2098
2099 if (unlikely(guc_id >= GUC_ID_MAX)) {
2100 xe_gt_err(gt, "Invalid guc_id %u\n", guc_id);
2101 return NULL;
2102 }
2103
2104 q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
2105 if (unlikely(!q)) {
2106 xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
2107 return NULL;
2108 }
2109
2110 xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id);
2111 xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width));
2112
2113 return q;
2114 }
2115
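/*
 * Send a deregister context H2G for a queue that is marked destroyed and
 * registered; completion is reported back via
 * xe_guc_deregister_done_handler().
 */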
2116 static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
2117 {
2118 u32 action[] = {
2119 XE_GUC_ACTION_DEREGISTER_CONTEXT,
2120 q->guc->id,
2121 };
2122
2123 xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q));
2124 xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
2125 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
2126 xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
2127
2128 trace_xe_exec_queue_deregister(q);
2129
2130 xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
2131 }
2132
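/*
 * Process a scheduling done G2H: on enable, record the resume time and clear
 * the pending enable bit; on disable, signal a pending suspend or, for a
 * destroyed queue, continue with deregistration.
 */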
2133 static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
2134 u32 runnable_state)
2135 {
2136 trace_xe_exec_queue_scheduling_done(q);
2137
2138 if (runnable_state == 1) {
2139 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
2140
2141 q->guc->resume_time = ktime_get();
2142 clear_exec_queue_pending_enable(q);
2143 smp_wmb();
2144 wake_up_all(&guc->ct.wq);
2145 } else {
2146 bool check_timeout = exec_queue_check_timeout(q);
2147
2148 xe_gt_assert(guc_to_gt(guc), runnable_state == 0);
2149 xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q));
2150
2151 if (q->guc->suspend_pending) {
2152 suspend_fence_signal(q);
2153 clear_exec_queue_pending_disable(q);
2154 } else {
2155 if (exec_queue_banned(q) || check_timeout) {
2156 smp_wmb();
2157 wake_up_all(&guc->ct.wq);
2158 }
2159 if (!check_timeout && exec_queue_destroyed(q)) {
2160 /*
2161 * Make sure to clear the pending_disable only
2162 * after sampling the destroyed state. We want
2163 * to ensure we don't trigger the unregister too
2164 * early with something intending to only
2165 * disable scheduling. The caller doing the
2166 * destroy must wait for an ongoing
2167 * pending_disable before marking as destroyed.
2168 */
2169 clear_exec_queue_pending_disable(q);
2170 deregister_exec_queue(guc, q);
2171 } else {
2172 clear_exec_queue_pending_disable(q);
2173 }
2174 }
2175 }
2176 }
2177
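/**
 * xe_guc_sched_done_handler - Handle a scheduling done G2H message
 * @guc: the &xe_guc struct instance
 * @msg: G2H message payload (guc_id, runnable_state)
 * @len: length of @msg in dwords
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */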
2178 int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2179 {
2180 struct xe_exec_queue *q;
2181 u32 guc_id, runnable_state;
2182
2183 if (unlikely(len < 2))
2184 return -EPROTO;
2185
2186 guc_id = msg[0];
2187 runnable_state = msg[1];
2188
2189 q = g2h_exec_queue_lookup(guc, guc_id);
2190 if (unlikely(!q))
2191 return -EPROTO;
2192
2193 if (unlikely(!exec_queue_pending_enable(q) &&
2194 !exec_queue_pending_disable(q))) {
2195 xe_gt_err(guc_to_gt(guc),
2196 "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u",
2197 atomic_read(&q->guc->state), q->guc->id,
2198 runnable_state);
2199 return -EPROTO;
2200 }
2201
2202 handle_sched_done(guc, q, runnable_state);
2203
2204 return 0;
2205 }
2206
2207 static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
2208 {
2209 trace_xe_exec_queue_deregister_done(q);
2210
2211 clear_exec_queue_registered(q);
2212
2213 if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
2214 xe_exec_queue_put(q);
2215 else
2216 __guc_exec_queue_destroy(guc, q);
2217 }
2218
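/**
 * xe_guc_deregister_done_handler - Handle a deregister context done G2H message
 * @guc: the &xe_guc struct instance
 * @msg: G2H message payload (guc_id)
 * @len: length of @msg in dwords
 *
 * Return: 0 on success, -EPROTO on a malformed message or unexpected state.
 */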
2219 int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
2220 {
2221 struct xe_exec_queue *q;
2222 u32 guc_id;
2223
2224 if (unlikely(len < 1))
2225 return -EPROTO;
2226
2227 guc_id = msg[0];
2228
2229 q = g2h_exec_queue_lookup(guc, guc_id);
2230 if (unlikely(!q))
2231 return -EPROTO;
2232
2233 if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
2234 exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
2235 xe_gt_err(guc_to_gt(guc),
2236 "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d",
2237 atomic_read(&q->guc->state), q->guc->id);
2238 return -EPROTO;
2239 }
2240
2241 handle_deregister_done(guc, q);
2242
2243 return 0;
2244 }
2245
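/**
 * xe_guc_exec_queue_reset_handler - Handle an engine reset notification from the GuC
 * @guc: the &xe_guc struct instance
 * @msg: G2H message payload (guc_id)
 * @len: length of @msg in dwords
 *
 * Marks the exec queue as reset and triggers cleanup unless it is already
 * banned or being timed out.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */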
2246 int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len)
2247 {
2248 struct xe_gt *gt = guc_to_gt(guc);
2249 struct xe_exec_queue *q;
2250 u32 guc_id;
2251
2252 if (unlikely(len < 1))
2253 return -EPROTO;
2254
2255 guc_id = msg[0];
2256
2257 q = g2h_exec_queue_lookup(guc, guc_id);
2258 if (unlikely(!q))
2259 return -EPROTO;
2260
2261 xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
2262 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2263
2264 trace_xe_exec_queue_reset(q);
2265
2266 /*
2267 * A banned engine is a NOP at this point (came from
2268 * guc_exec_queue_timedout_job). Otherwise, kick the drm scheduler to
2269 * cancel jobs by setting the job timeout to the minimum value, which in
2270 * turn kicks guc_exec_queue_timedout_job.
2271 */
2272 set_exec_queue_reset(q);
2273 if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2274 xe_guc_exec_queue_trigger_cleanup(q);
2275
2276 return 0;
2277 }
2278
2279 /*
2280 * xe_guc_error_capture_handler - Handler for the GuC error capture notification
2281 * @guc: The GuC object
2282 * @msg: Pointer to the message
2283 * @len: The message length
2284 *
2285 * When GuC captured data is ready, GuC sends the
2286 * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host. This function
2287 * is called first to check the status before processing the data that comes
2288 * with the message.
2289 * Return: 0 on success, negative error code on failure.
2290 */
2291 int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len)
2292 {
2293 u32 status;
2294
2295 if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN))
2296 return -EPROTO;
2297
2298 status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
2299 if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
2300 xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space");
2301
2302 xe_guc_capture_process(guc);
2303
2304 return 0;
2305 }
2306
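/**
 * xe_guc_exec_queue_memory_cat_error_handler - Handle a memory CAT error G2H message
 * @guc: the &xe_guc struct instance
 * @msg: G2H message payload (guc_id and, optionally, the HW-defined error type)
 * @len: length of @msg in dwords
 *
 * Treated the same as an engine reset: the exec queue is marked as reset and
 * cleanup is triggered unless it is already banned or being timed out.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */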
2307 int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2308 u32 len)
2309 {
2310 struct xe_gt *gt = guc_to_gt(guc);
2311 struct xe_exec_queue *q;
2312 u32 guc_id;
2313 u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
2314
2315 if (unlikely(!len || len > 2))
2316 return -EPROTO;
2317
2318 guc_id = msg[0];
2319
2320 if (len == 2)
2321 type = msg[1];
2322
2323 if (guc_id == GUC_ID_UNKNOWN) {
2324 /*
2325 * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any PF/VF
2326 * context. In such a case, only the PF is notified about the fault.
2327 */
2328 xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n");
2329 return 0;
2330 }
2331
2332 q = g2h_exec_queue_lookup(guc, guc_id);
2333 if (unlikely(!q))
2334 return -EPROTO;
2335
2336 /*
2337 * The type is HW-defined and changes based on platform, so we don't
2338 * decode it in the kernel and only check if it is valid.
2339 * See bspec 54047 and 72187 for details.
2340 */
2341 if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
2342 xe_gt_dbg(gt,
2343 "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
2344 type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2345 else
2346 xe_gt_dbg(gt,
2347 "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
2348 xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
2349
2350 trace_xe_exec_queue_memory_cat_error(q);
2351
2352 /* Treat the same as engine reset */
2353 set_exec_queue_reset(q);
2354 if (!exec_queue_banned(q) && !exec_queue_check_timeout(q))
2355 xe_guc_exec_queue_trigger_cleanup(q);
2356
2357 return 0;
2358 }
2359
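/**
 * xe_guc_exec_queue_reset_failure_handler - Handle a failed engine reset notification
 * @guc: the &xe_guc struct instance
 * @msg: G2H message payload (guc_class, instance, reason)
 * @len: length of @msg in dwords
 *
 * Logs the failure and schedules an asynchronous GT reset.
 *
 * Return: 0 on success, -EPROTO on a malformed message.
 */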
2360 int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len)
2361 {
2362 struct xe_gt *gt = guc_to_gt(guc);
2363 u8 guc_class, instance;
2364 u32 reason;
2365
2366 if (unlikely(len != 3))
2367 return -EPROTO;
2368
2369 guc_class = msg[0];
2370 instance = msg[1];
2371 reason = msg[2];
2372
2373 /* Unexpected failure of a hardware feature, log an actual error */
2374 xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X",
2375 guc_class, instance, reason);
2376
2377 xe_gt_reset_async(gt);
2378
2379 return 0;
2380 }
2381
2382 static void
2383 guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
2384 struct xe_guc_submit_exec_queue_snapshot *snapshot)
2385 {
2386 struct xe_guc *guc = exec_queue_to_guc(q);
2387 struct xe_device *xe = guc_to_xe(guc);
2388 struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
2389 int i;
2390
2391 snapshot->guc.wqi_head = q->guc->wqi_head;
2392 snapshot->guc.wqi_tail = q->guc->wqi_tail;
2393 snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head);
2394 snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail);
2395 snapshot->parallel.wq_desc.status = parallel_read(xe, map,
2396 wq_desc.wq_status);
2397
2398 if (snapshot->parallel.wq_desc.head !=
2399 snapshot->parallel.wq_desc.tail) {
2400 for (i = snapshot->parallel.wq_desc.head;
2401 i != snapshot->parallel.wq_desc.tail;
2402 i = (i + sizeof(u32)) % WQ_SIZE)
2403 snapshot->parallel.wq[i / sizeof(u32)] =
2404 parallel_read(xe, map, wq[i / sizeof(u32)]);
2405 }
2406 }
2407
2408 static void
2409 guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2410 struct drm_printer *p)
2411 {
2412 int i;
2413
2414 drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n",
2415 snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head);
2416 drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n",
2417 snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail);
2418 drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status);
2419
2420 if (snapshot->parallel.wq_desc.head !=
2421 snapshot->parallel.wq_desc.tail) {
2422 for (i = snapshot->parallel.wq_desc.head;
2423 i != snapshot->parallel.wq_desc.tail;
2424 i = (i + sizeof(u32)) % WQ_SIZE)
2425 drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32),
2426 snapshot->parallel.wq[i / sizeof(u32)]);
2427 }
2428 }
2429
2430 /**
2431 * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
2432 * @q: faulty exec queue
2433 *
2434 * This can be printed out at a later stage, for example during
2435 * dev_coredump analysis.
2436 *
2437 * Returns: a GuC Submit Engine snapshot object that must be freed by the
2438 * caller, using `xe_guc_exec_queue_snapshot_free`.
2439 */
2440 struct xe_guc_submit_exec_queue_snapshot *
2441 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
2442 {
2443 struct xe_gpu_scheduler *sched = &q->guc->sched;
2444 struct xe_guc_submit_exec_queue_snapshot *snapshot;
2445 int i;
2446
2447 snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
2448
2449 if (!snapshot)
2450 return NULL;
2451
2452 snapshot->guc.id = q->guc->id;
2453 memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
2454 snapshot->class = q->class;
2455 snapshot->logical_mask = q->logical_mask;
2456 snapshot->width = q->width;
2457 snapshot->refcount = kref_read(&q->refcount);
2458 snapshot->sched_timeout = sched->base.timeout;
2459 snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
2460 snapshot->sched_props.preempt_timeout_us =
2461 q->sched_props.preempt_timeout_us;
2462
2463 snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
2464 GFP_ATOMIC);
2465
2466 if (snapshot->lrc) {
2467 for (i = 0; i < q->width; ++i) {
2468 struct xe_lrc *lrc = q->lrc[i];
2469
2470 snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
2471 }
2472 }
2473
2474 snapshot->schedule_state = atomic_read(&q->guc->state);
2475 snapshot->exec_queue_flags = q->flags;
2476
2477 snapshot->parallel_execution = xe_exec_queue_is_parallel(q);
2478 if (snapshot->parallel_execution)
2479 guc_exec_queue_wq_snapshot_capture(q, snapshot);
2480
2481 spin_lock(&sched->base.job_list_lock);
2482 snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list);
2483 snapshot->pending_list = kmalloc_array(snapshot->pending_list_size,
2484 sizeof(struct pending_list_snapshot),
2485 GFP_ATOMIC);
2486
2487 if (snapshot->pending_list) {
2488 struct xe_sched_job *job_iter;
2489
2490 i = 0;
2491 list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
2492 snapshot->pending_list[i].seqno =
2493 xe_sched_job_seqno(job_iter);
2494 snapshot->pending_list[i].fence =
2495 dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
2496 snapshot->pending_list[i].finished =
2497 dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
2498 ? 1 : 0;
2499 i++;
2500 }
2501 }
2502
2503 spin_unlock(&sched->base.job_list_lock);
2504
2505 return snapshot;
2506 }
2507
2508 /**
2509 * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
2510 * @snapshot: Previously captured snapshot of job.
2511 *
2512 * This captures some data that requires taking locks, so it cannot be done in the signaling path.
2513 */
2514 void
2515 xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2516 {
2517 int i;
2518
2519 if (!snapshot || !snapshot->lrc)
2520 return;
2521
2522 for (i = 0; i < snapshot->width; ++i)
2523 xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
2524 }
2525
2526 /**
2527 * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
2528 * @snapshot: GuC Submit Engine snapshot object.
2529 * @p: drm_printer where it will be printed out.
2530 *
2531 * This function prints out a given GuC Submit Engine snapshot object.
2532 */
2533 void
2534 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
2535 struct drm_printer *p)
2536 {
2537 int i;
2538
2539 if (!snapshot)
2540 return;
2541
2542 drm_printf(p, "GuC ID: %d\n", snapshot->guc.id);
2543 drm_printf(p, "\tName: %s\n", snapshot->name);
2544 drm_printf(p, "\tClass: %d\n", snapshot->class);
2545 drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask);
2546 drm_printf(p, "\tWidth: %d\n", snapshot->width);
2547 drm_printf(p, "\tRef: %d\n", snapshot->refcount);
2548 drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout);
2549 drm_printf(p, "\tTimeslice: %u (us)\n",
2550 snapshot->sched_props.timeslice_us);
2551 drm_printf(p, "\tPreempt timeout: %u (us)\n",
2552 snapshot->sched_props.preempt_timeout_us);
2553
2554 for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
2555 xe_lrc_snapshot_print(snapshot->lrc[i], p);
2556
2557 drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
2558 drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
2559
2560 if (snapshot->parallel_execution)
2561 guc_exec_queue_wq_snapshot_print(snapshot, p);
2562
2563 for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size;
2564 i++)
2565 drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n",
2566 snapshot->pending_list[i].seqno,
2567 snapshot->pending_list[i].fence,
2568 snapshot->pending_list[i].finished);
2569 }
2570
2571 /**
2572 * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given
2573 * snapshot.
2574 * @snapshot: GuC Submit Engine snapshot object.
2575 *
2576 * This function frees all the memory that was allocated at capture
2577 * time.
2578 */
2579 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
2580 {
2581 int i;
2582
2583 if (!snapshot)
2584 return;
2585
2586 if (snapshot->lrc) {
2587 for (i = 0; i < snapshot->width; i++)
2588 xe_lrc_snapshot_free(snapshot->lrc[i]);
2589 kfree(snapshot->lrc);
2590 }
2591 kfree(snapshot->pending_list);
2592 kfree(snapshot);
2593 }
2594
2595 static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
2596 {
2597 struct xe_guc_submit_exec_queue_snapshot *snapshot;
2598
2599 snapshot = xe_guc_exec_queue_snapshot_capture(q);
2600 xe_guc_exec_queue_snapshot_print(snapshot, p);
2601 xe_guc_exec_queue_snapshot_free(snapshot);
2602 }
2603
2604 /**
2605 * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
2606 * @q: Execution queue
2607 * @ctx_type: Type of the context
2608 *
2609 * This function registers the execution queue with the GuC. Special context
2610 * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
2611 * are only applicable to integrated GPUs and only on a VF.
2612 * Scheduling is enabled on the queue after it is registered.
2613 *
2614 * Return: None.
2615 */
2616 void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
2617 {
2618 struct xe_guc *guc = exec_queue_to_guc(q);
2619 struct xe_device *xe = guc_to_xe(guc);
2620 struct xe_gt *gt = guc_to_gt(guc);
2621
2622 xe_gt_assert(gt, IS_SRIOV_VF(xe));
2623 xe_gt_assert(gt, !IS_DGFX(xe));
2624 xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
2625 ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
2626 xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
2627
2628 register_exec_queue(q, ctx_type);
2629 enable_scheduling(q);
2630 }
2631
2632 /**
2633 * xe_guc_submit_print - GuC Submit Print.
2634 * @guc: GuC.
2635 * @p: drm_printer where it will be printed out.
2636 *
2637 * This function captures and prints snapshots of **all** GuC Engines.
2638 */
2639 void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
2640 {
2641 struct xe_exec_queue *q;
2642 unsigned long index;
2643
2644 if (!xe_device_uc_enabled(guc_to_xe(guc)))
2645 return;
2646
2647 mutex_lock(&guc->submission_state.lock);
2648 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
2649 guc_exec_queue_print(q, p);
2650 mutex_unlock(&guc->submission_state.lock);
2651 }
2652
2653 /**
2654 * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
2655 * exec queues registered to given GuC.
2656 * @guc: the &xe_guc struct instance
2657 * @scratch: scratch buffer to be used as temporary storage
2658 *
2659 * Returns: zero on success, negative error code on failure.
2660 */
2661 int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
2662 {
2663 struct xe_exec_queue *q;
2664 unsigned long index;
2665 int err = 0;
2666
2667 mutex_lock(&guc->submission_state.lock);
2668 xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
2669 err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
2670 if (err)
2671 break;
2672 if (xe_exec_queue_is_parallel(q))
2673 err = wq_items_rebase(q);
2674 if (err)
2675 break;
2676 }
2677 mutex_unlock(&guc->submission_state.lock);
2678
2679 return err;
2680 }
2681