xref: /linux/drivers/gpu/drm/xe/xe_gt.c (revision 45d8b572fac3aa8b49d53c946b3685eaf78a2824)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt.h"

#include <linux/minmax.h>

#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_clock.h"
#include "xe_gt_freq.h"
#include "xe_gt_idle.h"
#include "xe_gt_mcr.h"
#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_gt_topology.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_pc.h"
#include "xe_hw_fence.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_irq.h"
#include "xe_lmtt.h"
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_migrate.h"
#include "xe_mmio.h"
#include "xe_pat.h"
#include "xe_mocs.h"
#include "xe_reg_sr.h"
#include "xe_ring_ops.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc.h"
#include "xe_vm.h"
#include "xe_wa.h"
#include "xe_wopcm.h"

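/**
 * xe_gt_alloc() - Allocate a GT structure for a tile
 * @tile: the tile this GT belongs to
 *
 * Allocates the GT as a DRM-managed object and creates the ordered workqueue
 * used to serialize GT work such as resets.
 *
 * Return: pointer to the new GT, or ERR_PTR(-ENOMEM) on allocation failure.
 */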
struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
{
	struct xe_gt *gt;

	gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
	if (!gt)
		return ERR_PTR(-ENOMEM);

	gt->tile = tile;
	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);

	return gt;
}

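/*
 * Forget the GuC submission enable state ahead of a reset or suspend so that
 * submission is set up again the next time the GT is (re)initialized.
 */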
void xe_gt_sanitize(struct xe_gt *gt)
{
	/*
	 * FIXME: if xe_uc_sanitize is called here, on TGL the driver will not
	 * reload
	 */
	gt->uc.guc.submission_state.enabled = false;
}

/**
 * xe_gt_remove() - Clean up the GT structures before driver removal
 * @gt: the GT object
 *
 * This function should only act on objects/structures that must be cleaned
 * before the driver removal callback is complete and therefore can't be
 * deferred to a drmm action.
 */
void xe_gt_remove(struct xe_gt *gt)
{
	xe_uc_remove(&gt->uc);
}

static void gt_fini(struct drm_device *drm, void *arg)
{
	struct xe_gt *gt = arg;
	int i;

	destroy_workqueue(gt->ordered_wq);

	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
}

static void gt_reset_worker(struct work_struct *w);

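/*
 * Submit a minimal batch buffer with no payload on @q and wait up to one
 * second for it to complete.  Used by xe_gt_record_default_lrcs() to cycle an
 * engine between contexts.
 */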
static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
	struct xe_sched_job *job;
	struct xe_bb *bb;
	struct dma_fence *fence;
	long timeout;

	bb = xe_bb_new(gt, 4, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	job = xe_bb_create_job(q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

/*
 * Convert back from an encoded value to the type-safe struct xe_reg_mcr;
 * only to be used when reg.mcr is true.
 */
static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
{
	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
}

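/*
 * Build and submit a batch on @q that programs the engine's LRC save/restore
 * registers (the workarounds and tunings collected in q->hwe->reg_lrc) and
 * then emits the per-engine state instructions.  The register portion of the
 * batch has the form:
 *
 *   MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count)
 *   <reg 0 offset> <reg 0 value>
 *   ...
 *   <reg N offset> <reg N value>
 */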
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
	struct xe_reg_sr_entry *entry;
	unsigned long idx;
	struct xe_sched_job *job;
	struct xe_bb *bb;
	struct dma_fence *fence;
	long timeout;
	int count = 0;

	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
		/* Big enough to emit all of the context's 3DSTATE */
		bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
	else
		/* Just pick a large BB size */
		bb = xe_bb_new(gt, SZ_4K, false);

	if (IS_ERR(bb))
		return PTR_ERR(bb);

	xa_for_each(&sr->xa, idx, entry)
		++count;

	if (count) {
		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);

		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);

		xa_for_each(&sr->xa, idx, entry) {
			struct xe_reg reg = entry->reg;
			struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
			u32 val;

			/*
			 * Skip reading the register if it's not really needed
			 */
			if (reg.masked)
				val = entry->clr_bits << 16;
			else if (entry->clr_bits + 1)
				val = (reg.mcr ?
				       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
				       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
			else
				val = 0;

			val |= entry->set_bits;

			bb->cs[bb->len++] = reg.addr;
			bb->cs[bb->len++] = val;
			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
		}
	}

	xe_lrc_emit_hwe_state_instructions(q, bb);

	job = xe_bb_create_job(q, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	timeout = dma_fence_wait_timeout(fence, false, HZ);
	dma_fence_put(fence);
	xe_bb_free(bb, NULL);
	if (timeout < 0)
		return timeout;
	else if (!timeout)
		return -ETIME;

	return 0;
}

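/**
 * xe_gt_record_default_lrcs() - Record a default ("golden") LRC image per
 * engine class
 * @gt: the GT object
 *
 * For each hardware engine class, runs a workaround batch followed by nop
 * jobs and copies the resulting context image out of the LRC.  The copy is
 * stored in gt->default_lrc[class].
 *
 * Return: 0 on success, negative error code otherwise.
 */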
int xe_gt_record_default_lrcs(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err = 0;

	for_each_hw_engine(hwe, gt, id) {
		struct xe_exec_queue *q, *nop_q;
		void *default_lrc;

		if (gt->default_lrc[hwe->class])
			continue;

		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
		xe_wa_process_lrc(hwe);
		xe_hw_engine_setup_default_lrc_state(hwe);
		xe_tuning_process_lrc(hwe);

		default_lrc = drmm_kzalloc(&xe->drm,
					   xe_lrc_size(xe, hwe->class),
					   GFP_KERNEL);
		if (!default_lrc)
			return -ENOMEM;

		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(q)) {
			err = PTR_ERR(q);
			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
				  hwe->name, q);
			return err;
		}

		/* Prime golden LRC with known good state */
		err = emit_wa_job(gt, q);
		if (err) {
			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), q->guc->id);
			goto put_exec_queue;
		}

		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(nop_q)) {
			err = PTR_ERR(nop_q);
			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
				  hwe->name, nop_q);
			goto put_exec_queue;
		}

		/* Switch to different LRC */
		err = emit_nop_job(gt, nop_q);
		if (err) {
			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), nop_q->guc->id);
			goto put_nop_q;
		}

		/* Reload golden LRC to record the effect of any indirect W/A */
		err = emit_nop_job(gt, q);
		if (err) {
			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), q->guc->id);
			goto put_nop_q;
		}

		xe_map_memcpy_from(xe, default_lrc,
				   &q->lrc[0].bo->vmap,
				   xe_lrc_pphwsp_offset(&q->lrc[0]),
				   xe_lrc_size(xe, hwe->class));

		gt->default_lrc[hwe->class] = default_lrc;
put_nop_q:
		xe_exec_queue_put(nop_q);
put_exec_queue:
		xe_exec_queue_put(q);
		if (err)
			break;
	}

	return err;
}

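/**
 * xe_gt_init_early() - Early GT initialization
 * @gt: the GT object
 *
 * Takes and releases GT forcewake, initializes the GT save/restore register
 * list, and processes GT workarounds, out-of-band workarounds and tunings.
 *
 * Return: 0 on success, negative error code otherwise.
 */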
int xe_gt_init_early(struct xe_gt *gt)
{
	int err;

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));

	err = xe_wa_init(gt);
	if (err)
		return err;

	xe_wa_process_gt(gt);
	xe_wa_process_oob(gt);
	xe_tuning_process_gt(gt);

	return 0;
}

static void dump_pat_on_error(struct xe_gt *gt)
{
	struct drm_printer p;
	char prefix[32];

	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);

	xe_pat_dump(gt, &p);
}

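/*
 * Initialization that only needs the GT forcewake domain: GGTT (and, for an
 * SR-IOV PF, LMTT) setup on the primary GT, gt_idle sysfs, per-engine
 * interrupts, a second MCR init pass now that the engine list exists, early
 * HW engine init, the engine-class and CCS-mode sysfs entries, and stashing
 * the hardware-reported GMD_ID version.
 */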
static int gt_fw_domain_init(struct xe_gt *gt)
{
	int err, i;

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		goto err_hw_fence_irq;

	if (!xe_gt_is_media_type(gt)) {
		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
		if (err)
			goto err_force_wake;
		if (IS_SRIOV_PF(gt_to_xe(gt)))
			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
	}

	xe_gt_idle_sysfs_init(&gt->gtidle);

	/* Enable per hw engine IRQs */
	xe_irq_enable_hwe(gt);

	/* Rerun MCR init as we now have hw engine list */
	xe_gt_mcr_init(gt);

	err = xe_hw_engines_init_early(gt);
	if (err)
		goto err_force_wake;

	err = xe_hw_engine_class_sysfs_init(gt);
	if (err)
		drm_warn(&gt_to_xe(gt)->drm,
			 "failed to register engines sysfs directory, err: %d\n",
			 err);

	/* Initialize CCS mode sysfs after early initialization of HW engines */
	err = xe_gt_ccs_mode_sysfs_init(gt);
	if (err)
		goto err_force_wake;

	/*
	 * Stash hardware-reported version.  Since this register does not exist
	 * on pre-MTL platforms, reading it there will (correctly) return 0.
	 */
	gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);

	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
	XE_WARN_ON(err);
	xe_device_mem_access_put(gt_to_xe(gt));

	return 0;

err_force_wake:
	dump_pat_on_error(gt);
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
err_hw_fence_irq:
	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
	xe_device_mem_access_put(gt_to_xe(gt));

	return err;
}

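/*
 * Initialization that needs all forcewake domains held: GT clock, MOCS,
 * execlists and HW engines, the USM batch buffer pool and migration engine on
 * the primary GT, post-hwconfig and HW init of the microcontrollers, the
 * default CCS mode and, for an SR-IOV PF, LMTT HW setup.
 */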
static int all_fw_domain_init(struct xe_gt *gt)
{
	int err, i;

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err)
		goto err_hw_fence_irq;

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_gt_clock_init(gt);
	if (err)
		goto err_force_wake;

	xe_mocs_init(gt);
	err = xe_execlist_init(gt);
	if (err)
		goto err_force_wake;

	err = xe_hw_engines_init(gt);
	if (err)
		goto err_force_wake;

	if (!xe_gt_is_media_type(gt)) {
		/*
		 * USM has its own SA pool so that it does not block behind
		 * user operations
		 */
		if (gt_to_xe(gt)->info.has_usm) {
			struct xe_device *xe = gt_to_xe(gt);

			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
			if (IS_ERR(gt->usm.bb_pool)) {
				err = PTR_ERR(gt->usm.bb_pool);
				goto err_force_wake;
			}
		}
	}

	if (!xe_gt_is_media_type(gt)) {
		struct xe_tile *tile = gt_to_tile(gt);

		tile->migrate = xe_migrate_init(tile);
		if (IS_ERR(tile->migrate)) {
			err = PTR_ERR(tile->migrate);
			goto err_force_wake;
		}
	}

	err = xe_uc_init_post_hwconfig(&gt->uc);
	if (err)
		goto err_force_wake;

	err = xe_uc_init_hw(&gt->uc);
	if (err)
		goto err_force_wake;

	/* Configure default CCS mode of 1 engine with all resources */
	if (xe_gt_ccs_mode_enabled(gt)) {
		gt->ccs_mode = 1;
		xe_gt_apply_ccs_mode(gt);
	}

	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	XE_WARN_ON(err);
	xe_device_mem_access_put(gt_to_xe(gt));

	return 0;

err_force_wake:
	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
err_hw_fence_irq:
	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
	xe_device_mem_access_put(gt_to_xe(gt));

	return err;
}

/*
 * Initialize enough GT to be able to load GuC in order to obtain hwconfig and
 * enable CTB communication.
 */
int xe_gt_init_hwconfig(struct xe_gt *gt)
{
	int err;

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		goto out;

	xe_gt_topology_init(gt);
	xe_gt_mcr_init(gt);
	xe_pat_init(gt);

	err = xe_uc_init(&gt->uc);
	if (err)
		goto out_fw;

	err = xe_uc_init_hwconfig(&gt->uc);
	if (err)
		goto out_fw;

	/* XXX: Fake that we pull the engine mask from hwconfig blob */
	gt->info.engine_mask = gt->info.__engine_mask;

out_fw:
	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
out:
	xe_device_mem_access_put(gt_to_xe(gt));

	return err;
}

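/**
 * xe_gt_init() - Main GT initialization
 * @gt: the GT object
 *
 * Sets up the reset worker, ring ops and fence IRQs, TLB invalidation and
 * pagefault handling, MOCS, sysfs and frequency control, then runs the
 * forcewake-domain init steps and registers a drmm action for cleanup.
 *
 * Return: 0 on success, negative error code otherwise.
 */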
int xe_gt_init(struct xe_gt *gt)
{
	int err;
	int i;

	INIT_WORK(&gt->reset.worker, gt_reset_worker);

	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
		xe_hw_fence_irq_init(&gt->fence_irq[i]);
	}

	err = xe_gt_tlb_invalidation_init(gt);
	if (err)
		return err;

	err = xe_gt_pagefault_init(gt);
	if (err)
		return err;

	xe_mocs_init_early(gt);

	xe_gt_sysfs_init(gt);

	err = gt_fw_domain_init(gt);
	if (err)
		return err;

	xe_gt_freq_init(gt);

	xe_force_wake_init_engines(gt, gt_to_fw(gt));

	err = all_fw_domain_init(gt);
	if (err)
		return err;

	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
	if (err)
		return err;

	return 0;
}

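/*
 * Trigger a full GT reset via the GDRST register and wait for the hardware to
 * clear GRDOM_FULL, applying the GSC workaround (xe_gsc_wa_14015076503)
 * around the reset.
 */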
static int do_gt_reset(struct xe_gt *gt)
{
	int err;

	xe_gsc_wa_14015076503(gt, true);

	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
	if (err)
		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
			  ERR_PTR(err));

	xe_gsc_wa_14015076503(gt, false);

	return err;
}

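/*
 * Bring the GT back up after a reset or resume: reapply PAT, MCR and
 * save/restore register settings, re-enable the rings, reinitialize and
 * restart the microcontrollers, and resync the CCS mode.
 */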
static int do_gt_restart(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err;

	xe_pat_init(gt);

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_wopcm_init(&gt->uc.wopcm);
	if (err)
		return err;

	for_each_hw_engine(hwe, gt, id)
		xe_hw_engine_enable_ring(hwe);

	err = xe_uc_sanitize_reset(&gt->uc);
	if (err)
		return err;

	err = xe_uc_init_hw(&gt->uc);
	if (err)
		return err;

	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	xe_mocs_init(gt);
	err = xe_uc_start(&gt->uc);
	if (err)
		return err;

	for_each_hw_engine(hwe, gt, id) {
		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
		xe_reg_sr_apply_whitelist(hwe);
	}

	/* Get CCS mode in sync between sw/hw */
	xe_gt_apply_ccs_mode(gt);

	return 0;
}

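/*
 * Full GT reset sequence: sanitize software state, quiesce and stop the
 * microcontrollers and pagefault handling, reset the TLB invalidation state,
 * perform the hardware reset and then restart the GT.  Only supported with
 * GuC submission; on failure an FLR is requested on driver fini.
 */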
static int gt_reset(struct xe_gt *gt)
{
	int err;

	/* We only support GT resets with GuC submission */
	if (!xe_device_uc_enabled(gt_to_xe(gt)))
		return -ENODEV;

	xe_gt_info(gt, "reset started\n");

	if (xe_fault_inject_gt_reset()) {
		err = -ECANCELED;
		goto err_fail;
	}

	xe_gt_sanitize(gt);

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err)
		goto err_msg;

	xe_uc_gucrc_disable(&gt->uc);
	xe_uc_stop_prepare(&gt->uc);
	xe_gt_pagefault_reset(gt);

	err = xe_uc_stop(&gt->uc);
	if (err)
		goto err_out;

	xe_gt_tlb_invalidation_reset(gt);

	err = do_gt_reset(gt);
	if (err)
		goto err_out;

	err = do_gt_restart(gt);
	if (err)
		goto err_out;

	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_device_mem_access_put(gt_to_xe(gt));
	XE_WARN_ON(err);

	xe_gt_info(gt, "reset done\n");

	return 0;

err_out:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
	XE_WARN_ON(xe_uc_start(&gt->uc));
	xe_device_mem_access_put(gt_to_xe(gt));
err_fail:
	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));

	gt_to_xe(gt)->needs_flr_on_fini = true;

	return err;
}

static void gt_reset_worker(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);

	gt_reset(gt);
}

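/**
 * xe_gt_reset_async() - Queue a GT reset on the GT's ordered workqueue
 * @gt: the GT object
 *
 * The reset is skipped if one is already in flight, as reported by
 * xe_uc_reset_prepare().
 */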
void xe_gt_reset_async(struct xe_gt *gt)
{
	xe_gt_info(gt, "trying reset\n");

	/* Don't do a reset while one is already in flight */
	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
		return;

	xe_gt_info(gt, "reset queued\n");
	queue_work(gt->ordered_wq, &gt->reset.worker);
}

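/*
 * Suspend/resume helpers: xe_gt_suspend_prepare() quiesces the
 * microcontrollers, xe_gt_suspend() sanitizes the GT and suspends them, and
 * xe_gt_resume() runs the restart sequence.  All of them run with forcewake
 * held on all domains.
 */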
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
	xe_device_mem_access_get(gt_to_xe(gt));
	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));

	xe_uc_stop_prepare(&gt->uc);

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_device_mem_access_put(gt_to_xe(gt));
}

int xe_gt_suspend(struct xe_gt *gt)
{
	int err;

	xe_gt_sanitize(gt);

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err)
		goto err_msg;

	err = xe_uc_suspend(&gt->uc);
	if (err)
		goto err_force_wake;

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_device_mem_access_put(gt_to_xe(gt));
	xe_gt_info(gt, "suspended\n");

	return 0;

err_force_wake:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
	xe_device_mem_access_put(gt_to_xe(gt));
	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));

	return err;
}

int xe_gt_resume(struct xe_gt *gt)
{
	int err;

	xe_device_mem_access_get(gt_to_xe(gt));
	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (err)
		goto err_msg;

	err = do_gt_restart(gt);
	if (err)
		goto err_force_wake;

	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
	xe_device_mem_access_put(gt_to_xe(gt));
	xe_gt_info(gt, "resumed\n");

	return 0;

err_force_wake:
	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
err_msg:
	xe_device_mem_access_put(gt_to_xe(gt));
	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));

	return err;
}

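/**
 * xe_gt_hw_engine() - Look up a hardware engine on a GT
 * @gt: the GT object
 * @class: engine class
 * @instance: engine instance within the class
 * @logical: if true, @instance is a logical instance number, otherwise a
 * physical one
 *
 * Return: the matching engine, or NULL if none is found.
 */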
struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
				     enum xe_engine_class class,
				     u16 instance, bool logical)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id)
		if (hwe->class == class &&
		    ((!logical && hwe->instance == instance) ||
		    (logical && hwe->logical_instance == instance)))
			return hwe;

	return NULL;
}

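/**
 * xe_gt_any_hw_engine_by_reset_domain() - Find any engine in the same reset
 * domain as @class
 * @gt: the GT object
 * @class: engine class
 *
 * Render and compute engines share a reset domain, so a request for either
 * class may return an engine of the other; all other classes must match
 * exactly.
 *
 * Return: a matching engine, or NULL if none is found.
 */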
struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
							 enum xe_engine_class class)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		switch (class) {
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
				return hwe;
			break;
		default:
			if (hwe->class == class)
				return hwe;
		}
	}

	return NULL;
}