xref: /linux/drivers/gpu/drm/xe/xe_gt.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt.h"
7 
8 #include <linux/minmax.h>
9 
10 #include <drm/drm_managed.h>
11 #include <drm/xe_drm.h>
12 
13 #include "instructions/xe_gfxpipe_commands.h"
14 #include "instructions/xe_mi_commands.h"
15 #include "regs/xe_gt_regs.h"
16 #include "xe_assert.h"
17 #include "xe_bb.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_exec_queue.h"
21 #include "xe_execlist.h"
22 #include "xe_force_wake.h"
23 #include "xe_ggtt.h"
24 #include "xe_gsc.h"
25 #include "xe_gt_ccs_mode.h"
26 #include "xe_gt_clock.h"
27 #include "xe_gt_freq.h"
28 #include "xe_gt_idle.h"
29 #include "xe_gt_mcr.h"
30 #include "xe_gt_pagefault.h"
31 #include "xe_gt_printk.h"
32 #include "xe_gt_sysfs.h"
33 #include "xe_gt_tlb_invalidation.h"
34 #include "xe_gt_topology.h"
35 #include "xe_guc_exec_queue_types.h"
36 #include "xe_guc_pc.h"
37 #include "xe_hw_fence.h"
38 #include "xe_hw_engine_class_sysfs.h"
39 #include "xe_irq.h"
40 #include "xe_lmtt.h"
41 #include "xe_lrc.h"
42 #include "xe_map.h"
43 #include "xe_migrate.h"
44 #include "xe_mmio.h"
45 #include "xe_pat.h"
46 #include "xe_mocs.h"
47 #include "xe_reg_sr.h"
48 #include "xe_ring_ops.h"
49 #include "xe_sa.h"
50 #include "xe_sched_job.h"
51 #include "xe_sriov.h"
52 #include "xe_tuning.h"
53 #include "xe_uc.h"
54 #include "xe_vm.h"
55 #include "xe_wa.h"
56 #include "xe_wopcm.h"
57 
58 struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
59 {
60 	struct xe_gt *gt;
61 
62 	gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
63 	if (!gt)
64 		return ERR_PTR(-ENOMEM);
65 
66 	gt->tile = tile;
67 	gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0);
68 
69 	return gt;
70 }
71 
72 void xe_gt_sanitize(struct xe_gt *gt)
73 {
74 	/*
75 	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
76 	 * reload
77 	 */
78 	gt->uc.guc.submission_state.enabled = false;
79 }
80 
81 /**
82  * xe_gt_remove() - Clean up the GT structures before driver removal
83  * @gt: the GT object
84  *
85  * This function should only act on objects/structures that must be cleaned
86  * before the driver removal callback is complete and therefore can't be
87  * deferred to a drmm action.
88  */
89 void xe_gt_remove(struct xe_gt *gt)
90 {
91 	xe_uc_remove(&gt->uc);
92 }
93 
94 static void gt_fini(struct drm_device *drm, void *arg)
95 {
96 	struct xe_gt *gt = arg;
97 	int i;
98 
99 	destroy_workqueue(gt->ordered_wq);
100 
101 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
102 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
103 }
104 
105 static void gt_reset_worker(struct work_struct *w);
106 
107 static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
108 {
109 	struct xe_sched_job *job;
110 	struct xe_bb *bb;
111 	struct dma_fence *fence;
112 	long timeout;
113 
114 	bb = xe_bb_new(gt, 4, false);
115 	if (IS_ERR(bb))
116 		return PTR_ERR(bb);
117 
118 	job = xe_bb_create_job(q, bb);
119 	if (IS_ERR(job)) {
120 		xe_bb_free(bb, NULL);
121 		return PTR_ERR(job);
122 	}
123 
124 	xe_sched_job_arm(job);
125 	fence = dma_fence_get(&job->drm.s_fence->finished);
126 	xe_sched_job_push(job);
127 
128 	timeout = dma_fence_wait_timeout(fence, false, HZ);
129 	dma_fence_put(fence);
130 	xe_bb_free(bb, NULL);
131 	if (timeout < 0)
132 		return timeout;
133 	else if (!timeout)
134 		return -ETIME;
135 
136 	return 0;
137 }
138 
139 /*
140  * Convert back from encoded value to type-safe, only to be used when reg.mcr
141  * is true
142  */
143 static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
144 {
145 	return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
146 }
147 
148 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
149 {
150 	struct xe_reg_sr *sr = &q->hwe->reg_lrc;
151 	struct xe_reg_sr_entry *entry;
152 	unsigned long idx;
153 	struct xe_sched_job *job;
154 	struct xe_bb *bb;
155 	struct dma_fence *fence;
156 	long timeout;
157 	int count = 0;
158 
159 	if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
160 		/* Big enough to emit all of the context's 3DSTATE */
161 		bb = xe_bb_new(gt, xe_lrc_size(gt_to_xe(gt), q->hwe->class), false);
162 	else
163 		/* Just pick a large BB size */
164 		bb = xe_bb_new(gt, SZ_4K, false);
165 
166 	if (IS_ERR(bb))
167 		return PTR_ERR(bb);
168 
169 	xa_for_each(&sr->xa, idx, entry)
170 		++count;
171 
172 	if (count) {
173 		xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
174 
175 		bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
176 
177 		xa_for_each(&sr->xa, idx, entry) {
178 			struct xe_reg reg = entry->reg;
179 			struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
180 			u32 val;
181 
182 			/*
183 			 * Skip reading the register if it's not really needed
184 			 */
185 			if (reg.masked)
186 				val = entry->clr_bits << 16;
187 			else if (entry->clr_bits + 1)
188 				val = (reg.mcr ?
189 				       xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
190 				       xe_mmio_read32(gt, reg)) & (~entry->clr_bits);
191 			else
192 				val = 0;
193 
194 			val |= entry->set_bits;
195 
196 			bb->cs[bb->len++] = reg.addr;
197 			bb->cs[bb->len++] = val;
198 			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
199 		}
200 	}
201 
202 	xe_lrc_emit_hwe_state_instructions(q, bb);
203 
204 	job = xe_bb_create_job(q, bb);
205 	if (IS_ERR(job)) {
206 		xe_bb_free(bb, NULL);
207 		return PTR_ERR(job);
208 	}
209 
210 	xe_sched_job_arm(job);
211 	fence = dma_fence_get(&job->drm.s_fence->finished);
212 	xe_sched_job_push(job);
213 
214 	timeout = dma_fence_wait_timeout(fence, false, HZ);
215 	dma_fence_put(fence);
216 	xe_bb_free(bb, NULL);
217 	if (timeout < 0)
218 		return timeout;
219 	else if (!timeout)
220 		return -ETIME;
221 
222 	return 0;
223 }
224 
225 int xe_gt_record_default_lrcs(struct xe_gt *gt)
226 {
227 	struct xe_device *xe = gt_to_xe(gt);
228 	struct xe_hw_engine *hwe;
229 	enum xe_hw_engine_id id;
230 	int err = 0;
231 
232 	for_each_hw_engine(hwe, gt, id) {
233 		struct xe_exec_queue *q, *nop_q;
234 		void *default_lrc;
235 
236 		if (gt->default_lrc[hwe->class])
237 			continue;
238 
239 		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
240 		xe_wa_process_lrc(hwe);
241 		xe_hw_engine_setup_default_lrc_state(hwe);
242 		xe_tuning_process_lrc(hwe);
243 
244 		default_lrc = drmm_kzalloc(&xe->drm,
245 					   xe_lrc_size(xe, hwe->class),
246 					   GFP_KERNEL);
247 		if (!default_lrc)
248 			return -ENOMEM;
249 
250 		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
251 					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
252 		if (IS_ERR(q)) {
253 			err = PTR_ERR(q);
254 			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
255 				  hwe->name, q);
256 			return err;
257 		}
258 
259 		/* Prime golden LRC with known good state */
260 		err = emit_wa_job(gt, q);
261 		if (err) {
262 			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
263 				  hwe->name, ERR_PTR(err), q->guc->id);
264 			goto put_exec_queue;
265 		}
266 
267 		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
268 					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
269 		if (IS_ERR(nop_q)) {
270 			err = PTR_ERR(nop_q);
271 			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
272 				  hwe->name, nop_q);
273 			goto put_exec_queue;
274 		}
275 
276 		/* Switch to different LRC */
277 		err = emit_nop_job(gt, nop_q);
278 		if (err) {
279 			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
280 				  hwe->name, ERR_PTR(err), nop_q->guc->id);
281 			goto put_nop_q;
282 		}
283 
284 		/* Reload golden LRC to record the effect of any indirect W/A */
285 		err = emit_nop_job(gt, q);
286 		if (err) {
287 			xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
288 				  hwe->name, ERR_PTR(err), q->guc->id);
289 			goto put_nop_q;
290 		}
291 
292 		xe_map_memcpy_from(xe, default_lrc,
293 				   &q->lrc[0].bo->vmap,
294 				   xe_lrc_pphwsp_offset(&q->lrc[0]),
295 				   xe_lrc_size(xe, hwe->class));
296 
297 		gt->default_lrc[hwe->class] = default_lrc;
298 put_nop_q:
299 		xe_exec_queue_put(nop_q);
300 put_exec_queue:
301 		xe_exec_queue_put(q);
302 		if (err)
303 			break;
304 	}
305 
306 	return err;
307 }
308 
309 int xe_gt_init_early(struct xe_gt *gt)
310 {
311 	int err;
312 
313 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
314 	if (err)
315 		return err;
316 
317 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
318 	if (err)
319 		return err;
320 
321 	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
322 
323 	err = xe_wa_init(gt);
324 	if (err)
325 		return err;
326 
327 	xe_wa_process_gt(gt);
328 	xe_wa_process_oob(gt);
329 	xe_tuning_process_gt(gt);
330 
331 	return 0;
332 }
333 
334 static void dump_pat_on_error(struct xe_gt *gt)
335 {
336 	struct drm_printer p;
337 	char prefix[32];
338 
339 	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
340 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
341 
342 	xe_pat_dump(gt, &p);
343 }
344 
345 static int gt_fw_domain_init(struct xe_gt *gt)
346 {
347 	int err, i;
348 
349 	xe_device_mem_access_get(gt_to_xe(gt));
350 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
351 	if (err)
352 		goto err_hw_fence_irq;
353 
354 	if (!xe_gt_is_media_type(gt)) {
355 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
356 		if (err)
357 			goto err_force_wake;
358 		if (IS_SRIOV_PF(gt_to_xe(gt)))
359 			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
360 	}
361 
362 	xe_gt_idle_sysfs_init(&gt->gtidle);
363 
364 	/* Enable per hw engine IRQs */
365 	xe_irq_enable_hwe(gt);
366 
367 	/* Rerun MCR init as we now have hw engine list */
368 	xe_gt_mcr_init(gt);
369 
370 	err = xe_hw_engines_init_early(gt);
371 	if (err)
372 		goto err_force_wake;
373 
374 	err = xe_hw_engine_class_sysfs_init(gt);
375 	if (err)
376 		drm_warn(&gt_to_xe(gt)->drm,
377 			 "failed to register engines sysfs directory, err: %d\n",
378 			 err);
379 
380 	/* Initialize CCS mode sysfs after early initialization of HW engines */
381 	xe_gt_ccs_mode_sysfs_init(gt);
382 
383 	/*
384 	 * Stash hardware-reported version.  Since this register does not exist
385 	 * on pre-MTL platforms, reading it there will (correctly) return 0.
386 	 */
387 	gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
388 
389 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
390 	XE_WARN_ON(err);
391 	xe_device_mem_access_put(gt_to_xe(gt));
392 
393 	return 0;
394 
395 err_force_wake:
396 	dump_pat_on_error(gt);
397 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
398 err_hw_fence_irq:
399 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
400 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
401 	xe_device_mem_access_put(gt_to_xe(gt));
402 
403 	return err;
404 }
405 
406 static int all_fw_domain_init(struct xe_gt *gt)
407 {
408 	int err, i;
409 
410 	xe_device_mem_access_get(gt_to_xe(gt));
411 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
412 	if (err)
413 		goto err_hw_fence_irq;
414 
415 	xe_gt_mcr_set_implicit_defaults(gt);
416 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
417 
418 	err = xe_gt_clock_init(gt);
419 	if (err)
420 		goto err_force_wake;
421 
422 	xe_mocs_init(gt);
423 	err = xe_execlist_init(gt);
424 	if (err)
425 		goto err_force_wake;
426 
427 	err = xe_hw_engines_init(gt);
428 	if (err)
429 		goto err_force_wake;
430 
431 	if (!xe_gt_is_media_type(gt)) {
432 		/*
433 		 * USM has its only SA pool to non-block behind user operations
434 		 */
435 		if (gt_to_xe(gt)->info.has_usm) {
436 			struct xe_device *xe = gt_to_xe(gt);
437 
438 			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
439 								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
440 			if (IS_ERR(gt->usm.bb_pool)) {
441 				err = PTR_ERR(gt->usm.bb_pool);
442 				goto err_force_wake;
443 			}
444 		}
445 	}
446 
447 	if (!xe_gt_is_media_type(gt)) {
448 		struct xe_tile *tile = gt_to_tile(gt);
449 
450 		tile->migrate = xe_migrate_init(tile);
451 		if (IS_ERR(tile->migrate)) {
452 			err = PTR_ERR(tile->migrate);
453 			goto err_force_wake;
454 		}
455 	}
456 
457 	err = xe_uc_init_post_hwconfig(&gt->uc);
458 	if (err)
459 		goto err_force_wake;
460 
461 	err = xe_uc_init_hw(&gt->uc);
462 	if (err)
463 		goto err_force_wake;
464 
465 	/* Configure default CCS mode of 1 engine with all resources */
466 	if (xe_gt_ccs_mode_enabled(gt)) {
467 		gt->ccs_mode = 1;
468 		xe_gt_apply_ccs_mode(gt);
469 	}
470 
471 	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
472 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
473 
474 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
475 	XE_WARN_ON(err);
476 	xe_device_mem_access_put(gt_to_xe(gt));
477 
478 	return 0;
479 
480 err_force_wake:
481 	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
482 err_hw_fence_irq:
483 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
484 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
485 	xe_device_mem_access_put(gt_to_xe(gt));
486 
487 	return err;
488 }
489 
490 /*
491  * Initialize enough GT to be able to load GuC in order to obtain hwconfig and
492  * enable CTB communication.
493  */
494 int xe_gt_init_hwconfig(struct xe_gt *gt)
495 {
496 	int err;
497 
498 	xe_device_mem_access_get(gt_to_xe(gt));
499 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
500 	if (err)
501 		goto out;
502 
503 	xe_gt_topology_init(gt);
504 	xe_gt_mcr_init(gt);
505 	xe_pat_init(gt);
506 
507 	err = xe_uc_init(&gt->uc);
508 	if (err)
509 		goto out_fw;
510 
511 	err = xe_uc_init_hwconfig(&gt->uc);
512 	if (err)
513 		goto out_fw;
514 
515 	/* XXX: Fake that we pull the engine mask from hwconfig blob */
516 	gt->info.engine_mask = gt->info.__engine_mask;
517 
518 out_fw:
519 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
520 out:
521 	xe_device_mem_access_put(gt_to_xe(gt));
522 
523 	return err;
524 }
525 
526 int xe_gt_init(struct xe_gt *gt)
527 {
528 	int err;
529 	int i;
530 
531 	INIT_WORK(&gt->reset.worker, gt_reset_worker);
532 
533 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
534 		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
535 		xe_hw_fence_irq_init(&gt->fence_irq[i]);
536 	}
537 
538 	err = xe_gt_tlb_invalidation_init(gt);
539 	if (err)
540 		return err;
541 
542 	err = xe_gt_pagefault_init(gt);
543 	if (err)
544 		return err;
545 
546 	xe_mocs_init_early(gt);
547 
548 	xe_gt_sysfs_init(gt);
549 
550 	err = gt_fw_domain_init(gt);
551 	if (err)
552 		return err;
553 
554 	xe_gt_freq_init(gt);
555 
556 	xe_force_wake_init_engines(gt, gt_to_fw(gt));
557 
558 	err = all_fw_domain_init(gt);
559 	if (err)
560 		return err;
561 
562 	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
563 	if (err)
564 		return err;
565 
566 	return 0;
567 }
568 
569 static int do_gt_reset(struct xe_gt *gt)
570 {
571 	int err;
572 
573 	xe_gsc_wa_14015076503(gt, true);
574 
575 	xe_mmio_write32(gt, GDRST, GRDOM_FULL);
576 	err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
577 	if (err)
578 		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
579 			  ERR_PTR(err));
580 
581 	xe_gsc_wa_14015076503(gt, false);
582 
583 	return err;
584 }
585 
586 static int do_gt_restart(struct xe_gt *gt)
587 {
588 	struct xe_hw_engine *hwe;
589 	enum xe_hw_engine_id id;
590 	int err;
591 
592 	xe_pat_init(gt);
593 
594 	xe_gt_mcr_set_implicit_defaults(gt);
595 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
596 
597 	err = xe_wopcm_init(&gt->uc.wopcm);
598 	if (err)
599 		return err;
600 
601 	for_each_hw_engine(hwe, gt, id)
602 		xe_hw_engine_enable_ring(hwe);
603 
604 	err = xe_uc_sanitize_reset(&gt->uc);
605 	if (err)
606 		return err;
607 
608 	err = xe_uc_init_hw(&gt->uc);
609 	if (err)
610 		return err;
611 
612 	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
613 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
614 
615 	xe_mocs_init(gt);
616 	err = xe_uc_start(&gt->uc);
617 	if (err)
618 		return err;
619 
620 	for_each_hw_engine(hwe, gt, id) {
621 		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
622 		xe_reg_sr_apply_whitelist(hwe);
623 	}
624 
625 	/* Get CCS mode in sync between sw/hw */
626 	xe_gt_apply_ccs_mode(gt);
627 
628 	return 0;
629 }
630 
631 static int gt_reset(struct xe_gt *gt)
632 {
633 	int err;
634 
635 	/* We only support GT resets with GuC submission */
636 	if (!xe_device_uc_enabled(gt_to_xe(gt)))
637 		return -ENODEV;
638 
639 	xe_gt_info(gt, "reset started\n");
640 
641 	if (xe_fault_inject_gt_reset()) {
642 		err = -ECANCELED;
643 		goto err_fail;
644 	}
645 
646 	xe_gt_sanitize(gt);
647 
648 	xe_device_mem_access_get(gt_to_xe(gt));
649 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
650 	if (err)
651 		goto err_msg;
652 
653 	xe_uc_gucrc_disable(&gt->uc);
654 	xe_uc_stop_prepare(&gt->uc);
655 	xe_gt_pagefault_reset(gt);
656 
657 	err = xe_uc_stop(&gt->uc);
658 	if (err)
659 		goto err_out;
660 
661 	xe_gt_tlb_invalidation_reset(gt);
662 
663 	err = do_gt_reset(gt);
664 	if (err)
665 		goto err_out;
666 
667 	err = do_gt_restart(gt);
668 	if (err)
669 		goto err_out;
670 
671 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
672 	xe_device_mem_access_put(gt_to_xe(gt));
673 	XE_WARN_ON(err);
674 
675 	xe_gt_info(gt, "reset done\n");
676 
677 	return 0;
678 
679 err_out:
680 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
681 err_msg:
682 	XE_WARN_ON(xe_uc_start(&gt->uc));
683 	xe_device_mem_access_put(gt_to_xe(gt));
684 err_fail:
685 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
686 
687 	gt_to_xe(gt)->needs_flr_on_fini = true;
688 
689 	return err;
690 }
691 
692 static void gt_reset_worker(struct work_struct *w)
693 {
694 	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
695 
696 	gt_reset(gt);
697 }
698 
699 void xe_gt_reset_async(struct xe_gt *gt)
700 {
701 	xe_gt_info(gt, "trying reset\n");
702 
703 	/* Don't do a reset while one is already in flight */
704 	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
705 		return;
706 
707 	xe_gt_info(gt, "reset queued\n");
708 	queue_work(gt->ordered_wq, &gt->reset.worker);
709 }
710 
711 void xe_gt_suspend_prepare(struct xe_gt *gt)
712 {
713 	xe_device_mem_access_get(gt_to_xe(gt));
714 	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
715 
716 	xe_uc_stop_prepare(&gt->uc);
717 
718 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
719 	xe_device_mem_access_put(gt_to_xe(gt));
720 }
721 
722 int xe_gt_suspend(struct xe_gt *gt)
723 {
724 	int err;
725 
726 	xe_gt_sanitize(gt);
727 
728 	xe_device_mem_access_get(gt_to_xe(gt));
729 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
730 	if (err)
731 		goto err_msg;
732 
733 	err = xe_uc_suspend(&gt->uc);
734 	if (err)
735 		goto err_force_wake;
736 
737 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
738 	xe_device_mem_access_put(gt_to_xe(gt));
739 	xe_gt_info(gt, "suspended\n");
740 
741 	return 0;
742 
743 err_force_wake:
744 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
745 err_msg:
746 	xe_device_mem_access_put(gt_to_xe(gt));
747 	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
748 
749 	return err;
750 }
751 
752 int xe_gt_resume(struct xe_gt *gt)
753 {
754 	int err;
755 
756 	xe_device_mem_access_get(gt_to_xe(gt));
757 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
758 	if (err)
759 		goto err_msg;
760 
761 	err = do_gt_restart(gt);
762 	if (err)
763 		goto err_force_wake;
764 
765 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
766 	xe_device_mem_access_put(gt_to_xe(gt));
767 	xe_gt_info(gt, "resumed\n");
768 
769 	return 0;
770 
771 err_force_wake:
772 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
773 err_msg:
774 	xe_device_mem_access_put(gt_to_xe(gt));
775 	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
776 
777 	return err;
778 }
779 
780 struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
781 				     enum xe_engine_class class,
782 				     u16 instance, bool logical)
783 {
784 	struct xe_hw_engine *hwe;
785 	enum xe_hw_engine_id id;
786 
787 	for_each_hw_engine(hwe, gt, id)
788 		if (hwe->class == class &&
789 		    ((!logical && hwe->instance == instance) ||
790 		    (logical && hwe->logical_instance == instance)))
791 			return hwe;
792 
793 	return NULL;
794 }
795 
796 struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
797 							 enum xe_engine_class class)
798 {
799 	struct xe_hw_engine *hwe;
800 	enum xe_hw_engine_id id;
801 
802 	for_each_hw_engine(hwe, gt, id) {
803 		switch (class) {
804 		case XE_ENGINE_CLASS_RENDER:
805 		case XE_ENGINE_CLASS_COMPUTE:
806 			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
807 			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
808 				return hwe;
809 			break;
810 		default:
811 			if (hwe->class == class)
812 				return hwe;
813 		}
814 	}
815 
816 	return NULL;
817 }
818