xref: /linux/drivers/gpu/drm/xe/xe_gt.c (revision 058da4a2b164e477b02ca0562f01a9eb9e621cdd)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt.h"
7 
8 #include <linux/minmax.h>
9 
10 #include <kunit/visibility.h>
11 
12 #include <drm/drm_managed.h>
13 #include <uapi/drm/xe_drm.h>
14 
15 #include <generated/xe_device_wa_oob.h>
16 #include <generated/xe_wa_oob.h>
17 
18 #include "instructions/xe_alu_commands.h"
19 #include "instructions/xe_mi_commands.h"
20 #include "regs/xe_engine_regs.h"
21 #include "regs/xe_gt_regs.h"
22 #include "xe_assert.h"
23 #include "xe_bb.h"
24 #include "xe_device.h"
25 #include "xe_eu_stall.h"
26 #include "xe_exec_queue.h"
27 #include "xe_execlist.h"
28 #include "xe_force_wake.h"
29 #include "xe_ggtt.h"
30 #include "xe_gsc.h"
31 #include "xe_gt_ccs_mode.h"
32 #include "xe_gt_clock.h"
33 #include "xe_gt_freq.h"
34 #include "xe_gt_idle.h"
35 #include "xe_gt_mcr.h"
36 #include "xe_gt_printk.h"
37 #include "xe_gt_sriov_pf.h"
38 #include "xe_gt_sriov_vf.h"
39 #include "xe_gt_stats.h"
40 #include "xe_gt_sysfs.h"
41 #include "xe_gt_topology.h"
42 #include "xe_guc_exec_queue_types.h"
43 #include "xe_guc_pc.h"
44 #include "xe_guc_rc.h"
45 #include "xe_guc_submit.h"
46 #include "xe_hw_fence.h"
47 #include "xe_hw_engine_class_sysfs.h"
48 #include "xe_irq.h"
49 #include "xe_lmtt.h"
50 #include "xe_lrc.h"
51 #include "xe_map.h"
52 #include "xe_migrate.h"
53 #include "xe_mmio.h"
54 #include "xe_pagefault.h"
55 #include "xe_pat.h"
56 #include "xe_pm.h"
57 #include "xe_mocs.h"
58 #include "xe_reg_sr.h"
59 #include "xe_ring_ops.h"
60 #include "xe_sa.h"
61 #include "xe_sched_job.h"
62 #include "xe_sriov.h"
63 #include "xe_tlb_inval.h"
64 #include "xe_tuning.h"
65 #include "xe_uc.h"
66 #include "xe_uc_fw.h"
67 #include "xe_vm.h"
68 #include "xe_wa.h"
69 #include "xe_wopcm.h"
70 
71 struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
72 {
73 	struct xe_device *xe = tile_to_xe(tile);
74 	struct drm_device *drm = &xe->drm;
75 	bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt &&
76 		IS_SRIOV_VF(xe);
77 	struct workqueue_struct *ordered_wq;
78 	struct xe_gt *gt;
79 
80 	gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL);
81 	if (!gt)
82 		return ERR_PTR(-ENOMEM);
83 
84 	gt->tile = tile;
85 	if (shared_wq && tile->primary_gt->ordered_wq)
86 		ordered_wq = tile->primary_gt->ordered_wq;
87 	else
88 		ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq",
89 							  WQ_MEM_RECLAIM);
90 	if (IS_ERR(ordered_wq))
91 		return ERR_CAST(ordered_wq);
92 
93 	gt->ordered_wq = ordered_wq;
94 
95 	return gt;
96 }
97 
98 void xe_gt_sanitize(struct xe_gt *gt)
99 {
100 	/*
101 	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
102 	 * reload
103 	 */
104 	xe_guc_submit_disable(&gt->uc.guc);
105 }
106 
107 static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
108 {
109 	u32 reg;
110 
111 	if (!XE_GT_WA(gt, 16023588340))
112 		return;
113 
114 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
115 	if (!fw_ref.domains)
116 		return;
117 
118 	if (xe_gt_is_main_type(gt)) {
119 		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
120 		reg |= CG_DIS_CNTLBUS;
121 		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
122 	}
123 
124 	xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
125 }
126 
127 static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
128 {
129 	u32 reg;
130 
131 	if (!XE_GT_WA(gt, 16023588340))
132 		return;
133 
134 	if (xe_gt_is_media_type(gt))
135 		return;
136 
137 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
138 	if (!fw_ref.domains)
139 		return;
140 
141 	reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
142 	reg &= ~CG_DIS_CNTLBUS;
143 	xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
144 }
145 
146 static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
147 {
148 	struct xe_device *xe = gt_to_xe(gt);
149 	u32 reg;
150 
151 	if (IS_SRIOV_VF(xe))
152 		return;
153 
154 	if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) {
155 		CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
156 		if (!fw_ref.domains)
157 			return;
158 
159 		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
160 		reg |= EN_CMP_1WCOH;
161 		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
162 
163 		if (xe_gt_is_media_type(gt)) {
164 			xe_mmio_rmw32(&gt->mmio, XE2_GAMWALK_CTRL_MEDIA, 0, EN_CMP_1WCOH_GW);
165 		} else {
166 			reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMWALK_CTRL_3D);
167 			reg |= EN_CMP_1WCOH_GW;
168 			xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg);
169 		}
170 	}
171 }
172 
173 static void gt_reset_worker(struct work_struct *w);
174 
175 static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
176 			 long timeout_jiffies, bool force_reset)
177 {
178 	struct xe_sched_job *job;
179 	struct dma_fence *fence;
180 	long timeout;
181 
182 	job = xe_bb_create_job(q, bb);
183 	if (IS_ERR(job))
184 		return PTR_ERR(job);
185 
186 	job->ring_ops_force_reset = force_reset;
187 
188 	xe_sched_job_arm(job);
189 	fence = dma_fence_get(&job->drm.s_fence->finished);
190 	xe_sched_job_push(job);
191 
192 	timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies);
193 	dma_fence_put(fence);
194 	if (timeout < 0)
195 		return timeout;
196 	else if (!timeout)
197 		return -ETIME;
198 
199 	return 0;
200 }
201 
202 static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
203 {
204 	struct xe_bb *bb;
205 	int ret;
206 
207 	bb = xe_bb_new(gt, 4, false);
208 	if (IS_ERR(bb))
209 		return PTR_ERR(bb);
210 
211 	ret = emit_job_sync(q, bb, HZ, false);
212 	xe_bb_free(bb, NULL);
213 
214 	return ret;
215 }
216 
217 /* Dwords required to emit a RMW of a register */
218 #define EMIT_RMW_DW 20
219 
220 static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
221 {
222 	struct xe_hw_engine *hwe = q->hwe;
223 	struct xe_reg_sr *sr = &hwe->reg_lrc;
224 	struct xe_reg_sr_entry *entry;
225 	int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
226 	unsigned long idx;
227 	struct xe_bb *bb;
228 	size_t bb_len = 0;
229 	u32 *cs;
230 
231 	/* count RMW registers as those will be handled separately */
232 	xa_for_each(&sr->xa, idx, entry) {
233 		if (entry->reg.masked || entry->clr_bits == ~0)
234 			++count;
235 		else if (entry->reg.mcr)
236 			++count_rmw_mcr;
237 		else
238 			++count_rmw;
239 	}
240 
241 	if (count)
242 		bb_len += count * 2 + 1;
243 
244 	/*
245 	 * RMW of MCR registers is the same as a normal RMW, except an
246 	 * additional LRI (3 dwords) is required per register to steer the read
247 	 * to a nom-terminated instance.
248 	 *
249 	 * We could probably shorten the batch slightly by eliding the
250 	 * steering for consecutive MCR registers that have the same
251 	 * group/instance target, but it's not worth the extra complexity to do
252 	 * so.
253 	 */
254 	bb_len += count_rmw * EMIT_RMW_DW;
255 	bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
256 
257 	/*
258 	 * After doing all RMW, we need 7 trailing dwords to clean up,
259 	 * plus an additional 3 dwords to reset steering if any of the
260 	 * registers were MCR.
261 	 */
262 	if (count_rmw || count_rmw_mcr)
263 		bb_len += 7 + (count_rmw_mcr ? 3 : 0);
264 
265 	if (hwe->class == XE_ENGINE_CLASS_RENDER)
266 		/*
267 		 * Big enough to emit all of the context's 3DSTATE via
268 		 * xe_lrc_emit_hwe_state_instructions()
269 		 */
270 		bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
271 
272 	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
273 
274 	bb = xe_bb_new(gt, bb_len, false);
275 	if (IS_ERR(bb))
276 		return PTR_ERR(bb);
277 
278 	cs = bb->cs;
279 
280 	if (count) {
281 		/*
282 		 * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
283 		 * reg + 1
284 		 */
285 
286 		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
287 
288 		xa_for_each(&sr->xa, idx, entry) {
289 			struct xe_reg reg = entry->reg;
290 			u32 val;
291 
292 			if (reg.masked)
293 				val = entry->clr_bits << 16;
294 			else if (entry->clr_bits == ~0)
295 				val = 0;
296 			else
297 				continue;
298 
299 			val |= entry->set_bits;
300 
301 			*cs++ = reg.addr;
302 			*cs++ = val;
303 			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
304 		}
305 	}
306 
307 	if (count_rmw || count_rmw_mcr) {
308 		xa_for_each(&sr->xa, idx, entry) {
309 			if (entry->reg.masked || entry->clr_bits == ~0)
310 				continue;
311 
312 			if (entry->reg.mcr) {
313 				struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
314 				u8 group, instance;
315 
316 				xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
317 				*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
318 				*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
319 				*cs++ = SELECTIVE_READ_ADDRESSING |
320 					REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
321 					REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
322 			}
323 
324 			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
325 			*cs++ = entry->reg.addr;
326 			*cs++ = CS_GPR_REG(0, 0).addr;
327 
328 			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
329 				MI_LRI_LRM_CS_MMIO;
330 			*cs++ = CS_GPR_REG(0, 1).addr;
331 			*cs++ = entry->clr_bits;
332 			*cs++ = CS_GPR_REG(0, 2).addr;
333 			*cs++ = entry->set_bits;
334 
335 			*cs++ = MI_MATH(8);
336 			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
337 			*cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1);
338 			*cs++ = CS_ALU_INSTR_AND;
339 			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
340 			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
341 			*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2);
342 			*cs++ = CS_ALU_INSTR_OR;
343 			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
344 
345 			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
346 			*cs++ = CS_GPR_REG(0, 0).addr;
347 			*cs++ = entry->reg.addr;
348 
349 			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
350 				  entry->reg.addr, entry->clr_bits, entry->set_bits,
351 				  entry->reg.mcr ? " (MCR)" : "");
352 		}
353 
354 		/* reset used GPR */
355 		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) |
356 			MI_LRI_LRM_CS_MMIO;
357 		*cs++ = CS_GPR_REG(0, 0).addr;
358 		*cs++ = 0;
359 		*cs++ = CS_GPR_REG(0, 1).addr;
360 		*cs++ = 0;
361 		*cs++ = CS_GPR_REG(0, 2).addr;
362 		*cs++ = 0;
363 
364 		/* reset steering */
365 		if (count_rmw_mcr) {
366 			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
367 			*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
368 			*cs++ = 0;
369 		}
370 	}
371 
372 	cs = xe_lrc_emit_hwe_state_instructions(q, cs);
373 
374 	bb->len = cs - bb->cs;
375 
376 	/* only VFs need to trigger reset to get a clean NULL context */
377 	ret = emit_job_sync(q, bb, HZ, IS_SRIOV_VF(gt_to_xe(gt)));
378 
379 	xe_bb_free(bb, NULL);
380 
381 	return ret;
382 }
383 
384 int xe_gt_record_default_lrcs(struct xe_gt *gt)
385 {
386 	struct xe_device *xe = gt_to_xe(gt);
387 	struct xe_hw_engine *hwe;
388 	enum xe_hw_engine_id id;
389 	int err = 0;
390 
391 	for_each_hw_engine(hwe, gt, id) {
392 		struct xe_exec_queue *q, *nop_q;
393 		void *default_lrc;
394 
395 		if (gt->default_lrc[hwe->class])
396 			continue;
397 
398 		xe_hw_engine_setup_reg_lrc(hwe);
399 
400 		default_lrc = drmm_kzalloc(&xe->drm,
401 					   xe_gt_lrc_size(gt, hwe->class),
402 					   GFP_KERNEL);
403 		if (!default_lrc)
404 			return -ENOMEM;
405 
406 		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
407 					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
408 		if (IS_ERR(q)) {
409 			err = PTR_ERR(q);
410 			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
411 				  hwe->name, q);
412 			return err;
413 		}
414 
415 		/* Prime golden LRC with known good state */
416 		err = emit_wa_job(gt, q);
417 		if (err) {
418 			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
419 				  hwe->name, ERR_PTR(err), q->guc->id);
420 			goto put_exec_queue;
421 		}
422 
423 		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
424 					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
425 		if (IS_ERR(nop_q)) {
426 			err = PTR_ERR(nop_q);
427 			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
428 				  hwe->name, nop_q);
429 			goto put_exec_queue;
430 		}
431 
432 		/* Switch to different LRC */
433 		err = emit_nop_job(gt, nop_q);
434 		if (err) {
435 			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
436 				  hwe->name, ERR_PTR(err), nop_q->guc->id);
437 			goto put_nop_q;
438 		}
439 
440 		xe_map_memcpy_from(xe, default_lrc,
441 				   &q->lrc[0]->bo->vmap,
442 				   xe_lrc_pphwsp_offset(q->lrc[0]),
443 				   xe_gt_lrc_size(gt, hwe->class));
444 
445 		gt->default_lrc[hwe->class] = default_lrc;
446 put_nop_q:
447 		xe_exec_queue_put(nop_q);
448 put_exec_queue:
449 		xe_exec_queue_put(q);
450 		if (err)
451 			break;
452 	}
453 
454 	return err;
455 }
456 
457 static void wa_14026539277(struct xe_gt *gt)
458 {
459 	struct xe_device *xe = gt_to_xe(gt);
460 	u32 val;
461 
462 	/*
463 	 * FIXME: We currently can't use FUNC(xe_rtp_match_not_sriov_vf) in the
464 	 * rules for Wa_14026539277 due to xe_wa_process_device_oob() being
465 	 * called before xe_sriov_probe_early(); and we can't move the call to
466 	 * the former to happen after the latter because MMIO read functions
467 	 * already depend on a device OOB workaround.  This needs to be fixed by
468 	 * allowing workaround checks to happen at different stages of driver
469 	 * initialization.
470 	 */
471 	if (IS_SRIOV_VF(xe))
472 		return;
473 
474 	if (!XE_DEVICE_WA(xe, 14026539277))
475 		return;
476 
477 	if (!xe_gt_is_main_type(gt))
478 		return;
479 
480 	val = xe_gt_mcr_unicast_read_any(gt, L2COMPUTESIDECTRL);
481 	val &= ~CECTRL;
482 	val |= CECTRL_CENODATA_ALWAYS;
483 	xe_gt_mcr_multicast_write(gt, L2COMPUTESIDECTRL, val);
484 }
485 
486 int xe_gt_init_early(struct xe_gt *gt)
487 {
488 	int err;
489 
490 	if (IS_SRIOV_PF(gt_to_xe(gt))) {
491 		err = xe_gt_sriov_pf_init_early(gt);
492 		if (err)
493 			return err;
494 	}
495 
496 	if (IS_SRIOV_VF(gt_to_xe(gt))) {
497 		err = xe_gt_sriov_vf_init_early(gt);
498 		if (err)
499 			return err;
500 	}
501 
502 	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
503 
504 	err = xe_wa_gt_init(gt);
505 	if (err)
506 		return err;
507 
508 	err = xe_tuning_init(gt);
509 	if (err)
510 		return err;
511 
512 	xe_wa_process_gt_oob(gt);
513 
514 	xe_force_wake_init_gt(gt, gt_to_fw(gt));
515 	spin_lock_init(&gt->global_invl_lock);
516 
517 	err = xe_gt_tlb_inval_init_early(gt);
518 	if (err)
519 		return err;
520 
521 	xe_mocs_init_early(gt);
522 
523 	/*
524 	 * Only after this point can GT-specific MMIO operations
525 	 * (including things like communication with the GuC)
526 	 * be performed.
527 	 */
528 	xe_gt_mmio_init(gt);
529 
530 	err = xe_uc_init_noalloc(&gt->uc);
531 	if (err)
532 		return err;
533 
534 	err = xe_gt_stats_init(gt);
535 	if (err)
536 		return err;
537 
538 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
539 	if (!fw_ref.domains)
540 		return -ETIMEDOUT;
541 
542 	xe_gt_mcr_init_early(gt);
543 	xe_pat_init(gt);
544 
545 	return 0;
546 }
547 
548 static void dump_pat_on_error(struct xe_gt *gt)
549 {
550 	struct drm_printer p;
551 	char prefix[32];
552 
553 	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
554 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
555 
556 	xe_pat_dump(gt, &p);
557 }
558 
559 static int gt_init_with_gt_forcewake(struct xe_gt *gt)
560 {
561 	int err;
562 
563 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
564 	if (!fw_ref.domains)
565 		return -ETIMEDOUT;
566 
567 	err = xe_uc_init(&gt->uc);
568 	if (err)
569 		return err;
570 
571 	xe_gt_topology_init(gt);
572 	xe_gt_mcr_init(gt);
573 	xe_gt_enable_host_l2_vram(gt);
574 	xe_gt_enable_comp_1wcoh(gt);
575 
576 	if (xe_gt_is_main_type(gt)) {
577 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
578 		if (err)
579 			return err;
580 		if (IS_SRIOV_PF(gt_to_xe(gt)))
581 			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
582 	}
583 
584 	/* Enable per hw engine IRQs */
585 	xe_irq_enable_hwe(gt);
586 
587 	/* Rerun MCR init as we now have hw engine list */
588 	xe_gt_mcr_init(gt);
589 
590 	err = xe_hw_engines_init_early(gt);
591 	if (err) {
592 		dump_pat_on_error(gt);
593 		return err;
594 	}
595 
596 	err = xe_hw_engine_class_sysfs_init(gt);
597 	if (err)
598 		return err;
599 
600 	/* Initialize CCS mode sysfs after early initialization of HW engines */
601 	err = xe_gt_ccs_mode_sysfs_init(gt);
602 	if (err)
603 		return err;
604 
605 	/*
606 	 * Stash hardware-reported version.  Since this register does not exist
607 	 * on pre-MTL platforms, reading it there will (correctly) return 0.
608 	 */
609 	gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);
610 
611 	/*
612 	 * Wa_14026539277 can't be implemented as a regular GT workaround (i.e.
613 	 * as an entry in gt_was[]) for two reasons: it is actually a device
614 	 * workaround that happens to involve programming a GT register; and it
615 	 * needs to be applied early to avoid getting the hardware in a bad
616 	 * state before we have a chance to do the necessary programming.
617 	 */
618 	wa_14026539277(gt);
619 
620 	return 0;
621 }
622 
623 static int gt_init_with_all_forcewake(struct xe_gt *gt)
624 {
625 	int err;
626 
627 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
628 	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
629 		return -ETIMEDOUT;
630 
631 	xe_gt_mcr_set_implicit_defaults(gt);
632 	xe_wa_process_gt(gt);
633 	xe_tuning_process_gt(gt);
634 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
635 
636 	err = xe_gt_clock_init(gt);
637 	if (err)
638 		return err;
639 
640 	xe_mocs_init(gt);
641 	err = xe_execlist_init(gt);
642 	if (err)
643 		return err;
644 
645 	err = xe_hw_engines_init(gt);
646 	if (err)
647 		return err;
648 
649 	err = xe_uc_init_post_hwconfig(&gt->uc);
650 	if (err)
651 		return err;
652 
653 	if (xe_gt_is_main_type(gt)) {
654 		/*
655 		 * USM has its only SA pool to non-block behind user operations
656 		 */
657 		if (gt_to_xe(gt)->info.has_usm) {
658 			struct xe_device *xe = gt_to_xe(gt);
659 
660 			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
661 								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
662 			if (IS_ERR(gt->usm.bb_pool))
663 				return PTR_ERR(gt->usm.bb_pool);
664 		}
665 	}
666 
667 	if (xe_gt_is_main_type(gt)) {
668 		struct xe_tile *tile = gt_to_tile(gt);
669 
670 		err = xe_migrate_init(tile->migrate);
671 		if (err)
672 			return err;
673 	}
674 
675 	err = xe_uc_load_hw(&gt->uc);
676 	if (err)
677 		return err;
678 
679 	/* Configure default CCS mode of 1 engine with all resources */
680 	if (xe_gt_ccs_mode_enabled(gt)) {
681 		gt->ccs_mode = 1;
682 		xe_gt_apply_ccs_mode(gt);
683 	}
684 
685 	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
686 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
687 
688 	if (IS_SRIOV_PF(gt_to_xe(gt)))
689 		xe_gt_sriov_pf_init_hw(gt);
690 
691 	return 0;
692 }
693 
694 static void xe_gt_fini(void *arg)
695 {
696 	struct xe_gt *gt = arg;
697 	int i;
698 
699 	if (disable_work_sync(&gt->reset.worker))
700 		/*
701 		 * If gt_reset_worker was halted from executing, take care of
702 		 * releasing the rpm reference here.
703 		 */
704 		xe_pm_runtime_put(gt_to_xe(gt));
705 
706 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
707 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
708 
709 	xe_gt_disable_host_l2_vram(gt);
710 }
711 
712 int xe_gt_init(struct xe_gt *gt)
713 {
714 	int err;
715 	int i;
716 
717 	INIT_WORK(&gt->reset.worker, gt_reset_worker);
718 
719 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
720 		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
721 		xe_hw_fence_irq_init(&gt->fence_irq[i]);
722 	}
723 
724 	err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt);
725 	if (err)
726 		return err;
727 
728 	err = xe_gt_sysfs_init(gt);
729 	if (err)
730 		return err;
731 
732 	err = gt_init_with_gt_forcewake(gt);
733 	if (err)
734 		return err;
735 
736 	err = xe_gt_idle_init(&gt->gtidle);
737 	if (err)
738 		return err;
739 
740 	err = xe_gt_freq_init(gt);
741 	if (err)
742 		return err;
743 
744 	xe_force_wake_init_engines(gt, gt_to_fw(gt));
745 
746 	err = gt_init_with_all_forcewake(gt);
747 	if (err)
748 		return err;
749 
750 	xe_gt_record_user_engines(gt);
751 
752 	err = xe_eu_stall_init(gt);
753 	if (err)
754 		return err;
755 
756 	if (IS_SRIOV_VF(gt_to_xe(gt))) {
757 		err = xe_gt_sriov_vf_init(gt);
758 		if (err)
759 			return err;
760 	}
761 
762 	return 0;
763 }
764 
765 /**
766  * xe_gt_mmio_init() - Initialize GT's MMIO access
767  * @gt: the GT object
768  *
769  * Initialize GT's MMIO accessor, which will be used to access registers inside
770  * this GT.
771  */
772 void xe_gt_mmio_init(struct xe_gt *gt)
773 {
774 	struct xe_tile *tile = gt_to_tile(gt);
775 	struct xe_device *xe = tile_to_xe(tile);
776 
777 	xe_mmio_init(&gt->mmio, tile, tile->mmio.regs, tile->mmio.regs_size);
778 
779 	if (gt->info.type == XE_GT_TYPE_MEDIA) {
780 		gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
781 		gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
782 	} else {
783 		gt->mmio.adj_offset = 0;
784 		gt->mmio.adj_limit = 0;
785 	}
786 
787 	if (IS_SRIOV_VF(xe))
788 		gt->mmio.sriov_vf_gt = gt;
789 }
790 EXPORT_SYMBOL_IF_KUNIT(xe_gt_mmio_init);
791 
792 void xe_gt_record_user_engines(struct xe_gt *gt)
793 {
794 	struct xe_hw_engine *hwe;
795 	enum xe_hw_engine_id id;
796 
797 	gt->user_engines.mask = 0;
798 	memset(gt->user_engines.instances_per_class, 0,
799 	       sizeof(gt->user_engines.instances_per_class));
800 
801 	for_each_hw_engine(hwe, gt, id) {
802 		if (xe_hw_engine_is_reserved(hwe))
803 			continue;
804 
805 		gt->user_engines.mask |= BIT_ULL(id);
806 		gt->user_engines.instances_per_class[hwe->class]++;
807 	}
808 
809 	xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask)
810 		     == gt->info.engine_mask);
811 }
812 
813 static int do_gt_reset(struct xe_gt *gt)
814 {
815 	int err;
816 
817 	if (IS_SRIOV_VF(gt_to_xe(gt)))
818 		return xe_gt_sriov_vf_reset(gt);
819 
820 	xe_gsc_wa_14015076503(gt, true);
821 
822 	xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
823 	err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
824 	if (err)
825 		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
826 			  ERR_PTR(err));
827 
828 	xe_gsc_wa_14015076503(gt, false);
829 
830 	return err;
831 }
832 
833 static int vf_gt_restart(struct xe_gt *gt)
834 {
835 	int err;
836 
837 	err = xe_uc_sanitize_reset(&gt->uc);
838 	if (err)
839 		return err;
840 
841 	err = xe_uc_load_hw(&gt->uc);
842 	if (err)
843 		return err;
844 
845 	err = xe_uc_start(&gt->uc);
846 	if (err)
847 		return err;
848 
849 	return 0;
850 }
851 
852 static int do_gt_restart(struct xe_gt *gt)
853 {
854 	struct xe_hw_engine *hwe;
855 	enum xe_hw_engine_id id;
856 	int err;
857 
858 	if (IS_SRIOV_VF(gt_to_xe(gt)))
859 		return vf_gt_restart(gt);
860 
861 	xe_pat_init(gt);
862 
863 	xe_gt_enable_host_l2_vram(gt);
864 	xe_gt_enable_comp_1wcoh(gt);
865 
866 	xe_gt_mcr_set_implicit_defaults(gt);
867 	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
868 
869 	err = xe_wopcm_init(&gt->uc.wopcm);
870 	if (err)
871 		return err;
872 
873 	for_each_hw_engine(hwe, gt, id)
874 		xe_hw_engine_enable_ring(hwe);
875 
876 	err = xe_uc_sanitize_reset(&gt->uc);
877 	if (err)
878 		return err;
879 
880 	err = xe_uc_load_hw(&gt->uc);
881 	if (err)
882 		return err;
883 
884 	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
885 		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
886 
887 	if (IS_SRIOV_PF(gt_to_xe(gt)))
888 		xe_gt_sriov_pf_init_hw(gt);
889 
890 	xe_mocs_init(gt);
891 
892 	for_each_hw_engine(hwe, gt, id)
893 		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
894 
895 	/* Get CCS mode in sync between sw/hw */
896 	xe_gt_apply_ccs_mode(gt);
897 
898 	err = xe_uc_start(&gt->uc);
899 	if (err)
900 		return err;
901 
902 	/* Restore GT freq to expected values */
903 	xe_gt_sanitize_freq(gt);
904 
905 	if (IS_SRIOV_PF(gt_to_xe(gt)))
906 		xe_gt_sriov_pf_restart(gt);
907 
908 	return 0;
909 }
910 
911 static void gt_reset_worker(struct work_struct *w)
912 {
913 	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
914 	unsigned int fw_ref;
915 	int err;
916 
917 	if (xe_device_wedged(gt_to_xe(gt)))
918 		goto err_pm_put;
919 
920 	/* We only support GT resets with GuC submission */
921 	if (!xe_device_uc_enabled(gt_to_xe(gt)))
922 		goto err_pm_put;
923 
924 	xe_gt_info(gt, "reset started\n");
925 
926 	if (xe_fault_inject_gt_reset()) {
927 		err = -ECANCELED;
928 		goto err_fail;
929 	}
930 
931 	xe_gt_sanitize(gt);
932 
933 	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
934 	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
935 		err = -ETIMEDOUT;
936 		goto err_out;
937 	}
938 
939 	if (IS_SRIOV_PF(gt_to_xe(gt)))
940 		xe_gt_sriov_pf_stop_prepare(gt);
941 
942 	xe_guc_rc_disable(&gt->uc.guc);
943 	xe_uc_stop_prepare(&gt->uc);
944 	xe_pagefault_reset(gt_to_xe(gt), gt);
945 
946 	xe_uc_stop(&gt->uc);
947 
948 	xe_tlb_inval_reset(&gt->tlb_inval);
949 
950 	err = do_gt_reset(gt);
951 	if (err)
952 		goto err_out;
953 
954 	err = do_gt_restart(gt);
955 	if (err)
956 		goto err_out;
957 
958 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
959 
960 	/* Pair with get while enqueueing the work in xe_gt_reset_async() */
961 	xe_pm_runtime_put(gt_to_xe(gt));
962 
963 	xe_gt_info(gt, "reset done\n");
964 
965 	return;
966 
967 err_out:
968 	xe_force_wake_put(gt_to_fw(gt), fw_ref);
969 	XE_WARN_ON(xe_uc_start(&gt->uc));
970 
971 err_fail:
972 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
973 	xe_device_declare_wedged(gt_to_xe(gt));
974 err_pm_put:
975 	xe_pm_runtime_put(gt_to_xe(gt));
976 }
977 
978 void xe_gt_reset_async(struct xe_gt *gt)
979 {
980 	xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));
981 
982 	/* Don't do a reset while one is already in flight */
983 	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
984 		return;
985 
986 	xe_gt_info(gt, "reset queued\n");
987 
988 	/* Pair with put in gt_reset_worker() if work is enqueued */
989 	xe_pm_runtime_get_noresume(gt_to_xe(gt));
990 	if (!queue_work(gt->ordered_wq, &gt->reset.worker))
991 		xe_pm_runtime_put(gt_to_xe(gt));
992 }
993 
994 void xe_gt_suspend_prepare(struct xe_gt *gt)
995 {
996 	xe_uc_suspend_prepare(&gt->uc);
997 }
998 
999 int xe_gt_suspend(struct xe_gt *gt)
1000 {
1001 	int err;
1002 
1003 	xe_gt_dbg(gt, "suspending\n");
1004 	xe_gt_sanitize(gt);
1005 
1006 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1007 	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
1008 		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
1009 		return -ETIMEDOUT;
1010 	}
1011 
1012 	err = xe_uc_suspend(&gt->uc);
1013 	if (err) {
1014 		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
1015 		return err;
1016 	}
1017 
1018 	xe_gt_idle_disable_pg(gt);
1019 
1020 	xe_gt_disable_host_l2_vram(gt);
1021 
1022 	xe_gt_dbg(gt, "suspended\n");
1023 
1024 	return 0;
1025 }
1026 
1027 void xe_gt_shutdown(struct xe_gt *gt)
1028 {
1029 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1030 	do_gt_reset(gt);
1031 }
1032 
1033 /**
1034  * xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
1035  * @gt: the GT object
1036  *
1037  * Called after driver init/GSC load completes to restore GT frequencies if we
1038  * limited them for any WAs.
1039  */
1040 int xe_gt_sanitize_freq(struct xe_gt *gt)
1041 {
1042 	int ret = 0;
1043 
1044 	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
1045 	     xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
1046 	     xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
1047 	    XE_GT_WA(gt, 22019338487))
1048 		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
1049 
1050 	return ret;
1051 }
1052 
1053 int xe_gt_resume(struct xe_gt *gt)
1054 {
1055 	int err;
1056 
1057 	xe_gt_dbg(gt, "resuming\n");
1058 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1059 	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
1060 		xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
1061 		return -ETIMEDOUT;
1062 	}
1063 
1064 	err = do_gt_restart(gt);
1065 	if (err)
1066 		return err;
1067 
1068 	xe_gt_idle_enable_pg(gt);
1069 
1070 	xe_gt_dbg(gt, "resumed\n");
1071 
1072 	return 0;
1073 }
1074 
1075 /**
1076  * xe_gt_runtime_suspend() - GT runtime suspend
1077  * @gt: the GT object
1078  *
1079  * Return: 0 on success, negative error code otherwise.
1080  */
1081 int xe_gt_runtime_suspend(struct xe_gt *gt)
1082 {
1083 	xe_gt_dbg(gt, "runtime suspending\n");
1084 
1085 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1086 	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
1087 		xe_gt_err(gt, "runtime suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
1088 		return -ETIMEDOUT;
1089 	}
1090 
1091 	xe_uc_runtime_suspend(&gt->uc);
1092 	xe_gt_disable_host_l2_vram(gt);
1093 
1094 	xe_gt_dbg(gt, "runtime suspended\n");
1095 
1096 	return 0;
1097 }
1098 
1099 /**
1100  * xe_gt_runtime_resume() - GT runtime resume
1101  * @gt: the GT object
1102  *
1103  * Return: 0 on success, negative error code otherwise.
1104  */
1105 int xe_gt_runtime_resume(struct xe_gt *gt)
1106 {
1107 	xe_gt_dbg(gt, "runtime resuming\n");
1108 
1109 	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1110 	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
1111 		xe_gt_err(gt, "runtime resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
1112 		return -ETIMEDOUT;
1113 	}
1114 
1115 	xe_gt_enable_host_l2_vram(gt);
1116 	xe_uc_runtime_resume(&gt->uc);
1117 
1118 	xe_gt_dbg(gt, "runtime resumed\n");
1119 
1120 	return 0;
1121 }
1122 
1123 struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
1124 				     enum xe_engine_class class,
1125 				     u16 instance, bool logical)
1126 {
1127 	struct xe_hw_engine *hwe;
1128 	enum xe_hw_engine_id id;
1129 
1130 	for_each_hw_engine(hwe, gt, id)
1131 		if (hwe->class == class &&
1132 		    ((!logical && hwe->instance == instance) ||
1133 		    (logical && hwe->logical_instance == instance)))
1134 			return hwe;
1135 
1136 	return NULL;
1137 }
1138 
1139 struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
1140 							 enum xe_engine_class class)
1141 {
1142 	struct xe_hw_engine *hwe;
1143 	enum xe_hw_engine_id id;
1144 
1145 	for_each_hw_engine(hwe, gt, id) {
1146 		switch (class) {
1147 		case XE_ENGINE_CLASS_RENDER:
1148 		case XE_ENGINE_CLASS_COMPUTE:
1149 			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
1150 			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
1151 				return hwe;
1152 			break;
1153 		default:
1154 			if (hwe->class == class)
1155 				return hwe;
1156 		}
1157 	}
1158 
1159 	return NULL;
1160 }
1161 
1162 struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt)
1163 {
1164 	struct xe_hw_engine *hwe;
1165 	enum xe_hw_engine_id id;
1166 
1167 	for_each_hw_engine(hwe, gt, id)
1168 		return hwe;
1169 
1170 	return NULL;
1171 }
1172 
1173 /**
1174  * xe_gt_declare_wedged() - Declare GT wedged
1175  * @gt: the GT object
1176  *
1177  * Wedge the GT which stops all submission, saves desired debug state, and
1178  * cleans up anything which could timeout.
1179  */
1180 void xe_gt_declare_wedged(struct xe_gt *gt)
1181 {
1182 	xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode);
1183 
1184 	xe_uc_declare_wedged(&gt->uc);
1185 	xe_tlb_inval_reset(&gt->tlb_inval);
1186 }
1187