xref: /linux/drivers/gpu/drm/xe/xe_gt.c (revision 8457669db968c98edb781892d73fa559e1efcbd4)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_gt.h"
7 
8 #include <linux/minmax.h>
9 
10 #include <drm/drm_managed.h>
11 #include <uapi/drm/xe_drm.h>
12 
13 #include <generated/xe_wa_oob.h>
14 
15 #include "instructions/xe_alu_commands.h"
16 #include "instructions/xe_mi_commands.h"
17 #include "regs/xe_engine_regs.h"
18 #include "regs/xe_gt_regs.h"
19 #include "xe_assert.h"
20 #include "xe_bb.h"
21 #include "xe_device.h"
22 #include "xe_eu_stall.h"
23 #include "xe_exec_queue.h"
24 #include "xe_execlist.h"
25 #include "xe_force_wake.h"
26 #include "xe_ggtt.h"
27 #include "xe_gsc.h"
28 #include "xe_gt_ccs_mode.h"
29 #include "xe_gt_clock.h"
30 #include "xe_gt_freq.h"
31 #include "xe_gt_idle.h"
32 #include "xe_gt_mcr.h"
33 #include "xe_gt_printk.h"
34 #include "xe_gt_sriov_pf.h"
35 #include "xe_gt_sriov_vf.h"
36 #include "xe_gt_sysfs.h"
37 #include "xe_gt_topology.h"
38 #include "xe_guc_exec_queue_types.h"
39 #include "xe_guc_pc.h"
40 #include "xe_guc_submit.h"
41 #include "xe_hw_fence.h"
42 #include "xe_hw_engine_class_sysfs.h"
43 #include "xe_irq.h"
44 #include "xe_lmtt.h"
45 #include "xe_lrc.h"
46 #include "xe_map.h"
47 #include "xe_migrate.h"
48 #include "xe_mmio.h"
49 #include "xe_pagefault.h"
50 #include "xe_pat.h"
51 #include "xe_pm.h"
52 #include "xe_mocs.h"
53 #include "xe_reg_sr.h"
54 #include "xe_ring_ops.h"
55 #include "xe_sa.h"
56 #include "xe_sched_job.h"
57 #include "xe_sriov.h"
58 #include "xe_tlb_inval.h"
59 #include "xe_tuning.h"
60 #include "xe_uc.h"
61 #include "xe_uc_fw.h"
62 #include "xe_vm.h"
63 #include "xe_wa.h"
64 #include "xe_wopcm.h"
65 
/**
 * xe_gt_alloc() - Allocate and minimally initialize a GT structure
 * @tile: the &struct xe_tile the new GT belongs to
 *
 * The allocation is DRM-managed (drmm), so it is released automatically
 * with the &drm_device and needs no matching free. Also sets up the GT's
 * ordered workqueue; on VFs that need it, secondary GTs reuse the primary
 * GT's queue instead of allocating their own.
 *
 * Return: pointer to the new &struct xe_gt, or an ERR_PTR() on failure.
 */
struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	struct drm_device *drm = &xe->drm;
	/* Share the primary GT's ordered wq on VFs that request it */
	bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt &&
		IS_SRIOV_VF(xe);
	struct workqueue_struct *ordered_wq;
	struct xe_gt *gt;

	gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL);
	if (!gt)
		return ERR_PTR(-ENOMEM);

	gt->tile = tile;
	if (shared_wq && tile->primary_gt->ordered_wq)
		ordered_wq = tile->primary_gt->ordered_wq;
	else
		ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq",
							  WQ_MEM_RECLAIM);
	/* Only the freshly-allocated branch can yield an ERR_PTR here */
	if (IS_ERR(ordered_wq))
		return ERR_CAST(ordered_wq);

	gt->ordered_wq = ordered_wq;

	return gt;
}
92 
/**
 * xe_gt_sanitize() - Bring GT software state back to a known baseline
 * @gt: the &struct xe_gt to sanitize
 *
 * Currently this only disables GuC submission; see the FIXME below for
 * why a fuller uC sanitize is not done here.
 */
void xe_gt_sanitize(struct xe_gt *gt)
{
	/*
	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
	 * reload
	 */
	xe_guc_submit_disable(&gt->uc.guc);
}
101 
/*
 * Part of workaround 16023588340: program the register state this WA
 * requires when the GT comes up (the counterpart below undoes the
 * CG_DIS_CNTLBUS portion on the way down).
 */
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
	u32 reg;

	/* Only platforms tagged with this WA need any of this */
	if (!XE_GT_WA(gt, 16023588340))
		return;

	/* Scoped GT forcewake; silently skip if the wake never completed */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return;

	if (xe_gt_is_main_type(gt)) {
		/* RMW: unicast read from any instance, multicast write-back */
		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
		reg |= CG_DIS_CNTLBUS;
		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
	}

	xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
}
121 
/*
 * Counterpart of xe_gt_enable_host_l2_vram() for workaround 16023588340:
 * clears CG_DIS_CNTLBUS again on teardown/suspend paths. Media GTs never
 * set the bit (see the main-type check in the enable path), so they are
 * skipped here.
 */
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
	u32 reg;

	if (!XE_GT_WA(gt, 16023588340))
		return;

	if (xe_gt_is_media_type(gt))
		return;

	/* Scoped GT forcewake; bail quietly if it could not be acquired */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return;

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
	reg &= ~CG_DIS_CNTLBUS;
	xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
}
140 
/*
 * Enable compression one-way coherency bits on Xe3+ platforms with flat CCS.
 * Skipped on VFs (register programming is presumably the PF's job here —
 * NOTE(review): confirm against the SR-IOV provisioning model).
 */
static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int fw_ref;
	u32 reg;

	if (IS_SRIOV_VF(xe))
		return;

	if (GRAPHICS_VER(xe) >= 30 && xe->info.has_flat_ccs) {
		fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
		if (!fw_ref)
			return;

		/* EN_CMP_1WCOH in GAMREQSTRM_CTRL applies to both GT types */
		reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
		reg |= EN_CMP_1WCOH;
		xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);

		/* The GAMWALK control lives in a different register per GT type */
		if (xe_gt_is_media_type(gt)) {
			xe_mmio_rmw32(&gt->mmio, XE2_GAMWALK_CTRL_MEDIA, 0, EN_CMP_1WCOH_GW);
		} else {
			reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMWALK_CTRL_3D);
			reg |= EN_CMP_1WCOH_GW;
			xe_gt_mcr_multicast_write(gt, XE2_GAMWALK_CTRL_3D, reg);
		}

		xe_force_wake_put(gt_to_fw(gt), fw_ref);
	}
}
170 
171 static void gt_reset_worker(struct work_struct *w);
172 
emit_job_sync(struct xe_exec_queue * q,struct xe_bb * bb,long timeout_jiffies)173 static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
174 			 long timeout_jiffies)
175 {
176 	struct xe_sched_job *job;
177 	struct dma_fence *fence;
178 	long timeout;
179 
180 	job = xe_bb_create_job(q, bb);
181 	if (IS_ERR(job))
182 		return PTR_ERR(job);
183 
184 	xe_sched_job_arm(job);
185 	fence = dma_fence_get(&job->drm.s_fence->finished);
186 	xe_sched_job_push(job);
187 
188 	timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies);
189 	dma_fence_put(fence);
190 	if (timeout < 0)
191 		return timeout;
192 	else if (!timeout)
193 		return -ETIME;
194 
195 	return 0;
196 }
197 
emit_nop_job(struct xe_gt * gt,struct xe_exec_queue * q)198 static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
199 {
200 	struct xe_bb *bb;
201 	int ret;
202 
203 	bb = xe_bb_new(gt, 4, false);
204 	if (IS_ERR(bb))
205 		return PTR_ERR(bb);
206 
207 	ret = emit_job_sync(q, bb, HZ);
208 	xe_bb_free(bb, NULL);
209 
210 	return ret;
211 }
212 
/* Dwords required to emit a RMW of a register */
#define EMIT_RMW_DW 20

/*
 * Build and synchronously execute a batch on @q that applies the engine's
 * LRC save/restore register list (workarounds + tunings), so the resulting
 * context state can be captured as the golden LRC. Registers that can be
 * written outright go into one big LRI; the rest need a GPR-based
 * read-modify-write sequence, with extra steering LRIs for MCR registers.
 */
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
	struct xe_hw_engine *hwe = q->hwe;
	struct xe_reg_sr *sr = &hwe->reg_lrc;
	struct xe_reg_sr_entry *entry;
	int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
	unsigned long idx;
	struct xe_bb *bb;
	size_t bb_len = 0;
	u32 *cs;

	/* count RMW registers as those will be handled separately */
	xa_for_each(&sr->xa, idx, entry) {
		if (entry->reg.masked || entry->clr_bits == ~0)
			++count;	/* full-value write; fits in the single LRI */
		else if (entry->reg.mcr)
			++count_rmw_mcr;
		else
			++count_rmw;
	}

	/* Single LRI: 1 header dword + 2 dwords (addr, value) per register */
	if (count)
		bb_len += count * 2 + 1;

	/*
	 * RMW of MCR registers is the same as a normal RMW, except an
	 * additional LRI (3 dwords) is required per register to steer the read
	 * to a non-terminated instance.
	 *
	 * We could probably shorten the batch slightly by eliding the
	 * steering for consecutive MCR registers that have the same
	 * group/instance target, but it's not worth the extra complexity to do
	 * so.
	 */
	bb_len += count_rmw * EMIT_RMW_DW;
	bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);

	/*
	 * After doing all RMW, we need 7 trailing dwords to clean up,
	 * plus an additional 3 dwords to reset steering if any of the
	 * registers were MCR.
	 */
	if (count_rmw || count_rmw_mcr)
		bb_len += 7 + (count_rmw_mcr ? 3 : 0);

	if (hwe->class == XE_ENGINE_CLASS_RENDER)
		/*
		 * Big enough to emit all of the context's 3DSTATE via
		 * xe_lrc_emit_hwe_state_instructions()
		 */
		bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);

	xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);

	bb = xe_bb_new(gt, bb_len, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	cs = bb->cs;

	if (count) {
		/*
		 * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
		 * reg + 1
		 */

		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);

		xa_for_each(&sr->xa, idx, entry) {
			struct xe_reg reg = entry->reg;
			u32 val;

			if (reg.masked)
				val = entry->clr_bits << 16;
			else if (entry->clr_bits == ~0)
				val = 0;
			else
				continue;	/* RMW register; handled below */

			val |= entry->set_bits;

			*cs++ = reg.addr;
			*cs++ = val;
			xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
		}
	}

	if (count_rmw || count_rmw_mcr) {
		xa_for_each(&sr->xa, idx, entry) {
			if (entry->reg.masked || entry->clr_bits == ~0)
				continue;	/* already emitted in the LRI above */

			if (entry->reg.mcr) {
				struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
				u8 group, instance;

				/* Steer reads to a non-terminated MCR instance */
				xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
				*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
				*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
				*cs++ = SELECTIVE_READ_ADDRESSING |
					REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
					REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
			}

			/* GPR0 = current register value */
			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
			*cs++ = entry->reg.addr;
			*cs++ = CS_GPR_REG(0, 0).addr;

			/* GPR1 = clear mask, GPR2 = set mask */
			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				MI_LRI_LRM_CS_MMIO;
			*cs++ = CS_GPR_REG(0, 1).addr;
			*cs++ = entry->clr_bits;
			*cs++ = CS_GPR_REG(0, 2).addr;
			*cs++ = entry->set_bits;

			/* MI_MATH: GPR0 = (GPR0 & ~GPR1) | GPR2 */
			*cs++ = MI_MATH(8);
			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
			*cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1);
			*cs++ = CS_ALU_INSTR_AND;
			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
			*cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
			*cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2);
			*cs++ = CS_ALU_INSTR_OR;
			*cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);

			/* Write the merged value back to the register */
			*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
			*cs++ = CS_GPR_REG(0, 0).addr;
			*cs++ = entry->reg.addr;

			xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
				  entry->reg.addr, entry->clr_bits, entry->set_bits,
				  entry->reg.mcr ? " (MCR)" : "");
		}

		/* reset used GPR */
		*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) |
			MI_LRI_LRM_CS_MMIO;
		*cs++ = CS_GPR_REG(0, 0).addr;
		*cs++ = 0;
		*cs++ = CS_GPR_REG(0, 1).addr;
		*cs++ = 0;
		*cs++ = CS_GPR_REG(0, 2).addr;
		*cs++ = 0;

		/* reset steering */
		if (count_rmw_mcr) {
			*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
			*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
			*cs++ = 0;
		}
	}

	cs = xe_lrc_emit_hwe_state_instructions(q, cs);

	bb->len = cs - bb->cs;

	ret = emit_job_sync(q, bb, HZ);

	xe_bb_free(bb, NULL);

	return ret;
}
378 
/**
 * xe_gt_record_default_lrcs() - Capture a golden LRC per engine class
 * @gt: the &struct xe_gt
 *
 * For each engine class, runs a WA job to prime the context with the
 * desired register state, switches to a second queue with a nop job so the
 * primed context is saved, then copies the saved context image out as the
 * class's default ("golden") LRC.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gt_record_default_lrcs(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err = 0;

	for_each_hw_engine(hwe, gt, id) {
		struct xe_exec_queue *q, *nop_q;
		void *default_lrc;

		/* Only one golden LRC is needed per engine class */
		if (gt->default_lrc[hwe->class])
			continue;

		xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe);
		xe_wa_process_lrc(hwe);
		xe_hw_engine_setup_default_lrc_state(hwe);
		xe_tuning_process_lrc(hwe);

		/* drmm allocation: freed with the drm_device, even on error */
		default_lrc = drmm_kzalloc(&xe->drm,
					   xe_gt_lrc_size(gt, hwe->class),
					   GFP_KERNEL);
		if (!default_lrc)
			return -ENOMEM;

		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(q)) {
			err = PTR_ERR(q);
			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
				  hwe->name, q);
			return err;
		}

		/* Prime golden LRC with known good state */
		err = emit_wa_job(gt, q);
		if (err) {
			xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), q->guc->id);
			goto put_exec_queue;
		}

		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
		if (IS_ERR(nop_q)) {
			err = PTR_ERR(nop_q);
			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
				  hwe->name, nop_q);
			goto put_exec_queue;
		}

		/* Switch to different LRC */
		err = emit_nop_job(gt, nop_q);
		if (err) {
			xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n",
				  hwe->name, ERR_PTR(err), nop_q->guc->id);
			goto put_nop_q;
		}

		/* Copy the now-saved context image out of q's LRC BO */
		xe_map_memcpy_from(xe, default_lrc,
				   &q->lrc[0]->bo->vmap,
				   xe_lrc_pphwsp_offset(q->lrc[0]),
				   xe_gt_lrc_size(gt, hwe->class));

		gt->default_lrc[hwe->class] = default_lrc;
		/* Success path falls through the labels to drop both queues */
put_nop_q:
		xe_exec_queue_put(nop_q);
put_exec_queue:
		xe_exec_queue_put(q);
		if (err)
			break;
	}

	return err;
}
454 
/**
 * xe_gt_init_early() - Early, pre-MMIO GT initialization
 * @gt: the &struct xe_gt
 *
 * Sets up SR-IOV PF/VF early state, workarounds/tunings bookkeeping,
 * forcewake, TLB invalidation and MOCS early state, then initializes the
 * GT MMIO accessor and performs the first register programming (MCR
 * steering, PAT) under GT forcewake.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gt_init_early(struct xe_gt *gt)
{
	int err;

	if (IS_SRIOV_PF(gt_to_xe(gt))) {
		err = xe_gt_sriov_pf_init_early(gt);
		if (err)
			return err;
	}

	if (IS_SRIOV_VF(gt_to_xe(gt))) {
		err = xe_gt_sriov_vf_init_early(gt);
		if (err)
			return err;
	}

	xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));

	err = xe_wa_gt_init(gt);
	if (err)
		return err;

	err = xe_tuning_init(gt);
	if (err)
		return err;

	/* OOB WAs must be known before any register access decisions below */
	xe_wa_process_gt_oob(gt);

	xe_force_wake_init_gt(gt, gt_to_fw(gt));
	spin_lock_init(&gt->global_invl_lock);

	err = xe_gt_tlb_inval_init_early(gt);
	if (err)
		return err;

	xe_mocs_init_early(gt);

	/*
	 * Only after this point can GT-specific MMIO operations
	 * (including things like communication with the GuC)
	 * be performed.
	 */
	xe_gt_mmio_init(gt);

	err = xe_uc_init_noalloc(&gt->uc);
	if (err)
		return err;

	/* Scoped GT forcewake for the first real register programming */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return -ETIMEDOUT;

	xe_gt_mcr_init_early(gt);
	xe_pat_init(gt);

	return 0;
}
512 
dump_pat_on_error(struct xe_gt * gt)513 static void dump_pat_on_error(struct xe_gt *gt)
514 {
515 	struct drm_printer p;
516 	char prefix[32];
517 
518 	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
519 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
520 
521 	xe_pat_dump(gt, &p);
522 }
523 
/*
 * GT initialization steps that need only the GT forcewake domain:
 * uC init, topology/MCR setup, GGTT/LMTT (main GT only), per-engine IRQs,
 * early HW engine setup and the related sysfs nodes.
 */
static int gt_init_with_gt_forcewake(struct xe_gt *gt)
{
	int err;

	/* Scoped forcewake held for the remainder of the function */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FW_GT);
	if (!fw_ref.domains)
		return -ETIMEDOUT;

	err = xe_uc_init(&gt->uc);
	if (err)
		return err;

	xe_gt_topology_init(gt);
	xe_gt_mcr_init(gt);
	xe_gt_enable_host_l2_vram(gt);
	xe_gt_enable_comp_1wcoh(gt);

	if (xe_gt_is_main_type(gt)) {
		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
		if (err)
			return err;
		if (IS_SRIOV_PF(gt_to_xe(gt)))
			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
	}

	/* Enable per hw engine IRQs */
	xe_irq_enable_hwe(gt);

	/* Rerun MCR init as we now have hw engine list */
	xe_gt_mcr_init(gt);

	err = xe_hw_engines_init_early(gt);
	if (err) {
		/* PAT state is a common culprit; dump it for diagnosis */
		dump_pat_on_error(gt);
		return err;
	}

	err = xe_hw_engine_class_sysfs_init(gt);
	if (err)
		return err;

	/* Initialize CCS mode sysfs after early initialization of HW engines */
	err = xe_gt_ccs_mode_sysfs_init(gt);
	if (err)
		return err;

	/*
	 * Stash hardware-reported version.  Since this register does not exist
	 * on pre-MTL platforms, reading it there will (correctly) return 0.
	 */
	gt->info.gmdid = xe_mmio_read32(&gt->mmio, GMD_ID);

	return 0;
}
578 
/*
 * GT initialization steps that need all forcewake domains: workaround and
 * tuning application, clocks, MOCS, engine init, uC firmware load, USM/
 * migrate setup on the main GT, and SR-IOV PF hardware provisioning.
 */
static int gt_init_with_all_forcewake(struct xe_gt *gt)
{
	int err;

	/* Scoped forcewake on every domain for the whole function */
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL))
		return -ETIMEDOUT;

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_wa_process_gt(gt);
	xe_tuning_process_gt(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_gt_clock_init(gt);
	if (err)
		return err;

	xe_mocs_init(gt);
	err = xe_execlist_init(gt);
	if (err)
		return err;

	err = xe_hw_engines_init(gt);
	if (err)
		return err;

	err = xe_uc_init_post_hwconfig(&gt->uc);
	if (err)
		return err;

	if (xe_gt_is_main_type(gt)) {
		/*
		 * USM gets its own SA pool so it does not block behind user
		 * operations
		 */
		if (gt_to_xe(gt)->info.has_usm) {
			struct xe_device *xe = gt_to_xe(gt);

			gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt),
								IS_DGFX(xe) ? SZ_1M : SZ_512K, 16);
			if (IS_ERR(gt->usm.bb_pool))
				return PTR_ERR(gt->usm.bb_pool);
		}
	}

	if (xe_gt_is_main_type(gt)) {
		struct xe_tile *tile = gt_to_tile(gt);

		err = xe_migrate_init(tile->migrate);
		if (err)
			return err;
	}

	err = xe_uc_load_hw(&gt->uc);
	if (err)
		return err;

	/* Configure default CCS mode of 1 engine with all resources */
	if (xe_gt_ccs_mode_enabled(gt)) {
		gt->ccs_mode = 1;
		xe_gt_apply_ccs_mode(gt);
	}

	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_init_hw(gt);

	return 0;
}
649 
/*
 * devm teardown action registered in xe_gt_init(): stops the reset worker,
 * finalizes fence IRQ state and undoes the host L2 VRAM WA programming.
 */
static void xe_gt_fini(void *arg)
{
	struct xe_gt *gt = arg;
	int i;

	if (disable_work_sync(&gt->reset.worker))
		/*
		 * If gt_reset_worker was halted from executing, take care of
		 * releasing the rpm reference here.
		 */
		xe_pm_runtime_put(gt_to_xe(gt));

	/* Tear down the fence IRQ state initialized in xe_gt_init() */
	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
		xe_hw_fence_irq_finish(&gt->fence_irq[i]);

	xe_gt_disable_host_l2_vram(gt);
}
667 
/**
 * xe_gt_init() - Full GT initialization
 * @gt: the &struct xe_gt
 *
 * Sets up the reset worker, ring ops and fence IRQs, registers the devm
 * teardown action, then runs the forcewake-scoped init phases, frequency
 * and idle management, EU stall support, and SR-IOV VF init.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gt_init(struct xe_gt *gt)
{
	int err;
	int i;

	INIT_WORK(&gt->reset.worker, gt_reset_worker);

	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
		gt->ring_ops[i] = xe_ring_ops_get(gt, i);
		xe_hw_fence_irq_init(&gt->fence_irq[i]);
	}

	/* Registered early so all later partial-init states are cleaned up */
	err = devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, xe_gt_fini, gt);
	if (err)
		return err;

	err = xe_gt_sysfs_init(gt);
	if (err)
		return err;

	err = gt_init_with_gt_forcewake(gt);
	if (err)
		return err;

	err = xe_gt_idle_init(&gt->gtidle);
	if (err)
		return err;

	err = xe_gt_freq_init(gt);
	if (err)
		return err;

	/* Engine domains must exist before taking XE_FORCEWAKE_ALL below */
	xe_force_wake_init_engines(gt, gt_to_fw(gt));

	err = gt_init_with_all_forcewake(gt);
	if (err)
		return err;

	xe_gt_record_user_engines(gt);

	err = xe_eu_stall_init(gt);
	if (err)
		return err;

	if (IS_SRIOV_VF(gt_to_xe(gt))) {
		err = xe_gt_sriov_vf_init(gt);
		if (err)
			return err;
	}

	return 0;
}
720 
721 /**
722  * xe_gt_mmio_init() - Initialize GT's MMIO access
723  * @gt: the GT object
724  *
725  * Initialize GT's MMIO accessor, which will be used to access registers inside
726  * this GT.
727  */
xe_gt_mmio_init(struct xe_gt * gt)728 void xe_gt_mmio_init(struct xe_gt *gt)
729 {
730 	struct xe_tile *tile = gt_to_tile(gt);
731 	struct xe_device *xe = tile_to_xe(tile);
732 
733 	xe_mmio_init(&gt->mmio, tile, tile->mmio.regs, tile->mmio.regs_size);
734 
735 	if (gt->info.type == XE_GT_TYPE_MEDIA) {
736 		gt->mmio.adj_offset = MEDIA_GT_GSI_OFFSET;
737 		gt->mmio.adj_limit = MEDIA_GT_GSI_LENGTH;
738 	} else {
739 		gt->mmio.adj_offset = 0;
740 		gt->mmio.adj_limit = 0;
741 	}
742 
743 	if (IS_SRIOV_VF(xe))
744 		gt->mmio.sriov_vf_gt = gt;
745 }
746 
xe_gt_record_user_engines(struct xe_gt * gt)747 void xe_gt_record_user_engines(struct xe_gt *gt)
748 {
749 	struct xe_hw_engine *hwe;
750 	enum xe_hw_engine_id id;
751 
752 	gt->user_engines.mask = 0;
753 	memset(gt->user_engines.instances_per_class, 0,
754 	       sizeof(gt->user_engines.instances_per_class));
755 
756 	for_each_hw_engine(hwe, gt, id) {
757 		if (xe_hw_engine_is_reserved(hwe))
758 			continue;
759 
760 		gt->user_engines.mask |= BIT_ULL(id);
761 		gt->user_engines.instances_per_class[hwe->class]++;
762 	}
763 
764 	xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask)
765 		     == gt->info.engine_mask);
766 }
767 
/*
 * Perform a full GT reset. VFs do not touch GDRST directly and instead go
 * through the SR-IOV VF interface; on the host, the reset is requested via
 * GDRST and considered complete once hardware clears GRDOM_FULL (polled for
 * up to 5ms). Wrapped in WA 14015076503 handling for the GSC.
 */
static int do_gt_reset(struct xe_gt *gt)
{
	int err;

	if (IS_SRIOV_VF(gt_to_xe(gt)))
		return xe_gt_sriov_vf_reset(gt);

	xe_gsc_wa_14015076503(gt, true);

	xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
	err = xe_mmio_wait32(&gt->mmio, GDRST, GRDOM_FULL, 0, 5000, NULL, false);
	if (err)
		xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n",
			  ERR_PTR(err));

	xe_gsc_wa_14015076503(gt, false);

	return err;
}
787 
vf_gt_restart(struct xe_gt * gt)788 static int vf_gt_restart(struct xe_gt *gt)
789 {
790 	int err;
791 
792 	err = xe_uc_sanitize_reset(&gt->uc);
793 	if (err)
794 		return err;
795 
796 	err = xe_uc_load_hw(&gt->uc);
797 	if (err)
798 		return err;
799 
800 	err = xe_uc_start(&gt->uc);
801 	if (err)
802 		return err;
803 
804 	return 0;
805 }
806 
/*
 * Bring the GT back to a fully working state after a reset or resume:
 * re-program PAT/WAs/MOCS, re-enable the engines, reload and restart the
 * uC, re-apply CCS mode, and restore frequencies. VFs take the reduced
 * vf_gt_restart() path instead.
 */
static int do_gt_restart(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	int err;

	if (IS_SRIOV_VF(gt_to_xe(gt)))
		return vf_gt_restart(gt);

	xe_pat_init(gt);

	xe_gt_enable_host_l2_vram(gt);
	xe_gt_enable_comp_1wcoh(gt);

	xe_gt_mcr_set_implicit_defaults(gt);
	xe_reg_sr_apply_mmio(&gt->reg_sr, gt);

	err = xe_wopcm_init(&gt->uc.wopcm);
	if (err)
		return err;

	for_each_hw_engine(hwe, gt, id)
		xe_hw_engine_enable_ring(hwe);

	err = xe_uc_sanitize_reset(&gt->uc);
	if (err)
		return err;

	err = xe_uc_load_hw(&gt->uc);
	if (err)
		return err;

	if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_init_hw(gt);

	xe_mocs_init(gt);

	/* Re-apply per-engine save/restore register lists */
	for_each_hw_engine(hwe, gt, id)
		xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);

	/* Get CCS mode in sync between sw/hw */
	xe_gt_apply_ccs_mode(gt);

	err = xe_uc_start(&gt->uc);
	if (err)
		return err;

	/* Restore GT freq to expected values */
	xe_gt_sanitize_freq(gt);

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_restart(gt);

	return 0;
}
865 
/*
 * Workqueue handler that actually performs an asynchronous GT reset queued
 * by xe_gt_reset_async(): quiesce the uC and submission, reset the
 * hardware, then restart everything. On failure the whole device is
 * declared wedged. Always drops the runtime-pm reference taken when the
 * work was queued, on every exit path.
 */
static void gt_reset_worker(struct work_struct *w)
{
	struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
	unsigned int fw_ref;
	int err;

	/* Nothing to do if the device is already wedged */
	if (xe_device_wedged(gt_to_xe(gt)))
		goto err_pm_put;

	/* We only support GT resets with GuC submission */
	if (!xe_device_uc_enabled(gt_to_xe(gt)))
		goto err_pm_put;

	xe_gt_info(gt, "reset started\n");

	/* Test hook: force the failure path without touching hardware */
	if (xe_fault_inject_gt_reset()) {
		err = -ECANCELED;
		goto err_fail;
	}

	xe_gt_sanitize(gt);

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		err = -ETIMEDOUT;
		goto err_out;
	}

	if (IS_SRIOV_PF(gt_to_xe(gt)))
		xe_gt_sriov_pf_stop_prepare(gt);

	/* Quiesce uC, submission and pagefault handling before the reset */
	xe_uc_gucrc_disable(&gt->uc);
	xe_uc_stop_prepare(&gt->uc);
	xe_pagefault_reset(gt_to_xe(gt), gt);

	xe_uc_stop(&gt->uc);

	xe_tlb_inval_reset(&gt->tlb_inval);

	err = do_gt_reset(gt);
	if (err)
		goto err_out;

	err = do_gt_restart(gt);
	if (err)
		goto err_out;

	xe_force_wake_put(gt_to_fw(gt), fw_ref);

	/* Pair with get while enqueueing the work in xe_gt_reset_async() */
	xe_pm_runtime_put(gt_to_xe(gt));

	xe_gt_info(gt, "reset done\n");

	return;

err_out:
	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	/* Best-effort restart before wedging; only warn if it fails too */
	XE_WARN_ON(xe_uc_start(&gt->uc));

err_fail:
	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(gt_to_xe(gt));
err_pm_put:
	xe_pm_runtime_put(gt_to_xe(gt));
}
932 
/**
 * xe_gt_reset_async() - Queue an asynchronous GT reset
 * @gt: the &struct xe_gt to reset
 *
 * Queues gt_reset_worker() on the GT's ordered workqueue unless a reset is
 * already in flight. Holds a runtime-pm reference across the queued work.
 */
void xe_gt_reset_async(struct xe_gt *gt)
{
	xe_gt_info(gt, "trying reset from %ps\n", __builtin_return_address(0));

	/* Don't do a reset while one is already in flight */
	if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc))
		return;

	xe_gt_info(gt, "reset queued\n");

	/* Pair with put in gt_reset_worker() if work is enqueued */
	xe_pm_runtime_get_noresume(gt_to_xe(gt));
	/* Already-queued work keeps its existing pm ref; drop this one */
	if (!queue_work(gt->ordered_wq, &gt->reset.worker))
		xe_pm_runtime_put(gt_to_xe(gt));
}
948 
/**
 * xe_gt_suspend_prepare() - Prepare the GT's uC for suspend
 * @gt: the &struct xe_gt
 *
 * Runs the uC suspend-prepare step under (best-effort) all-domain forcewake.
 */
void xe_gt_suspend_prepare(struct xe_gt *gt)
{
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_uc_suspend_prepare(&gt->uc);
}
954 
/**
 * xe_gt_suspend() - Suspend the GT
 * @gt: the &struct xe_gt
 *
 * Sanitizes software state, suspends the uC under all-domain forcewake,
 * disables power gating and undoes the host L2 VRAM WA programming.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gt_suspend(struct xe_gt *gt)
{
	int err;

	xe_gt_dbg(gt, "suspending\n");
	xe_gt_sanitize(gt);

	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	err = xe_uc_suspend(&gt->uc);
	if (err) {
		xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
		return err;
	}

	xe_gt_idle_disable_pg(gt);

	xe_gt_disable_host_l2_vram(gt);

	xe_gt_dbg(gt, "suspended\n");

	return 0;
}
982 
/**
 * xe_gt_shutdown() - Reset the GT on device shutdown
 * @gt: the &struct xe_gt
 *
 * Best-effort full GT reset under all-domain forcewake; the reset result
 * is intentionally ignored since the device is going away.
 */
void xe_gt_shutdown(struct xe_gt *gt)
{
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	do_gt_reset(gt);
}
988 
989 /**
990  * xe_gt_sanitize_freq() - Restore saved frequencies if necessary.
991  * @gt: the GT object
992  *
993  * Called after driver init/GSC load completes to restore GT frequencies if we
994  * limited them for any WAs.
995  */
xe_gt_sanitize_freq(struct xe_gt * gt)996 int xe_gt_sanitize_freq(struct xe_gt *gt)
997 {
998 	int ret = 0;
999 
1000 	if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
1001 	     xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
1002 	     xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
1003 	    XE_GT_WA(gt, 22019338487))
1004 		ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
1005 
1006 	return ret;
1007 }
1008 
/**
 * xe_gt_resume() - Resume the GT from system suspend
 * @gt: the &struct xe_gt
 *
 * Runs the full restart sequence under all-domain forcewake, then
 * re-enables power gating.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_gt_resume(struct xe_gt *gt)
{
	int err;

	xe_gt_dbg(gt, "resuming\n");
	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	err = do_gt_restart(gt);
	if (err)
		return err;

	xe_gt_idle_enable_pg(gt);

	xe_gt_dbg(gt, "resumed\n");

	return 0;
}
1030 
/**
 * xe_gt_runtime_suspend() - GT runtime suspend
 * @gt: the GT object
 *
 * Suspends the uC for runtime-pm and undoes the host L2 VRAM WA
 * programming, all under all-domain forcewake.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_runtime_suspend(struct xe_gt *gt)
{
	xe_gt_dbg(gt, "runtime suspending\n");

	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "runtime suspend failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	xe_uc_runtime_suspend(&gt->uc);
	xe_gt_disable_host_l2_vram(gt);

	xe_gt_dbg(gt, "runtime suspended\n");

	return 0;
}
1054 
/**
 * xe_gt_runtime_resume() - GT runtime resume
 * @gt: the GT object
 *
 * Re-applies the host L2 VRAM WA programming and resumes the uC, the
 * mirror of xe_gt_runtime_suspend(), under all-domain forcewake.
 *
 * Return: 0 on success, negative error code otherwise.
 */
int xe_gt_runtime_resume(struct xe_gt *gt)
{
	xe_gt_dbg(gt, "runtime resuming\n");

	CLASS(xe_force_wake, fw_ref)(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref.domains, XE_FORCEWAKE_ALL)) {
		xe_gt_err(gt, "runtime resume failed (%pe)\n", ERR_PTR(-ETIMEDOUT));
		return -ETIMEDOUT;
	}

	xe_gt_enable_host_l2_vram(gt);
	xe_uc_runtime_resume(&gt->uc);

	xe_gt_dbg(gt, "runtime resumed\n");

	return 0;
}
1078 
/*
 * Look up a hardware engine on @gt by class and instance number. When
 * @logical is true, @instance is matched against the engine's logical
 * instance; otherwise against its physical instance.
 */
struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt,
				     enum xe_engine_class class,
				     u16 instance, bool logical)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		u16 candidate = logical ? hwe->logical_instance :
					  hwe->instance;

		if (hwe->class == class && candidate == instance)
			return hwe;
	}

	return NULL;
}
1094 
xe_gt_any_hw_engine_by_reset_domain(struct xe_gt * gt,enum xe_engine_class class)1095 struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt,
1096 							 enum xe_engine_class class)
1097 {
1098 	struct xe_hw_engine *hwe;
1099 	enum xe_hw_engine_id id;
1100 
1101 	for_each_hw_engine(hwe, gt, id) {
1102 		switch (class) {
1103 		case XE_ENGINE_CLASS_RENDER:
1104 		case XE_ENGINE_CLASS_COMPUTE:
1105 			if (hwe->class == XE_ENGINE_CLASS_RENDER ||
1106 			    hwe->class == XE_ENGINE_CLASS_COMPUTE)
1107 				return hwe;
1108 			break;
1109 		default:
1110 			if (hwe->class == class)
1111 				return hwe;
1112 		}
1113 	}
1114 
1115 	return NULL;
1116 }
1117 
xe_gt_any_hw_engine(struct xe_gt * gt)1118 struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt)
1119 {
1120 	struct xe_hw_engine *hwe;
1121 	enum xe_hw_engine_id id;
1122 
1123 	for_each_hw_engine(hwe, gt, id)
1124 		return hwe;
1125 
1126 	return NULL;
1127 }
1128 
/**
 * xe_gt_declare_wedged() - Declare GT wedged
 * @gt: the GT object
 *
 * Wedge the GT which stops all submission, saves desired debug state, and
 * cleans up anything which could timeout.
 */
void xe_gt_declare_wedged(struct xe_gt *gt)
{
	/* Only legal when device-level wedging is configured */
	xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode);

	xe_uc_declare_wedged(&gt->uc);
	xe_tlb_inval_reset(&gt->tlb_inval);
}
1143