xref: /linux/drivers/gpu/drm/xe/xe_hw_engine.c (revision 9557b4376d02088a33e5f4116bcc324d35a3b64c)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_hw_engine.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_bo.h"
14 #include "xe_device.h"
15 #include "xe_execlist.h"
16 #include "xe_force_wake.h"
17 #include "xe_gsc.h"
18 #include "xe_gt.h"
19 #include "xe_gt_ccs_mode.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_mcr.h"
22 #include "xe_gt_topology.h"
23 #include "xe_hw_fence.h"
24 #include "xe_irq.h"
25 #include "xe_lrc.h"
26 #include "xe_macros.h"
27 #include "xe_mmio.h"
28 #include "xe_reg_sr.h"
29 #include "xe_reg_whitelist.h"
30 #include "xe_rtp.h"
31 #include "xe_sched_job.h"
32 #include "xe_sriov.h"
33 #include "xe_tuning.h"
34 #include "xe_uc_fw.h"
35 #include "xe_wa.h"
36 
37 #define MAX_MMIO_BASES 3
38 struct engine_info {
39 	const char *name;
40 	unsigned int class : 8;
41 	unsigned int instance : 8;
42 	unsigned int irq_offset : 8;
43 	enum xe_force_wake_domains domain;
44 	u32 mmio_base;
45 };
46 
47 static const struct engine_info engine_infos[] = {
48 	[XE_HW_ENGINE_RCS0] = {
49 		.name = "rcs0",
50 		.class = XE_ENGINE_CLASS_RENDER,
51 		.instance = 0,
52 		.irq_offset = ilog2(INTR_RCS0),
53 		.domain = XE_FW_RENDER,
54 		.mmio_base = RENDER_RING_BASE,
55 	},
56 	[XE_HW_ENGINE_BCS0] = {
57 		.name = "bcs0",
58 		.class = XE_ENGINE_CLASS_COPY,
59 		.instance = 0,
60 		.irq_offset = ilog2(INTR_BCS(0)),
61 		.domain = XE_FW_RENDER,
62 		.mmio_base = BLT_RING_BASE,
63 	},
64 	[XE_HW_ENGINE_BCS1] = {
65 		.name = "bcs1",
66 		.class = XE_ENGINE_CLASS_COPY,
67 		.instance = 1,
68 		.irq_offset = ilog2(INTR_BCS(1)),
69 		.domain = XE_FW_RENDER,
70 		.mmio_base = XEHPC_BCS1_RING_BASE,
71 	},
72 	[XE_HW_ENGINE_BCS2] = {
73 		.name = "bcs2",
74 		.class = XE_ENGINE_CLASS_COPY,
75 		.instance = 2,
76 		.irq_offset = ilog2(INTR_BCS(2)),
77 		.domain = XE_FW_RENDER,
78 		.mmio_base = XEHPC_BCS2_RING_BASE,
79 	},
80 	[XE_HW_ENGINE_BCS3] = {
81 		.name = "bcs3",
82 		.class = XE_ENGINE_CLASS_COPY,
83 		.instance = 3,
84 		.irq_offset = ilog2(INTR_BCS(3)),
85 		.domain = XE_FW_RENDER,
86 		.mmio_base = XEHPC_BCS3_RING_BASE,
87 	},
88 	[XE_HW_ENGINE_BCS4] = {
89 		.name = "bcs4",
90 		.class = XE_ENGINE_CLASS_COPY,
91 		.instance = 4,
92 		.irq_offset = ilog2(INTR_BCS(4)),
93 		.domain = XE_FW_RENDER,
94 		.mmio_base = XEHPC_BCS4_RING_BASE,
95 	},
96 	[XE_HW_ENGINE_BCS5] = {
97 		.name = "bcs5",
98 		.class = XE_ENGINE_CLASS_COPY,
99 		.instance = 5,
100 		.irq_offset = ilog2(INTR_BCS(5)),
101 		.domain = XE_FW_RENDER,
102 		.mmio_base = XEHPC_BCS5_RING_BASE,
103 	},
104 	[XE_HW_ENGINE_BCS6] = {
105 		.name = "bcs6",
106 		.class = XE_ENGINE_CLASS_COPY,
107 		.instance = 6,
108 		.irq_offset = ilog2(INTR_BCS(6)),
109 		.domain = XE_FW_RENDER,
110 		.mmio_base = XEHPC_BCS6_RING_BASE,
111 	},
112 	[XE_HW_ENGINE_BCS7] = {
113 		.name = "bcs7",
114 		.class = XE_ENGINE_CLASS_COPY,
115 		.irq_offset = ilog2(INTR_BCS(7)),
116 		.instance = 7,
117 		.domain = XE_FW_RENDER,
118 		.mmio_base = XEHPC_BCS7_RING_BASE,
119 	},
120 	[XE_HW_ENGINE_BCS8] = {
121 		.name = "bcs8",
122 		.class = XE_ENGINE_CLASS_COPY,
123 		.instance = 8,
124 		.irq_offset = ilog2(INTR_BCS8),
125 		.domain = XE_FW_RENDER,
126 		.mmio_base = XEHPC_BCS8_RING_BASE,
127 	},
128 
129 	[XE_HW_ENGINE_VCS0] = {
130 		.name = "vcs0",
131 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
132 		.instance = 0,
133 		.irq_offset = 32 + ilog2(INTR_VCS(0)),
134 		.domain = XE_FW_MEDIA_VDBOX0,
135 		.mmio_base = BSD_RING_BASE,
136 	},
137 	[XE_HW_ENGINE_VCS1] = {
138 		.name = "vcs1",
139 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
140 		.instance = 1,
141 		.irq_offset = 32 + ilog2(INTR_VCS(1)),
142 		.domain = XE_FW_MEDIA_VDBOX1,
143 		.mmio_base = BSD2_RING_BASE,
144 	},
145 	[XE_HW_ENGINE_VCS2] = {
146 		.name = "vcs2",
147 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
148 		.instance = 2,
149 		.irq_offset = 32 + ilog2(INTR_VCS(2)),
150 		.domain = XE_FW_MEDIA_VDBOX2,
151 		.mmio_base = BSD3_RING_BASE,
152 	},
153 	[XE_HW_ENGINE_VCS3] = {
154 		.name = "vcs3",
155 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
156 		.instance = 3,
157 		.irq_offset = 32 + ilog2(INTR_VCS(3)),
158 		.domain = XE_FW_MEDIA_VDBOX3,
159 		.mmio_base = BSD4_RING_BASE,
160 	},
161 	[XE_HW_ENGINE_VCS4] = {
162 		.name = "vcs4",
163 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
164 		.instance = 4,
165 		.irq_offset = 32 + ilog2(INTR_VCS(4)),
166 		.domain = XE_FW_MEDIA_VDBOX4,
167 		.mmio_base = XEHP_BSD5_RING_BASE,
168 	},
169 	[XE_HW_ENGINE_VCS5] = {
170 		.name = "vcs5",
171 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
172 		.instance = 5,
173 		.irq_offset = 32 + ilog2(INTR_VCS(5)),
174 		.domain = XE_FW_MEDIA_VDBOX5,
175 		.mmio_base = XEHP_BSD6_RING_BASE,
176 	},
177 	[XE_HW_ENGINE_VCS6] = {
178 		.name = "vcs6",
179 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
180 		.instance = 6,
181 		.irq_offset = 32 + ilog2(INTR_VCS(6)),
182 		.domain = XE_FW_MEDIA_VDBOX6,
183 		.mmio_base = XEHP_BSD7_RING_BASE,
184 	},
185 	[XE_HW_ENGINE_VCS7] = {
186 		.name = "vcs7",
187 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
188 		.instance = 7,
189 		.irq_offset = 32 + ilog2(INTR_VCS(7)),
190 		.domain = XE_FW_MEDIA_VDBOX7,
191 		.mmio_base = XEHP_BSD8_RING_BASE,
192 	},
193 	[XE_HW_ENGINE_VECS0] = {
194 		.name = "vecs0",
195 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
196 		.instance = 0,
197 		.irq_offset = 32 + ilog2(INTR_VECS(0)),
198 		.domain = XE_FW_MEDIA_VEBOX0,
199 		.mmio_base = VEBOX_RING_BASE,
200 	},
201 	[XE_HW_ENGINE_VECS1] = {
202 		.name = "vecs1",
203 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
204 		.instance = 1,
205 		.irq_offset = 32 + ilog2(INTR_VECS(1)),
206 		.domain = XE_FW_MEDIA_VEBOX1,
207 		.mmio_base = VEBOX2_RING_BASE,
208 	},
209 	[XE_HW_ENGINE_VECS2] = {
210 		.name = "vecs2",
211 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
212 		.instance = 2,
213 		.irq_offset = 32 + ilog2(INTR_VECS(2)),
214 		.domain = XE_FW_MEDIA_VEBOX2,
215 		.mmio_base = XEHP_VEBOX3_RING_BASE,
216 	},
217 	[XE_HW_ENGINE_VECS3] = {
218 		.name = "vecs3",
219 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
220 		.instance = 3,
221 		.irq_offset = 32 + ilog2(INTR_VECS(3)),
222 		.domain = XE_FW_MEDIA_VEBOX3,
223 		.mmio_base = XEHP_VEBOX4_RING_BASE,
224 	},
225 	[XE_HW_ENGINE_CCS0] = {
226 		.name = "ccs0",
227 		.class = XE_ENGINE_CLASS_COMPUTE,
228 		.instance = 0,
229 		.irq_offset = ilog2(INTR_CCS(0)),
230 		.domain = XE_FW_RENDER,
231 		.mmio_base = COMPUTE0_RING_BASE,
232 	},
233 	[XE_HW_ENGINE_CCS1] = {
234 		.name = "ccs1",
235 		.class = XE_ENGINE_CLASS_COMPUTE,
236 		.instance = 1,
237 		.irq_offset = ilog2(INTR_CCS(1)),
238 		.domain = XE_FW_RENDER,
239 		.mmio_base = COMPUTE1_RING_BASE,
240 	},
241 	[XE_HW_ENGINE_CCS2] = {
242 		.name = "ccs2",
243 		.class = XE_ENGINE_CLASS_COMPUTE,
244 		.instance = 2,
245 		.irq_offset = ilog2(INTR_CCS(2)),
246 		.domain = XE_FW_RENDER,
247 		.mmio_base = COMPUTE2_RING_BASE,
248 	},
249 	[XE_HW_ENGINE_CCS3] = {
250 		.name = "ccs3",
251 		.class = XE_ENGINE_CLASS_COMPUTE,
252 		.instance = 3,
253 		.irq_offset = ilog2(INTR_CCS(3)),
254 		.domain = XE_FW_RENDER,
255 		.mmio_base = COMPUTE3_RING_BASE,
256 	},
257 	[XE_HW_ENGINE_GSCCS0] = {
258 		.name = "gsccs0",
259 		.class = XE_ENGINE_CLASS_OTHER,
260 		.instance = OTHER_GSC_INSTANCE,
261 		.domain = XE_FW_GSC,
262 		.mmio_base = GSCCS_RING_BASE,
263 	},
264 };
265 
266 static void hw_engine_fini(struct drm_device *drm, void *arg)
267 {
268 	struct xe_hw_engine *hwe = arg;
269 
270 	if (hwe->exl_port)
271 		xe_execlist_port_destroy(hwe->exl_port);
272 	xe_lrc_put(hwe->kernel_lrc);
273 
274 	hwe->gt = NULL;
275 }
276 
277 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
278 				   u32 val)
279 {
280 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
281 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
282 
283 	reg.addr += hwe->mmio_base;
284 
285 	xe_mmio_write32(hwe->gt, reg, val);
286 }
287 
288 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
289 {
290 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
291 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
292 
293 	reg.addr += hwe->mmio_base;
294 
295 	return xe_mmio_read32(hwe->gt, reg);
296 }
297 
298 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
299 {
300 	u32 ccs_mask =
301 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
302 
303 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
304 		xe_mmio_write32(hwe->gt, RCU_MODE,
305 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
306 
307 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
308 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
309 			       xe_bo_ggtt_addr(hwe->hwsp));
310 	hw_engine_mmio_write32(hwe, RING_MODE(0),
311 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
312 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
313 			       _MASKED_BIT_DISABLE(STOP_RING));
314 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
315 }
316 
317 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
318 						 const struct xe_hw_engine *hwe)
319 {
320 	return xe_gt_ccs_mode_enabled(gt) &&
321 	       xe_rtp_match_first_render_or_compute(gt, hwe);
322 }
323 
324 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
325 				      const struct xe_hw_engine *hwe)
326 {
327 	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
328 		return false;
329 
330 	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
331 	    hwe->class != XE_ENGINE_CLASS_RENDER)
332 		return false;
333 
334 	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
335 }
336 
337 void
338 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
339 {
340 	struct xe_gt *gt = hwe->gt;
341 	const u8 mocs_write_idx = gt->mocs.uc_index;
342 	const u8 mocs_read_idx = gt->mocs.uc_index;
343 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
344 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
345 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
346 	const struct xe_rtp_entry_sr lrc_setup[] = {
347 		/*
348 		 * Some blitter commands do not have a field for MOCS, those
349 		 * commands will use MOCS index pointed by BLIT_CCTL.
350 		 * BLIT_CCTL registers are needed to be programmed to un-cached.
351 		 */
352 		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
353 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
354 			       ENGINE_CLASS(COPY)),
355 		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
356 				 BLIT_CCTL_DST_MOCS_MASK |
357 				 BLIT_CCTL_SRC_MOCS_MASK,
358 				 blit_cctl_val,
359 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
360 		},
361 		/* Use Fixed slice CCS mode */
362 		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
363 		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
364 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
365 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
366 		},
367 		/* Disable WMTP if HW doesn't support it */
368 		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
369 		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
370 		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
371 					   PREEMPT_GPGPU_LEVEL_MASK,
372 					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
373 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
374 		},
375 		{}
376 	};
377 
378 	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
379 }
380 
381 static void
382 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
383 {
384 	struct xe_gt *gt = hwe->gt;
385 	struct xe_device *xe = gt_to_xe(gt);
386 	/*
387 	 * RING_CMD_CCTL specifies the default MOCS entry that will be
388 	 * used by the command streamer when executing commands that
389 	 * don't have a way to explicitly specify a MOCS setting.
390 	 * The default should usually reference whichever MOCS entry
391 	 * corresponds to uncached behavior, although use of a WB cached
392 	 * entry is recommended by the spec in certain circumstances on
393 	 * specific platforms.
394 	 * Bspec: 72161
395 	 */
396 	const u8 mocs_write_idx = gt->mocs.uc_index;
397 	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
398 				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
399 				 gt->mocs.wb_index : gt->mocs.uc_index;
400 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
401 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
402 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
403 	const struct xe_rtp_entry_sr engine_entries[] = {
404 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
405 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
406 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
407 					   CMD_CCTL_WRITE_OVERRIDE_MASK |
408 					   CMD_CCTL_READ_OVERRIDE_MASK,
409 					   ring_cmd_cctl_val,
410 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
411 		},
412 		/*
413 		 * To allow the GSC engine to go idle on MTL we need to enable
414 		 * idle messaging and set the hysteresis value (we use 0xA=5us
415 		 * as recommended in spec). On platforms after MTL this is
416 		 * enabled by default.
417 		 */
418 		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
419 		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
420 		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
421 				     IDLE_MSG_DISABLE,
422 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
423 				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
424 					   IDLE_WAIT_TIME,
425 					   0xA,
426 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
427 		},
428 		{}
429 	};
430 
431 	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
432 }
433 
434 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
435 				 enum xe_hw_engine_id id)
436 {
437 	const struct engine_info *info;
438 
439 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
440 		return;
441 
442 	if (!(gt->info.engine_mask & BIT(id)))
443 		return;
444 
445 	info = &engine_infos[id];
446 
447 	xe_gt_assert(gt, !hwe->gt);
448 
449 	hwe->gt = gt;
450 	hwe->class = info->class;
451 	hwe->instance = info->instance;
452 	hwe->mmio_base = info->mmio_base;
453 	hwe->irq_offset = info->irq_offset;
454 	hwe->domain = info->domain;
455 	hwe->name = info->name;
456 	hwe->fence_irq = &gt->fence_irq[info->class];
457 	hwe->engine_id = id;
458 
459 	hwe->eclass = &gt->eclass[hwe->class];
460 	if (!hwe->eclass->sched_props.job_timeout_ms) {
461 		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
462 		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
463 		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
464 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
465 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
466 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
467 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
468 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
469 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
470 
471 		/*
472 		 * The GSC engine can accept submissions while the GSC shim is
473 		 * being reset, during which time the submission is stalled. In
474 		 * the worst case, the shim reset can take up to the maximum GSC
475 		 * command execution time (250ms), so the request start can be
476 		 * delayed by that much; the request itself can take that long
477 		 * without being preemptible, which means worst case it can
478 		 * theoretically take up to 500ms for a preemption to go through
479 		 * on the GSC engine. Adding to that an extra 100ms as a safety
480 		 * margin, we get a minimum recommended timeout of 600ms.
481 		 * The preempt_timeout value can't be tuned for OTHER_CLASS
482 		 * because the class is reserved for kernel usage, so we just
483 		 * need to make sure that the starting value is above that
484 		 * threshold; since our default value (640ms) is greater than
485 		 * 600ms, the only way we can go below is via a kconfig setting.
486 		 * If that happens, log it in dmesg and update the value.
487 		 */
488 		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
489 			const u32 min_preempt_timeout = 600 * 1000;
490 			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
491 				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
492 				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
493 			}
494 		}
495 
496 		/* Record default props */
497 		hwe->eclass->defaults = hwe->eclass->sched_props;
498 	}
499 
500 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
501 	xe_tuning_process_engine(hwe);
502 	xe_wa_process_engine(hwe);
503 	hw_engine_setup_default_state(hwe);
504 
505 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
506 	xe_reg_whitelist_process_engine(hwe);
507 }
508 
509 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
510 			  enum xe_hw_engine_id id)
511 {
512 	struct xe_device *xe = gt_to_xe(gt);
513 	struct xe_tile *tile = gt_to_tile(gt);
514 	int err;
515 
516 	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
517 	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
518 
519 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
520 	xe_reg_sr_apply_whitelist(hwe);
521 
522 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
523 						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
524 						 XE_BO_FLAG_GGTT |
525 						 XE_BO_FLAG_GGTT_INVALIDATE);
526 	if (IS_ERR(hwe->hwsp)) {
527 		err = PTR_ERR(hwe->hwsp);
528 		goto err_name;
529 	}
530 
531 	hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K);
532 	if (IS_ERR(hwe->kernel_lrc)) {
533 		err = PTR_ERR(hwe->kernel_lrc);
534 		goto err_hwsp;
535 	}
536 
537 	if (!xe_device_uc_enabled(xe)) {
538 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
539 		if (IS_ERR(hwe->exl_port)) {
540 			err = PTR_ERR(hwe->exl_port);
541 			goto err_kernel_lrc;
542 		}
543 	}
544 
545 	if (xe_device_uc_enabled(xe)) {
546 		/* GSCCS has a special interrupt for reset */
547 		if (hwe->class == XE_ENGINE_CLASS_OTHER)
548 			hwe->irq_handler = xe_gsc_hwe_irq_handler;
549 
550 		if (!IS_SRIOV_VF(xe))
551 			xe_hw_engine_enable_ring(hwe);
552 	}
553 
554 	/* We reserve the highest BCS instance for USM */
555 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
556 		gt->usm.reserved_bcs_instance = hwe->instance;
557 
558 	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
559 
560 err_kernel_lrc:
561 	xe_lrc_put(hwe->kernel_lrc);
562 err_hwsp:
563 	xe_bo_unpin_map_no_vm(hwe->hwsp);
564 err_name:
565 	hwe->name = NULL;
566 
567 	return err;
568 }
569 
570 static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
571 {
572 	int class;
573 
574 	/* FIXME: Doing a simple logical mapping that works for most hardware */
575 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
576 		struct xe_hw_engine *hwe;
577 		enum xe_hw_engine_id id;
578 		int logical_instance = 0;
579 
580 		for_each_hw_engine(hwe, gt, id)
581 			if (hwe->class == class)
582 				hwe->logical_instance = logical_instance++;
583 	}
584 }
585 
586 static void read_media_fuses(struct xe_gt *gt)
587 {
588 	struct xe_device *xe = gt_to_xe(gt);
589 	u32 media_fuse;
590 	u16 vdbox_mask;
591 	u16 vebox_mask;
592 	int i, j;
593 
594 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
595 
596 	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
597 
598 	/*
599 	 * Pre-Xe_HP platforms had register bits representing absent engines,
600 	 * whereas Xe_HP and beyond have bits representing present engines.
601 	 * Invert the polarity on old platforms so that we can use common
602 	 * handling below.
603 	 */
604 	if (GRAPHICS_VERx100(xe) < 1250)
605 		media_fuse = ~media_fuse;
606 
607 	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
608 	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
609 
610 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
611 		if (!(gt->info.engine_mask & BIT(i)))
612 			continue;
613 
614 		if (!(BIT(j) & vdbox_mask)) {
615 			gt->info.engine_mask &= ~BIT(i);
616 			drm_info(&xe->drm, "vcs%u fused off\n", j);
617 		}
618 	}
619 
620 	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
621 		if (!(gt->info.engine_mask & BIT(i)))
622 			continue;
623 
624 		if (!(BIT(j) & vebox_mask)) {
625 			gt->info.engine_mask &= ~BIT(i);
626 			drm_info(&xe->drm, "vecs%u fused off\n", j);
627 		}
628 	}
629 }
630 
631 static void read_copy_fuses(struct xe_gt *gt)
632 {
633 	struct xe_device *xe = gt_to_xe(gt);
634 	u32 bcs_mask;
635 
636 	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
637 		return;
638 
639 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
640 
641 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
642 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
643 
644 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
645 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
646 		if (!(gt->info.engine_mask & BIT(i)))
647 			continue;
648 
649 		if (!(BIT(j / 2) & bcs_mask)) {
650 			gt->info.engine_mask &= ~BIT(i);
651 			drm_info(&xe->drm, "bcs%u fused off\n", j);
652 		}
653 	}
654 }
655 
656 static void read_compute_fuses_from_dss(struct xe_gt *gt)
657 {
658 	struct xe_device *xe = gt_to_xe(gt);
659 
660 	/*
661 	 * CCS fusing based on DSS masks only applies to platforms that can
662 	 * have more than one CCS.
663 	 */
664 	if (hweight64(gt->info.engine_mask &
665 		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
666 		return;
667 
668 	/*
669 	 * CCS availability on Xe_HP is inferred from the presence of DSS in
670 	 * each quadrant.
671 	 */
672 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
673 		if (!(gt->info.engine_mask & BIT(i)))
674 			continue;
675 
676 		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
677 			gt->info.engine_mask &= ~BIT(i);
678 			drm_info(&xe->drm, "ccs%u fused off\n", j);
679 		}
680 	}
681 }
682 
683 static void read_compute_fuses_from_reg(struct xe_gt *gt)
684 {
685 	struct xe_device *xe = gt_to_xe(gt);
686 	u32 ccs_mask;
687 
688 	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
689 	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
690 
691 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
692 		if (!(gt->info.engine_mask & BIT(i)))
693 			continue;
694 
695 		if ((ccs_mask & BIT(j)) == 0) {
696 			gt->info.engine_mask &= ~BIT(i);
697 			drm_info(&xe->drm, "ccs%u fused off\n", j);
698 		}
699 	}
700 }
701 
/*
 * Pick the compute fuse source by graphics version: the fuse register on
 * version 20 and later, DSS topology before that.
 */
static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20) {
		read_compute_fuses_from_dss(gt);
		return;
	}

	read_compute_fuses_from_reg(gt);
}
709 
710 static void check_gsc_availability(struct xe_gt *gt)
711 {
712 	struct xe_device *xe = gt_to_xe(gt);
713 
714 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
715 		return;
716 
717 	/*
718 	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
719 	 * have the FW there is nothing we need the engine for and can therefore
720 	 * skip its initialization.
721 	 */
722 	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
723 		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
724 
725 		/* interrupts where previously enabled, so turn them off */
726 		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
727 		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
728 
729 		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
730 	}
731 }
732 
733 int xe_hw_engines_init_early(struct xe_gt *gt)
734 {
735 	int i;
736 
737 	read_media_fuses(gt);
738 	read_copy_fuses(gt);
739 	read_compute_fuses(gt);
740 	check_gsc_availability(gt);
741 
742 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
743 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
744 
745 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
746 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
747 
748 	return 0;
749 }
750 
751 int xe_hw_engines_init(struct xe_gt *gt)
752 {
753 	int err;
754 	struct xe_hw_engine *hwe;
755 	enum xe_hw_engine_id id;
756 
757 	for_each_hw_engine(hwe, gt, id) {
758 		err = hw_engine_init(gt, hwe, id);
759 		if (err)
760 			return err;
761 	}
762 
763 	hw_engine_setup_logical_mapping(gt);
764 
765 	return 0;
766 }
767 
768 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
769 {
770 	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
771 
772 	if (hwe->irq_handler)
773 		hwe->irq_handler(hwe, intr_vec);
774 
775 	if (intr_vec & GT_RENDER_USER_INTERRUPT)
776 		xe_hw_fence_irq_run(hwe->fence_irq);
777 }
778 
779 static bool
780 is_slice_common_per_gslice(struct xe_device *xe)
781 {
782 	return GRAPHICS_VERx100(xe) >= 1255;
783 }
784 
785 static void
786 xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
787 				       struct xe_hw_engine_snapshot *snapshot)
788 {
789 	struct xe_gt *gt = hwe->gt;
790 	struct xe_device *xe = gt_to_xe(gt);
791 	unsigned int dss;
792 	u16 group, instance;
793 
794 	snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
795 
796 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
797 		return;
798 
799 	if (is_slice_common_per_gslice(xe) == false) {
800 		snapshot->reg.instdone.slice_common[0] =
801 			xe_mmio_read32(gt, SC_INSTDONE);
802 		snapshot->reg.instdone.slice_common_extra[0] =
803 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
804 		snapshot->reg.instdone.slice_common_extra2[0] =
805 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
806 	} else {
807 		for_each_geometry_dss(dss, gt, group, instance) {
808 			snapshot->reg.instdone.slice_common[dss] =
809 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
810 			snapshot->reg.instdone.slice_common_extra[dss] =
811 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
812 			snapshot->reg.instdone.slice_common_extra2[dss] =
813 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
814 		}
815 	}
816 
817 	for_each_geometry_dss(dss, gt, group, instance) {
818 		snapshot->reg.instdone.sampler[dss] =
819 			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
820 		snapshot->reg.instdone.row[dss] =
821 			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
822 
823 		if (GRAPHICS_VERx100(xe) >= 1255)
824 			snapshot->reg.instdone.geom_svg[dss] =
825 				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
826 						       group, instance);
827 	}
828 }
829 
830 /**
831  * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
832  * @hwe: Xe HW Engine.
833  *
834  * This can be printed out in a later stage like during dev_coredump
835  * analysis.
836  *
837  * Returns: a Xe HW Engine snapshot object that must be freed by the
838  * caller, using `xe_hw_engine_snapshot_free`.
839  */
840 struct xe_hw_engine_snapshot *
841 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
842 {
843 	struct xe_hw_engine_snapshot *snapshot;
844 	size_t len;
845 	u64 val;
846 
847 	if (!xe_hw_engine_is_valid(hwe))
848 		return NULL;
849 
850 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
851 
852 	if (!snapshot)
853 		return NULL;
854 
855 	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
856 	 * includes xe_hw_engine_types.h the length of this 3 registers can't be
857 	 * set in struct xe_hw_engine_snapshot, so here doing additional
858 	 * allocations.
859 	 */
860 	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
861 	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
862 	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
863 	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
864 	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
865 	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
866 	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
867 	if (!snapshot->reg.instdone.slice_common ||
868 	    !snapshot->reg.instdone.slice_common_extra ||
869 	    !snapshot->reg.instdone.slice_common_extra2 ||
870 	    !snapshot->reg.instdone.sampler ||
871 	    !snapshot->reg.instdone.row ||
872 	    !snapshot->reg.instdone.geom_svg) {
873 		xe_hw_engine_snapshot_free(snapshot);
874 		return NULL;
875 	}
876 
877 	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
878 	snapshot->hwe = hwe;
879 	snapshot->logical_instance = hwe->logical_instance;
880 	snapshot->forcewake.domain = hwe->domain;
881 	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
882 						    hwe->domain);
883 	snapshot->mmio_base = hwe->mmio_base;
884 
885 	/* no more VF accessible data below this point */
886 	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
887 		return snapshot;
888 
889 	snapshot->reg.ring_execlist_status =
890 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
891 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
892 	snapshot->reg.ring_execlist_status |= val << 32;
893 
894 	snapshot->reg.ring_execlist_sq_contents =
895 		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
896 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
897 	snapshot->reg.ring_execlist_sq_contents |= val << 32;
898 
899 	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
900 	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
901 	snapshot->reg.ring_acthd |= val << 32;
902 
903 	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
904 	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
905 	snapshot->reg.ring_bbaddr |= val << 32;
906 
907 	snapshot->reg.ring_dma_fadd =
908 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
909 	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
910 	snapshot->reg.ring_dma_fadd |= val << 32;
911 
912 	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
913 	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
914 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
915 	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
916 		val = hw_engine_mmio_read32(hwe, RING_START_UDW(0));
917 		snapshot->reg.ring_start |= val << 32;
918 	}
919 	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
920 		snapshot->reg.indirect_ring_state =
921 			hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
922 	}
923 
924 	snapshot->reg.ring_head =
925 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
926 	snapshot->reg.ring_tail =
927 		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
928 	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
929 	snapshot->reg.ring_mi_mode =
930 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
931 	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
932 	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
933 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
934 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
935 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
936 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
937 	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
938 
939 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
940 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
941 
942 	return snapshot;
943 }
944 
945 static void
946 xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
947 {
948 	struct xe_gt *gt = snapshot->hwe->gt;
949 	struct xe_device *xe = gt_to_xe(gt);
950 	u16 group, instance;
951 	unsigned int dss;
952 
953 	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
954 
955 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
956 		return;
957 
958 	if (is_slice_common_per_gslice(xe) == false) {
959 		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
960 			   snapshot->reg.instdone.slice_common[0]);
961 		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
962 			   snapshot->reg.instdone.slice_common_extra[0]);
963 		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
964 			   snapshot->reg.instdone.slice_common_extra2[0]);
965 	} else {
966 		for_each_geometry_dss(dss, gt, group, instance) {
967 			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
968 				   snapshot->reg.instdone.slice_common[dss]);
969 			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
970 				   snapshot->reg.instdone.slice_common_extra[dss]);
971 			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
972 				   snapshot->reg.instdone.slice_common_extra2[dss]);
973 		}
974 	}
975 
976 	for_each_geometry_dss(dss, gt, group, instance) {
977 		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
978 			   snapshot->reg.instdone.sampler[dss]);
979 		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
980 			   snapshot->reg.instdone.row[dss]);
981 
982 		if (GRAPHICS_VERx100(xe) >= 1255)
983 			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
984 				   dss, snapshot->reg.instdone.geom_svg[dss]);
985 	}
986 }
987 
988 /**
989  * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
990  * @snapshot: Xe HW Engine snapshot object.
991  * @p: drm_printer where it will be printed out.
992  *
993  * This function prints out a given Xe HW Engine snapshot object.
994  */
995 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
996 				 struct drm_printer *p)
997 {
998 	if (!snapshot)
999 		return;
1000 
1001 	drm_printf(p, "%s (physical), logical instance=%d\n",
1002 		   snapshot->name ? snapshot->name : "",
1003 		   snapshot->logical_instance);
1004 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1005 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1006 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
1007 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
1008 	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
1009 		   snapshot->reg.ring_execlist_status);
1010 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
1011 		   snapshot->reg.ring_execlist_sq_contents);
1012 	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
1013 	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
1014 	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
1015 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
1016 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
1017 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
1018 		   snapshot->reg.ring_mode);
1019 	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
1020 	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
1021 	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
1022 	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
1023 	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
1024 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
1025 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
1026 	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
1027 		   snapshot->reg.indirect_ring_state);
1028 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
1029 	xe_hw_engine_snapshot_instdone_print(snapshot, p);
1030 
1031 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
1032 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
1033 			   snapshot->reg.rcu_mode);
1034 	drm_puts(p, "\n");
1035 }
1036 
1037 /**
1038  * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
1039  * @snapshot: Xe HW Engine snapshot object.
1040  *
1041  * This function free all the memory that needed to be allocated at capture
1042  * time.
1043  */
1044 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
1045 {
1046 	if (!snapshot)
1047 		return;
1048 
1049 	kfree(snapshot->reg.instdone.slice_common);
1050 	kfree(snapshot->reg.instdone.slice_common_extra);
1051 	kfree(snapshot->reg.instdone.slice_common_extra2);
1052 	kfree(snapshot->reg.instdone.sampler);
1053 	kfree(snapshot->reg.instdone.row);
1054 	kfree(snapshot->reg.instdone.geom_svg);
1055 	kfree(snapshot->name);
1056 	kfree(snapshot);
1057 }
1058 
/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function captures a snapshot of @hwe, prints it to @p right away and
 * then frees it again. The print and free helpers both accept a NULL
 * snapshot, so the capture result is passed on without being checked here.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snap = xe_hw_engine_snapshot_capture(hwe);

	xe_hw_engine_snapshot_print(snap, p);
	xe_hw_engine_snapshot_free(snap);
}
1074 
1075 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
1076 				enum xe_engine_class engine_class)
1077 {
1078 	u32 mask = 0;
1079 	enum xe_hw_engine_id id;
1080 
1081 	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
1082 		if (engine_infos[id].class == engine_class &&
1083 		    gt->info.engine_mask & BIT(id))
1084 			mask |= BIT(engine_infos[id].instance);
1085 	}
1086 	return mask;
1087 }
1088 
1089 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
1090 {
1091 	struct xe_gt *gt = hwe->gt;
1092 	struct xe_device *xe = gt_to_xe(gt);
1093 
1094 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
1095 		return true;
1096 
1097 	/* Check for engines disabled by ccs_mode setting */
1098 	if (xe_gt_ccs_mode_enabled(gt) &&
1099 	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
1100 	    hwe->logical_instance >= gt->ccs_mode)
1101 		return true;
1102 
1103 	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
1104 		hwe->instance == gt->usm.reserved_bcs_instance;
1105 }
1106 
1107 const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
1108 {
1109 	switch (class) {
1110 	case XE_ENGINE_CLASS_RENDER:
1111 		return "rcs";
1112 	case XE_ENGINE_CLASS_VIDEO_DECODE:
1113 		return "vcs";
1114 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
1115 		return "vecs";
1116 	case XE_ENGINE_CLASS_COPY:
1117 		return "bcs";
1118 	case XE_ENGINE_CLASS_OTHER:
1119 		return "other";
1120 	case XE_ENGINE_CLASS_COMPUTE:
1121 		return "ccs";
1122 	case XE_ENGINE_CLASS_MAX:
1123 		break;
1124 	}
1125 
1126 	return NULL;
1127 }
1128 
1129 u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
1130 {
1131 	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
1132 }
1133 
1134 enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
1135 {
1136 	return engine_infos[hwe->engine_id].domain;
1137 }
1138