xref: /linux/drivers/gpu/drm/xe/xe_hw_engine.c (revision 569d7db70e5dcf13fbf072f10e9096577ac1e565)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_hw_engine.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_bo.h"
14 #include "xe_device.h"
15 #include "xe_execlist.h"
16 #include "xe_force_wake.h"
17 #include "xe_gsc.h"
18 #include "xe_gt.h"
19 #include "xe_gt_ccs_mode.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_mcr.h"
22 #include "xe_gt_topology.h"
23 #include "xe_hw_fence.h"
24 #include "xe_irq.h"
25 #include "xe_lrc.h"
26 #include "xe_macros.h"
27 #include "xe_mmio.h"
28 #include "xe_reg_sr.h"
29 #include "xe_rtp.h"
30 #include "xe_sched_job.h"
31 #include "xe_sriov.h"
32 #include "xe_tuning.h"
33 #include "xe_uc_fw.h"
34 #include "xe_wa.h"
35 
/*
 * NOTE(review): MAX_MMIO_BASES is not referenced in the visible part of this
 * file - confirm whether it is still needed.
 */
#define MAX_MMIO_BASES 3
/**
 * struct engine_info - static, per-engine description used to fill in a
 * struct xe_hw_engine during early init
 * @name: engine name, e.g. "rcs0"; a NULL name marks an unpopulated table slot
 * @class: engine class (one of the XE_ENGINE_CLASS_* values)
 * @instance: instance number of the engine within its class
 * @irq_offset: bit position derived from the engine's INTR_* bit (the media
 *	engines in the table add 32 to it)
 * @domain: force wake domain that must be held for engine-relative MMIO
 * @mmio_base: offset added to engine-relative register addresses
 */
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};
45 
46 static const struct engine_info engine_infos[] = {
47 	[XE_HW_ENGINE_RCS0] = {
48 		.name = "rcs0",
49 		.class = XE_ENGINE_CLASS_RENDER,
50 		.instance = 0,
51 		.irq_offset = ilog2(INTR_RCS0),
52 		.domain = XE_FW_RENDER,
53 		.mmio_base = RENDER_RING_BASE,
54 	},
55 	[XE_HW_ENGINE_BCS0] = {
56 		.name = "bcs0",
57 		.class = XE_ENGINE_CLASS_COPY,
58 		.instance = 0,
59 		.irq_offset = ilog2(INTR_BCS(0)),
60 		.domain = XE_FW_RENDER,
61 		.mmio_base = BLT_RING_BASE,
62 	},
63 	[XE_HW_ENGINE_BCS1] = {
64 		.name = "bcs1",
65 		.class = XE_ENGINE_CLASS_COPY,
66 		.instance = 1,
67 		.irq_offset = ilog2(INTR_BCS(1)),
68 		.domain = XE_FW_RENDER,
69 		.mmio_base = XEHPC_BCS1_RING_BASE,
70 	},
71 	[XE_HW_ENGINE_BCS2] = {
72 		.name = "bcs2",
73 		.class = XE_ENGINE_CLASS_COPY,
74 		.instance = 2,
75 		.irq_offset = ilog2(INTR_BCS(2)),
76 		.domain = XE_FW_RENDER,
77 		.mmio_base = XEHPC_BCS2_RING_BASE,
78 	},
79 	[XE_HW_ENGINE_BCS3] = {
80 		.name = "bcs3",
81 		.class = XE_ENGINE_CLASS_COPY,
82 		.instance = 3,
83 		.irq_offset = ilog2(INTR_BCS(3)),
84 		.domain = XE_FW_RENDER,
85 		.mmio_base = XEHPC_BCS3_RING_BASE,
86 	},
87 	[XE_HW_ENGINE_BCS4] = {
88 		.name = "bcs4",
89 		.class = XE_ENGINE_CLASS_COPY,
90 		.instance = 4,
91 		.irq_offset = ilog2(INTR_BCS(4)),
92 		.domain = XE_FW_RENDER,
93 		.mmio_base = XEHPC_BCS4_RING_BASE,
94 	},
95 	[XE_HW_ENGINE_BCS5] = {
96 		.name = "bcs5",
97 		.class = XE_ENGINE_CLASS_COPY,
98 		.instance = 5,
99 		.irq_offset = ilog2(INTR_BCS(5)),
100 		.domain = XE_FW_RENDER,
101 		.mmio_base = XEHPC_BCS5_RING_BASE,
102 	},
103 	[XE_HW_ENGINE_BCS6] = {
104 		.name = "bcs6",
105 		.class = XE_ENGINE_CLASS_COPY,
106 		.instance = 6,
107 		.irq_offset = ilog2(INTR_BCS(6)),
108 		.domain = XE_FW_RENDER,
109 		.mmio_base = XEHPC_BCS6_RING_BASE,
110 	},
111 	[XE_HW_ENGINE_BCS7] = {
112 		.name = "bcs7",
113 		.class = XE_ENGINE_CLASS_COPY,
114 		.irq_offset = ilog2(INTR_BCS(7)),
115 		.instance = 7,
116 		.domain = XE_FW_RENDER,
117 		.mmio_base = XEHPC_BCS7_RING_BASE,
118 	},
119 	[XE_HW_ENGINE_BCS8] = {
120 		.name = "bcs8",
121 		.class = XE_ENGINE_CLASS_COPY,
122 		.instance = 8,
123 		.irq_offset = ilog2(INTR_BCS8),
124 		.domain = XE_FW_RENDER,
125 		.mmio_base = XEHPC_BCS8_RING_BASE,
126 	},
127 
128 	[XE_HW_ENGINE_VCS0] = {
129 		.name = "vcs0",
130 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
131 		.instance = 0,
132 		.irq_offset = 32 + ilog2(INTR_VCS(0)),
133 		.domain = XE_FW_MEDIA_VDBOX0,
134 		.mmio_base = BSD_RING_BASE,
135 	},
136 	[XE_HW_ENGINE_VCS1] = {
137 		.name = "vcs1",
138 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
139 		.instance = 1,
140 		.irq_offset = 32 + ilog2(INTR_VCS(1)),
141 		.domain = XE_FW_MEDIA_VDBOX1,
142 		.mmio_base = BSD2_RING_BASE,
143 	},
144 	[XE_HW_ENGINE_VCS2] = {
145 		.name = "vcs2",
146 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
147 		.instance = 2,
148 		.irq_offset = 32 + ilog2(INTR_VCS(2)),
149 		.domain = XE_FW_MEDIA_VDBOX2,
150 		.mmio_base = BSD3_RING_BASE,
151 	},
152 	[XE_HW_ENGINE_VCS3] = {
153 		.name = "vcs3",
154 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
155 		.instance = 3,
156 		.irq_offset = 32 + ilog2(INTR_VCS(3)),
157 		.domain = XE_FW_MEDIA_VDBOX3,
158 		.mmio_base = BSD4_RING_BASE,
159 	},
160 	[XE_HW_ENGINE_VCS4] = {
161 		.name = "vcs4",
162 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
163 		.instance = 4,
164 		.irq_offset = 32 + ilog2(INTR_VCS(4)),
165 		.domain = XE_FW_MEDIA_VDBOX4,
166 		.mmio_base = XEHP_BSD5_RING_BASE,
167 	},
168 	[XE_HW_ENGINE_VCS5] = {
169 		.name = "vcs5",
170 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
171 		.instance = 5,
172 		.irq_offset = 32 + ilog2(INTR_VCS(5)),
173 		.domain = XE_FW_MEDIA_VDBOX5,
174 		.mmio_base = XEHP_BSD6_RING_BASE,
175 	},
176 	[XE_HW_ENGINE_VCS6] = {
177 		.name = "vcs6",
178 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
179 		.instance = 6,
180 		.irq_offset = 32 + ilog2(INTR_VCS(6)),
181 		.domain = XE_FW_MEDIA_VDBOX6,
182 		.mmio_base = XEHP_BSD7_RING_BASE,
183 	},
184 	[XE_HW_ENGINE_VCS7] = {
185 		.name = "vcs7",
186 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
187 		.instance = 7,
188 		.irq_offset = 32 + ilog2(INTR_VCS(7)),
189 		.domain = XE_FW_MEDIA_VDBOX7,
190 		.mmio_base = XEHP_BSD8_RING_BASE,
191 	},
192 	[XE_HW_ENGINE_VECS0] = {
193 		.name = "vecs0",
194 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
195 		.instance = 0,
196 		.irq_offset = 32 + ilog2(INTR_VECS(0)),
197 		.domain = XE_FW_MEDIA_VEBOX0,
198 		.mmio_base = VEBOX_RING_BASE,
199 	},
200 	[XE_HW_ENGINE_VECS1] = {
201 		.name = "vecs1",
202 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
203 		.instance = 1,
204 		.irq_offset = 32 + ilog2(INTR_VECS(1)),
205 		.domain = XE_FW_MEDIA_VEBOX1,
206 		.mmio_base = VEBOX2_RING_BASE,
207 	},
208 	[XE_HW_ENGINE_VECS2] = {
209 		.name = "vecs2",
210 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
211 		.instance = 2,
212 		.irq_offset = 32 + ilog2(INTR_VECS(2)),
213 		.domain = XE_FW_MEDIA_VEBOX2,
214 		.mmio_base = XEHP_VEBOX3_RING_BASE,
215 	},
216 	[XE_HW_ENGINE_VECS3] = {
217 		.name = "vecs3",
218 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
219 		.instance = 3,
220 		.irq_offset = 32 + ilog2(INTR_VECS(3)),
221 		.domain = XE_FW_MEDIA_VEBOX3,
222 		.mmio_base = XEHP_VEBOX4_RING_BASE,
223 	},
224 	[XE_HW_ENGINE_CCS0] = {
225 		.name = "ccs0",
226 		.class = XE_ENGINE_CLASS_COMPUTE,
227 		.instance = 0,
228 		.irq_offset = ilog2(INTR_CCS(0)),
229 		.domain = XE_FW_RENDER,
230 		.mmio_base = COMPUTE0_RING_BASE,
231 	},
232 	[XE_HW_ENGINE_CCS1] = {
233 		.name = "ccs1",
234 		.class = XE_ENGINE_CLASS_COMPUTE,
235 		.instance = 1,
236 		.irq_offset = ilog2(INTR_CCS(1)),
237 		.domain = XE_FW_RENDER,
238 		.mmio_base = COMPUTE1_RING_BASE,
239 	},
240 	[XE_HW_ENGINE_CCS2] = {
241 		.name = "ccs2",
242 		.class = XE_ENGINE_CLASS_COMPUTE,
243 		.instance = 2,
244 		.irq_offset = ilog2(INTR_CCS(2)),
245 		.domain = XE_FW_RENDER,
246 		.mmio_base = COMPUTE2_RING_BASE,
247 	},
248 	[XE_HW_ENGINE_CCS3] = {
249 		.name = "ccs3",
250 		.class = XE_ENGINE_CLASS_COMPUTE,
251 		.instance = 3,
252 		.irq_offset = ilog2(INTR_CCS(3)),
253 		.domain = XE_FW_RENDER,
254 		.mmio_base = COMPUTE3_RING_BASE,
255 	},
256 	[XE_HW_ENGINE_GSCCS0] = {
257 		.name = "gsccs0",
258 		.class = XE_ENGINE_CLASS_OTHER,
259 		.instance = OTHER_GSC_INSTANCE,
260 		.domain = XE_FW_GSC,
261 		.mmio_base = GSCCS_RING_BASE,
262 	},
263 };
264 
265 static void hw_engine_fini(struct drm_device *drm, void *arg)
266 {
267 	struct xe_hw_engine *hwe = arg;
268 
269 	if (hwe->exl_port)
270 		xe_execlist_port_destroy(hwe->exl_port);
271 	xe_lrc_put(hwe->kernel_lrc);
272 
273 	hwe->gt = NULL;
274 }
275 
276 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
277 				   u32 val)
278 {
279 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
280 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
281 
282 	reg.addr += hwe->mmio_base;
283 
284 	xe_mmio_write32(hwe->gt, reg, val);
285 }
286 
287 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
288 {
289 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
290 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
291 
292 	reg.addr += hwe->mmio_base;
293 
294 	return xe_mmio_read32(hwe->gt, reg);
295 }
296 
297 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
298 {
299 	u32 ccs_mask =
300 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
301 
302 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
303 		xe_mmio_write32(hwe->gt, RCU_MODE,
304 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
305 
306 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
307 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
308 			       xe_bo_ggtt_addr(hwe->hwsp));
309 	hw_engine_mmio_write32(hwe, RING_MODE(0),
310 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
311 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
312 			       _MASKED_BIT_DISABLE(STOP_RING));
313 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
314 }
315 
316 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
317 						 const struct xe_hw_engine *hwe)
318 {
319 	return xe_gt_ccs_mode_enabled(gt) &&
320 	       xe_rtp_match_first_render_or_compute(gt, hwe);
321 }
322 
323 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
324 				      const struct xe_hw_engine *hwe)
325 {
326 	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
327 		return false;
328 
329 	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
330 	    hwe->class != XE_ENGINE_CLASS_RENDER)
331 		return false;
332 
333 	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
334 }
335 
336 void
337 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
338 {
339 	struct xe_gt *gt = hwe->gt;
340 	const u8 mocs_write_idx = gt->mocs.uc_index;
341 	const u8 mocs_read_idx = gt->mocs.uc_index;
342 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
343 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
344 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
345 	const struct xe_rtp_entry_sr lrc_setup[] = {
346 		/*
347 		 * Some blitter commands do not have a field for MOCS, those
348 		 * commands will use MOCS index pointed by BLIT_CCTL.
349 		 * BLIT_CCTL registers are needed to be programmed to un-cached.
350 		 */
351 		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
352 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
353 			       ENGINE_CLASS(COPY)),
354 		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
355 				 BLIT_CCTL_DST_MOCS_MASK |
356 				 BLIT_CCTL_SRC_MOCS_MASK,
357 				 blit_cctl_val,
358 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
359 		},
360 		/* Use Fixed slice CCS mode */
361 		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
362 		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
363 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
364 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
365 		},
366 		/* Disable WMTP if HW doesn't support it */
367 		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
368 		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
369 		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
370 					   PREEMPT_GPGPU_LEVEL_MASK,
371 					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
372 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
373 		},
374 		{}
375 	};
376 
377 	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
378 }
379 
380 static void
381 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
382 {
383 	struct xe_gt *gt = hwe->gt;
384 	struct xe_device *xe = gt_to_xe(gt);
385 	/*
386 	 * RING_CMD_CCTL specifies the default MOCS entry that will be
387 	 * used by the command streamer when executing commands that
388 	 * don't have a way to explicitly specify a MOCS setting.
389 	 * The default should usually reference whichever MOCS entry
390 	 * corresponds to uncached behavior, although use of a WB cached
391 	 * entry is recommended by the spec in certain circumstances on
392 	 * specific platforms.
393 	 * Bspec: 72161
394 	 */
395 	const u8 mocs_write_idx = gt->mocs.uc_index;
396 	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
397 				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
398 				 gt->mocs.wb_index : gt->mocs.uc_index;
399 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
400 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
401 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
402 	const struct xe_rtp_entry_sr engine_entries[] = {
403 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
404 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
405 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
406 					   CMD_CCTL_WRITE_OVERRIDE_MASK |
407 					   CMD_CCTL_READ_OVERRIDE_MASK,
408 					   ring_cmd_cctl_val,
409 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
410 		},
411 		/*
412 		 * To allow the GSC engine to go idle on MTL we need to enable
413 		 * idle messaging and set the hysteresis value (we use 0xA=5us
414 		 * as recommended in spec). On platforms after MTL this is
415 		 * enabled by default.
416 		 */
417 		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
418 		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
419 		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
420 				     IDLE_MSG_DISABLE,
421 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
422 				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
423 					   IDLE_WAIT_TIME,
424 					   0xA,
425 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
426 		},
427 		{}
428 	};
429 
430 	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
431 }
432 
433 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
434 				 enum xe_hw_engine_id id)
435 {
436 	const struct engine_info *info;
437 
438 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
439 		return;
440 
441 	if (!(gt->info.engine_mask & BIT(id)))
442 		return;
443 
444 	info = &engine_infos[id];
445 
446 	xe_gt_assert(gt, !hwe->gt);
447 
448 	hwe->gt = gt;
449 	hwe->class = info->class;
450 	hwe->instance = info->instance;
451 	hwe->mmio_base = info->mmio_base;
452 	hwe->irq_offset = info->irq_offset;
453 	hwe->domain = info->domain;
454 	hwe->name = info->name;
455 	hwe->fence_irq = &gt->fence_irq[info->class];
456 	hwe->engine_id = id;
457 
458 	hwe->eclass = &gt->eclass[hwe->class];
459 	if (!hwe->eclass->sched_props.job_timeout_ms) {
460 		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
461 		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
462 		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
463 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
464 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
465 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
466 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
467 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
468 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
469 
470 		/*
471 		 * The GSC engine can accept submissions while the GSC shim is
472 		 * being reset, during which time the submission is stalled. In
473 		 * the worst case, the shim reset can take up to the maximum GSC
474 		 * command execution time (250ms), so the request start can be
475 		 * delayed by that much; the request itself can take that long
476 		 * without being preemptible, which means worst case it can
477 		 * theoretically take up to 500ms for a preemption to go through
478 		 * on the GSC engine. Adding to that an extra 100ms as a safety
479 		 * margin, we get a minimum recommended timeout of 600ms.
480 		 * The preempt_timeout value can't be tuned for OTHER_CLASS
481 		 * because the class is reserved for kernel usage, so we just
482 		 * need to make sure that the starting value is above that
483 		 * threshold; since our default value (640ms) is greater than
484 		 * 600ms, the only way we can go below is via a kconfig setting.
485 		 * If that happens, log it in dmesg and update the value.
486 		 */
487 		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
488 			const u32 min_preempt_timeout = 600 * 1000;
489 			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
490 				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
491 				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
492 			}
493 		}
494 
495 		/* Record default props */
496 		hwe->eclass->defaults = hwe->eclass->sched_props;
497 	}
498 
499 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
500 	xe_tuning_process_engine(hwe);
501 	xe_wa_process_engine(hwe);
502 	hw_engine_setup_default_state(hwe);
503 
504 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
505 	xe_reg_whitelist_process_engine(hwe);
506 }
507 
508 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
509 			  enum xe_hw_engine_id id)
510 {
511 	struct xe_device *xe = gt_to_xe(gt);
512 	struct xe_tile *tile = gt_to_tile(gt);
513 	int err;
514 
515 	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
516 	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
517 
518 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
519 	xe_reg_sr_apply_whitelist(hwe);
520 
521 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
522 						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
523 						 XE_BO_FLAG_GGTT |
524 						 XE_BO_FLAG_GGTT_INVALIDATE);
525 	if (IS_ERR(hwe->hwsp)) {
526 		err = PTR_ERR(hwe->hwsp);
527 		goto err_name;
528 	}
529 
530 	hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K);
531 	if (IS_ERR(hwe->kernel_lrc)) {
532 		err = PTR_ERR(hwe->kernel_lrc);
533 		goto err_hwsp;
534 	}
535 
536 	if (!xe_device_uc_enabled(xe)) {
537 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
538 		if (IS_ERR(hwe->exl_port)) {
539 			err = PTR_ERR(hwe->exl_port);
540 			goto err_kernel_lrc;
541 		}
542 	}
543 
544 	if (xe_device_uc_enabled(xe)) {
545 		/* GSCCS has a special interrupt for reset */
546 		if (hwe->class == XE_ENGINE_CLASS_OTHER)
547 			hwe->irq_handler = xe_gsc_hwe_irq_handler;
548 
549 		xe_hw_engine_enable_ring(hwe);
550 	}
551 
552 	/* We reserve the highest BCS instance for USM */
553 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
554 		gt->usm.reserved_bcs_instance = hwe->instance;
555 
556 	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
557 
558 err_kernel_lrc:
559 	xe_lrc_put(hwe->kernel_lrc);
560 err_hwsp:
561 	xe_bo_unpin_map_no_vm(hwe->hwsp);
562 err_name:
563 	hwe->name = NULL;
564 
565 	return err;
566 }
567 
568 static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
569 {
570 	int class;
571 
572 	/* FIXME: Doing a simple logical mapping that works for most hardware */
573 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
574 		struct xe_hw_engine *hwe;
575 		enum xe_hw_engine_id id;
576 		int logical_instance = 0;
577 
578 		for_each_hw_engine(hwe, gt, id)
579 			if (hwe->class == class)
580 				hwe->logical_instance = logical_instance++;
581 	}
582 }
583 
584 static void read_media_fuses(struct xe_gt *gt)
585 {
586 	struct xe_device *xe = gt_to_xe(gt);
587 	u32 media_fuse;
588 	u16 vdbox_mask;
589 	u16 vebox_mask;
590 	int i, j;
591 
592 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
593 
594 	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
595 
596 	/*
597 	 * Pre-Xe_HP platforms had register bits representing absent engines,
598 	 * whereas Xe_HP and beyond have bits representing present engines.
599 	 * Invert the polarity on old platforms so that we can use common
600 	 * handling below.
601 	 */
602 	if (GRAPHICS_VERx100(xe) < 1250)
603 		media_fuse = ~media_fuse;
604 
605 	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
606 	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
607 
608 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
609 		if (!(gt->info.engine_mask & BIT(i)))
610 			continue;
611 
612 		if (!(BIT(j) & vdbox_mask)) {
613 			gt->info.engine_mask &= ~BIT(i);
614 			drm_info(&xe->drm, "vcs%u fused off\n", j);
615 		}
616 	}
617 
618 	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
619 		if (!(gt->info.engine_mask & BIT(i)))
620 			continue;
621 
622 		if (!(BIT(j) & vebox_mask)) {
623 			gt->info.engine_mask &= ~BIT(i);
624 			drm_info(&xe->drm, "vecs%u fused off\n", j);
625 		}
626 	}
627 }
628 
629 static void read_copy_fuses(struct xe_gt *gt)
630 {
631 	struct xe_device *xe = gt_to_xe(gt);
632 	u32 bcs_mask;
633 
634 	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
635 		return;
636 
637 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
638 
639 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
640 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
641 
642 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
643 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
644 		if (!(gt->info.engine_mask & BIT(i)))
645 			continue;
646 
647 		if (!(BIT(j / 2) & bcs_mask)) {
648 			gt->info.engine_mask &= ~BIT(i);
649 			drm_info(&xe->drm, "bcs%u fused off\n", j);
650 		}
651 	}
652 }
653 
654 static void read_compute_fuses_from_dss(struct xe_gt *gt)
655 {
656 	struct xe_device *xe = gt_to_xe(gt);
657 
658 	/*
659 	 * CCS fusing based on DSS masks only applies to platforms that can
660 	 * have more than one CCS.
661 	 */
662 	if (hweight64(gt->info.engine_mask &
663 		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
664 		return;
665 
666 	/*
667 	 * CCS availability on Xe_HP is inferred from the presence of DSS in
668 	 * each quadrant.
669 	 */
670 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
671 		if (!(gt->info.engine_mask & BIT(i)))
672 			continue;
673 
674 		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
675 			gt->info.engine_mask &= ~BIT(i);
676 			drm_info(&xe->drm, "ccs%u fused off\n", j);
677 		}
678 	}
679 }
680 
681 static void read_compute_fuses_from_reg(struct xe_gt *gt)
682 {
683 	struct xe_device *xe = gt_to_xe(gt);
684 	u32 ccs_mask;
685 
686 	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
687 	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
688 
689 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
690 		if (!(gt->info.engine_mask & BIT(i)))
691 			continue;
692 
693 		if ((ccs_mask & BIT(j)) == 0) {
694 			gt->info.engine_mask &= ~BIT(i);
695 			drm_info(&xe->drm, "ccs%u fused off\n", j);
696 		}
697 	}
698 }
699 
/*
 * Pick the compute fuse source: the fuse register on graphics version 20 and
 * later, the DSS topology on everything older.
 */
static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20) {
		read_compute_fuses_from_dss(gt);
		return;
	}

	read_compute_fuses_from_reg(gt);
}
707 
708 static void check_gsc_availability(struct xe_gt *gt)
709 {
710 	struct xe_device *xe = gt_to_xe(gt);
711 
712 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
713 		return;
714 
715 	/*
716 	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
717 	 * have the FW there is nothing we need the engine for and can therefore
718 	 * skip its initialization.
719 	 */
720 	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
721 		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
722 
723 		/* interrupts where previously enabled, so turn them off */
724 		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
725 		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
726 
727 		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
728 	}
729 }
730 
731 int xe_hw_engines_init_early(struct xe_gt *gt)
732 {
733 	int i;
734 
735 	read_media_fuses(gt);
736 	read_copy_fuses(gt);
737 	read_compute_fuses(gt);
738 	check_gsc_availability(gt);
739 
740 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
741 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
742 
743 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
744 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
745 
746 	return 0;
747 }
748 
749 int xe_hw_engines_init(struct xe_gt *gt)
750 {
751 	int err;
752 	struct xe_hw_engine *hwe;
753 	enum xe_hw_engine_id id;
754 
755 	for_each_hw_engine(hwe, gt, id) {
756 		err = hw_engine_init(gt, hwe, id);
757 		if (err)
758 			return err;
759 	}
760 
761 	hw_engine_setup_logical_mapping(gt);
762 
763 	return 0;
764 }
765 
766 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
767 {
768 	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
769 
770 	if (hwe->irq_handler)
771 		hwe->irq_handler(hwe, intr_vec);
772 
773 	if (intr_vec & GT_RENDER_USER_INTERRUPT)
774 		xe_hw_fence_irq_run(hwe->fence_irq);
775 }
776 
777 static bool
778 is_slice_common_per_gslice(struct xe_device *xe)
779 {
780 	return GRAPHICS_VERx100(xe) >= 1255;
781 }
782 
783 static void
784 xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
785 				       struct xe_hw_engine_snapshot *snapshot)
786 {
787 	struct xe_gt *gt = hwe->gt;
788 	struct xe_device *xe = gt_to_xe(gt);
789 	unsigned int dss;
790 	u16 group, instance;
791 
792 	snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
793 
794 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
795 		return;
796 
797 	if (is_slice_common_per_gslice(xe) == false) {
798 		snapshot->reg.instdone.slice_common[0] =
799 			xe_mmio_read32(gt, SC_INSTDONE);
800 		snapshot->reg.instdone.slice_common_extra[0] =
801 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
802 		snapshot->reg.instdone.slice_common_extra2[0] =
803 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
804 	} else {
805 		for_each_geometry_dss(dss, gt, group, instance) {
806 			snapshot->reg.instdone.slice_common[dss] =
807 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
808 			snapshot->reg.instdone.slice_common_extra[dss] =
809 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
810 			snapshot->reg.instdone.slice_common_extra2[dss] =
811 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
812 		}
813 	}
814 
815 	for_each_geometry_dss(dss, gt, group, instance) {
816 		snapshot->reg.instdone.sampler[dss] =
817 			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
818 		snapshot->reg.instdone.row[dss] =
819 			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
820 
821 		if (GRAPHICS_VERx100(xe) >= 1255)
822 			snapshot->reg.instdone.geom_svg[dss] =
823 				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
824 						       group, instance);
825 	}
826 }
827 
828 /**
829  * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
830  * @hwe: Xe HW Engine.
831  *
832  * This can be printed out in a later stage like during dev_coredump
833  * analysis.
834  *
835  * Returns: a Xe HW Engine snapshot object that must be freed by the
836  * caller, using `xe_hw_engine_snapshot_free`.
837  */
838 struct xe_hw_engine_snapshot *
839 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
840 {
841 	struct xe_hw_engine_snapshot *snapshot;
842 	size_t len;
843 	u64 val;
844 
845 	if (!xe_hw_engine_is_valid(hwe))
846 		return NULL;
847 
848 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
849 
850 	if (!snapshot)
851 		return NULL;
852 
853 	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
854 	 * includes xe_hw_engine_types.h the length of this 3 registers can't be
855 	 * set in struct xe_hw_engine_snapshot, so here doing additional
856 	 * allocations.
857 	 */
858 	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
859 	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
860 	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
861 	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
862 	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
863 	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
864 	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
865 	if (!snapshot->reg.instdone.slice_common ||
866 	    !snapshot->reg.instdone.slice_common_extra ||
867 	    !snapshot->reg.instdone.slice_common_extra2 ||
868 	    !snapshot->reg.instdone.sampler ||
869 	    !snapshot->reg.instdone.row ||
870 	    !snapshot->reg.instdone.geom_svg) {
871 		xe_hw_engine_snapshot_free(snapshot);
872 		return NULL;
873 	}
874 
875 	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
876 	snapshot->hwe = hwe;
877 	snapshot->logical_instance = hwe->logical_instance;
878 	snapshot->forcewake.domain = hwe->domain;
879 	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
880 						    hwe->domain);
881 	snapshot->mmio_base = hwe->mmio_base;
882 
883 	/* no more VF accessible data below this point */
884 	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
885 		return snapshot;
886 
887 	snapshot->reg.ring_execlist_status =
888 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
889 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
890 	snapshot->reg.ring_execlist_status |= val << 32;
891 
892 	snapshot->reg.ring_execlist_sq_contents =
893 		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
894 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
895 	snapshot->reg.ring_execlist_sq_contents |= val << 32;
896 
897 	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
898 	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
899 	snapshot->reg.ring_acthd |= val << 32;
900 
901 	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
902 	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
903 	snapshot->reg.ring_bbaddr |= val << 32;
904 
905 	snapshot->reg.ring_dma_fadd =
906 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
907 	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
908 	snapshot->reg.ring_dma_fadd |= val << 32;
909 
910 	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
911 	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
912 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
913 	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
914 		val = hw_engine_mmio_read32(hwe, RING_START_UDW(0));
915 		snapshot->reg.ring_start |= val << 32;
916 	}
917 	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
918 		snapshot->reg.indirect_ring_state =
919 			hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
920 	}
921 
922 	snapshot->reg.ring_head =
923 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
924 	snapshot->reg.ring_tail =
925 		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
926 	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
927 	snapshot->reg.ring_mi_mode =
928 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
929 	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
930 	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
931 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
932 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
933 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
934 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
935 	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
936 
937 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
938 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
939 
940 	return snapshot;
941 }
942 
943 static void
944 xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
945 {
946 	struct xe_gt *gt = snapshot->hwe->gt;
947 	struct xe_device *xe = gt_to_xe(gt);
948 	u16 group, instance;
949 	unsigned int dss;
950 
951 	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
952 
953 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
954 		return;
955 
956 	if (is_slice_common_per_gslice(xe) == false) {
957 		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
958 			   snapshot->reg.instdone.slice_common[0]);
959 		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
960 			   snapshot->reg.instdone.slice_common_extra[0]);
961 		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
962 			   snapshot->reg.instdone.slice_common_extra2[0]);
963 	} else {
964 		for_each_geometry_dss(dss, gt, group, instance) {
965 			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
966 				   snapshot->reg.instdone.slice_common[dss]);
967 			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
968 				   snapshot->reg.instdone.slice_common_extra[dss]);
969 			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
970 				   snapshot->reg.instdone.slice_common_extra2[dss]);
971 		}
972 	}
973 
974 	for_each_geometry_dss(dss, gt, group, instance) {
975 		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
976 			   snapshot->reg.instdone.sampler[dss]);
977 		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
978 			   snapshot->reg.instdone.row[dss]);
979 
980 		if (GRAPHICS_VERx100(xe) >= 1255)
981 			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
982 				   dss, snapshot->reg.instdone.geom_svg[dss]);
983 	}
984 }
985 
986 /**
987  * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
988  * @snapshot: Xe HW Engine snapshot object.
989  * @p: drm_printer where it will be printed out.
990  *
991  * This function prints out a given Xe HW Engine snapshot object.
992  */
993 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
994 				 struct drm_printer *p)
995 {
996 	if (!snapshot)
997 		return;
998 
999 	drm_printf(p, "%s (physical), logical instance=%d\n",
1000 		   snapshot->name ? snapshot->name : "",
1001 		   snapshot->logical_instance);
1002 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1003 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1004 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
1005 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
1006 	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
1007 		   snapshot->reg.ring_execlist_status);
1008 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
1009 		   snapshot->reg.ring_execlist_sq_contents);
1010 	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
1011 	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
1012 	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
1013 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
1014 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
1015 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
1016 		   snapshot->reg.ring_mode);
1017 	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
1018 	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
1019 	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
1020 	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
1021 	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
1022 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
1023 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
1024 	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
1025 		   snapshot->reg.indirect_ring_state);
1026 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
1027 	xe_hw_engine_snapshot_instdone_print(snapshot, p);
1028 
1029 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
1030 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
1031 			   snapshot->reg.rcu_mode);
1032 	drm_puts(p, "\n");
1033 }
1034 
1035 /**
1036  * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
1037  * @snapshot: Xe HW Engine snapshot object.
1038  *
1039  * This function free all the memory that needed to be allocated at capture
1040  * time.
1041  */
1042 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
1043 {
1044 	if (!snapshot)
1045 		return;
1046 
1047 	kfree(snapshot->reg.instdone.slice_common);
1048 	kfree(snapshot->reg.instdone.slice_common_extra);
1049 	kfree(snapshot->reg.instdone.slice_common_extra2);
1050 	kfree(snapshot->reg.instdone.sampler);
1051 	kfree(snapshot->reg.instdone.row);
1052 	kfree(snapshot->reg.instdone.geom_svg);
1053 	kfree(snapshot->name);
1054 	kfree(snapshot);
1055 }
1056 
/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * Captures a snapshot of @hwe, prints it to @p right away and then frees the
 * snapshot again; nothing is retained after the call.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snap = xe_hw_engine_snapshot_capture(hwe);

	/* Printing and freeing are both no-ops for a NULL snapshot. */
	xe_hw_engine_snapshot_print(snap, p);
	xe_hw_engine_snapshot_free(snap);
}
1072 
1073 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
1074 				enum xe_engine_class engine_class)
1075 {
1076 	u32 mask = 0;
1077 	enum xe_hw_engine_id id;
1078 
1079 	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
1080 		if (engine_infos[id].class == engine_class &&
1081 		    gt->info.engine_mask & BIT(id))
1082 			mask |= BIT(engine_infos[id].instance);
1083 	}
1084 	return mask;
1085 }
1086 
1087 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
1088 {
1089 	struct xe_gt *gt = hwe->gt;
1090 	struct xe_device *xe = gt_to_xe(gt);
1091 
1092 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
1093 		return true;
1094 
1095 	/* Check for engines disabled by ccs_mode setting */
1096 	if (xe_gt_ccs_mode_enabled(gt) &&
1097 	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
1098 	    hwe->logical_instance >= gt->ccs_mode)
1099 		return true;
1100 
1101 	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
1102 		hwe->instance == gt->usm.reserved_bcs_instance;
1103 }
1104 
1105 const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
1106 {
1107 	switch (class) {
1108 	case XE_ENGINE_CLASS_RENDER:
1109 		return "rcs";
1110 	case XE_ENGINE_CLASS_VIDEO_DECODE:
1111 		return "vcs";
1112 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
1113 		return "vecs";
1114 	case XE_ENGINE_CLASS_COPY:
1115 		return "bcs";
1116 	case XE_ENGINE_CLASS_OTHER:
1117 		return "other";
1118 	case XE_ENGINE_CLASS_COMPUTE:
1119 		return "ccs";
1120 	case XE_ENGINE_CLASS_MAX:
1121 		break;
1122 	}
1123 
1124 	return NULL;
1125 }
1126 
1127 u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
1128 {
1129 	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
1130 }
1131