xref: /linux/drivers/gpu/drm/xe/xe_hw_engine.c (revision 188ced1e0ff892f0948f20480e2e0122380ae46d)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_hw_engine.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_bo.h"
14 #include "xe_device.h"
15 #include "xe_execlist.h"
16 #include "xe_force_wake.h"
17 #include "xe_gsc.h"
18 #include "xe_gt.h"
19 #include "xe_gt_ccs_mode.h"
20 #include "xe_gt_mcr.h"
21 #include "xe_gt_printk.h"
22 #include "xe_gt_topology.h"
23 #include "xe_hw_fence.h"
24 #include "xe_irq.h"
25 #include "xe_lrc.h"
26 #include "xe_macros.h"
27 #include "xe_mmio.h"
28 #include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
29 #include "xe_rtp.h"
30 #include "xe_sched_job.h"
31 #include "xe_sriov.h"
32 #include "xe_tuning.h"
33 #include "xe_uc_fw.h"
34 #include "xe_wa.h"
35 
36 #define MAX_MMIO_BASES 3
37 struct engine_info {
38 	const char *name;
39 	unsigned int class : 8;
40 	unsigned int instance : 8;
41 	unsigned int irq_offset : 8;
42 	enum xe_force_wake_domains domain;
43 	u32 mmio_base;
44 };
45 
46 static const struct engine_info engine_infos[] = {
47 	[XE_HW_ENGINE_RCS0] = {
48 		.name = "rcs0",
49 		.class = XE_ENGINE_CLASS_RENDER,
50 		.instance = 0,
51 		.irq_offset = ilog2(INTR_RCS0),
52 		.domain = XE_FW_RENDER,
53 		.mmio_base = RENDER_RING_BASE,
54 	},
55 	[XE_HW_ENGINE_BCS0] = {
56 		.name = "bcs0",
57 		.class = XE_ENGINE_CLASS_COPY,
58 		.instance = 0,
59 		.irq_offset = ilog2(INTR_BCS(0)),
60 		.domain = XE_FW_RENDER,
61 		.mmio_base = BLT_RING_BASE,
62 	},
63 	[XE_HW_ENGINE_BCS1] = {
64 		.name = "bcs1",
65 		.class = XE_ENGINE_CLASS_COPY,
66 		.instance = 1,
67 		.irq_offset = ilog2(INTR_BCS(1)),
68 		.domain = XE_FW_RENDER,
69 		.mmio_base = XEHPC_BCS1_RING_BASE,
70 	},
71 	[XE_HW_ENGINE_BCS2] = {
72 		.name = "bcs2",
73 		.class = XE_ENGINE_CLASS_COPY,
74 		.instance = 2,
75 		.irq_offset = ilog2(INTR_BCS(2)),
76 		.domain = XE_FW_RENDER,
77 		.mmio_base = XEHPC_BCS2_RING_BASE,
78 	},
79 	[XE_HW_ENGINE_BCS3] = {
80 		.name = "bcs3",
81 		.class = XE_ENGINE_CLASS_COPY,
82 		.instance = 3,
83 		.irq_offset = ilog2(INTR_BCS(3)),
84 		.domain = XE_FW_RENDER,
85 		.mmio_base = XEHPC_BCS3_RING_BASE,
86 	},
87 	[XE_HW_ENGINE_BCS4] = {
88 		.name = "bcs4",
89 		.class = XE_ENGINE_CLASS_COPY,
90 		.instance = 4,
91 		.irq_offset = ilog2(INTR_BCS(4)),
92 		.domain = XE_FW_RENDER,
93 		.mmio_base = XEHPC_BCS4_RING_BASE,
94 	},
95 	[XE_HW_ENGINE_BCS5] = {
96 		.name = "bcs5",
97 		.class = XE_ENGINE_CLASS_COPY,
98 		.instance = 5,
99 		.irq_offset = ilog2(INTR_BCS(5)),
100 		.domain = XE_FW_RENDER,
101 		.mmio_base = XEHPC_BCS5_RING_BASE,
102 	},
103 	[XE_HW_ENGINE_BCS6] = {
104 		.name = "bcs6",
105 		.class = XE_ENGINE_CLASS_COPY,
106 		.instance = 6,
107 		.irq_offset = ilog2(INTR_BCS(6)),
108 		.domain = XE_FW_RENDER,
109 		.mmio_base = XEHPC_BCS6_RING_BASE,
110 	},
111 	[XE_HW_ENGINE_BCS7] = {
112 		.name = "bcs7",
113 		.class = XE_ENGINE_CLASS_COPY,
114 		.instance = 7,
115 		.irq_offset = ilog2(INTR_BCS(7)),
116 		.domain = XE_FW_RENDER,
117 		.mmio_base = XEHPC_BCS7_RING_BASE,
118 	},
119 	[XE_HW_ENGINE_BCS8] = {
120 		.name = "bcs8",
121 		.class = XE_ENGINE_CLASS_COPY,
122 		.instance = 8,
123 		.irq_offset = ilog2(INTR_BCS8),
124 		.domain = XE_FW_RENDER,
125 		.mmio_base = XEHPC_BCS8_RING_BASE,
126 	},
127 
128 	[XE_HW_ENGINE_VCS0] = {
129 		.name = "vcs0",
130 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
131 		.instance = 0,
132 		.irq_offset = 32 + ilog2(INTR_VCS(0)),
133 		.domain = XE_FW_MEDIA_VDBOX0,
134 		.mmio_base = BSD_RING_BASE,
135 	},
136 	[XE_HW_ENGINE_VCS1] = {
137 		.name = "vcs1",
138 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
139 		.instance = 1,
140 		.irq_offset = 32 + ilog2(INTR_VCS(1)),
141 		.domain = XE_FW_MEDIA_VDBOX1,
142 		.mmio_base = BSD2_RING_BASE,
143 	},
144 	[XE_HW_ENGINE_VCS2] = {
145 		.name = "vcs2",
146 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
147 		.instance = 2,
148 		.irq_offset = 32 + ilog2(INTR_VCS(2)),
149 		.domain = XE_FW_MEDIA_VDBOX2,
150 		.mmio_base = BSD3_RING_BASE,
151 	},
152 	[XE_HW_ENGINE_VCS3] = {
153 		.name = "vcs3",
154 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
155 		.instance = 3,
156 		.irq_offset = 32 + ilog2(INTR_VCS(3)),
157 		.domain = XE_FW_MEDIA_VDBOX3,
158 		.mmio_base = BSD4_RING_BASE,
159 	},
160 	[XE_HW_ENGINE_VCS4] = {
161 		.name = "vcs4",
162 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
163 		.instance = 4,
164 		.irq_offset = 32 + ilog2(INTR_VCS(4)),
165 		.domain = XE_FW_MEDIA_VDBOX4,
166 		.mmio_base = XEHP_BSD5_RING_BASE,
167 	},
168 	[XE_HW_ENGINE_VCS5] = {
169 		.name = "vcs5",
170 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
171 		.instance = 5,
172 		.irq_offset = 32 + ilog2(INTR_VCS(5)),
173 		.domain = XE_FW_MEDIA_VDBOX5,
174 		.mmio_base = XEHP_BSD6_RING_BASE,
175 	},
176 	[XE_HW_ENGINE_VCS6] = {
177 		.name = "vcs6",
178 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
179 		.instance = 6,
180 		.irq_offset = 32 + ilog2(INTR_VCS(6)),
181 		.domain = XE_FW_MEDIA_VDBOX6,
182 		.mmio_base = XEHP_BSD7_RING_BASE,
183 	},
184 	[XE_HW_ENGINE_VCS7] = {
185 		.name = "vcs7",
186 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
187 		.instance = 7,
188 		.irq_offset = 32 + ilog2(INTR_VCS(7)),
189 		.domain = XE_FW_MEDIA_VDBOX7,
190 		.mmio_base = XEHP_BSD8_RING_BASE,
191 	},
192 	[XE_HW_ENGINE_VECS0] = {
193 		.name = "vecs0",
194 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
195 		.instance = 0,
196 		.irq_offset = 32 + ilog2(INTR_VECS(0)),
197 		.domain = XE_FW_MEDIA_VEBOX0,
198 		.mmio_base = VEBOX_RING_BASE,
199 	},
200 	[XE_HW_ENGINE_VECS1] = {
201 		.name = "vecs1",
202 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
203 		.instance = 1,
204 		.irq_offset = 32 + ilog2(INTR_VECS(1)),
205 		.domain = XE_FW_MEDIA_VEBOX1,
206 		.mmio_base = VEBOX2_RING_BASE,
207 	},
208 	[XE_HW_ENGINE_VECS2] = {
209 		.name = "vecs2",
210 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
211 		.instance = 2,
212 		.irq_offset = 32 + ilog2(INTR_VECS(2)),
213 		.domain = XE_FW_MEDIA_VEBOX2,
214 		.mmio_base = XEHP_VEBOX3_RING_BASE,
215 	},
216 	[XE_HW_ENGINE_VECS3] = {
217 		.name = "vecs3",
218 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
219 		.instance = 3,
220 		.irq_offset = 32 + ilog2(INTR_VECS(3)),
221 		.domain = XE_FW_MEDIA_VEBOX3,
222 		.mmio_base = XEHP_VEBOX4_RING_BASE,
223 	},
224 	[XE_HW_ENGINE_CCS0] = {
225 		.name = "ccs0",
226 		.class = XE_ENGINE_CLASS_COMPUTE,
227 		.instance = 0,
228 		.irq_offset = ilog2(INTR_CCS(0)),
229 		.domain = XE_FW_RENDER,
230 		.mmio_base = COMPUTE0_RING_BASE,
231 	},
232 	[XE_HW_ENGINE_CCS1] = {
233 		.name = "ccs1",
234 		.class = XE_ENGINE_CLASS_COMPUTE,
235 		.instance = 1,
236 		.irq_offset = ilog2(INTR_CCS(1)),
237 		.domain = XE_FW_RENDER,
238 		.mmio_base = COMPUTE1_RING_BASE,
239 	},
240 	[XE_HW_ENGINE_CCS2] = {
241 		.name = "ccs2",
242 		.class = XE_ENGINE_CLASS_COMPUTE,
243 		.instance = 2,
244 		.irq_offset = ilog2(INTR_CCS(2)),
245 		.domain = XE_FW_RENDER,
246 		.mmio_base = COMPUTE2_RING_BASE,
247 	},
248 	[XE_HW_ENGINE_CCS3] = {
249 		.name = "ccs3",
250 		.class = XE_ENGINE_CLASS_COMPUTE,
251 		.instance = 3,
252 		.irq_offset = ilog2(INTR_CCS(3)),
253 		.domain = XE_FW_RENDER,
254 		.mmio_base = COMPUTE3_RING_BASE,
255 	},
256 	[XE_HW_ENGINE_GSCCS0] = {
257 		.name = "gsccs0",
258 		.class = XE_ENGINE_CLASS_OTHER,
259 		.instance = OTHER_GSC_INSTANCE,
260 		.domain = XE_FW_GSC,
261 		.mmio_base = GSCCS_RING_BASE,
262 	},
263 };
264 
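/*
 * Illustrative sketch, not part of the original file: engine_infos is
 * indexed by enum xe_hw_engine_id, and irq_offset records the engine's bit
 * position in the interrupt identity/source vector, which is presumably why
 * the VCS/VECS entries above add 32 to land in an upper 32-bit half.  A
 * decoder written under that assumption could look like the made-up helper
 * below.
 */
static inline bool example_engine_irq_pending(u32 lower_dw, u32 upper_dw,
					      enum xe_hw_engine_id id)
{
	const struct engine_info *info = &engine_infos[id];

	return info->irq_offset < 32 ?
		lower_dw & BIT(info->irq_offset) :
		upper_dw & BIT(info->irq_offset - 32);
}
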
265 static void hw_engine_fini(struct drm_device *drm, void *arg)
266 {
267 	struct xe_hw_engine *hwe = arg;
268 
269 	if (hwe->exl_port)
270 		xe_execlist_port_destroy(hwe->exl_port);
271 	xe_lrc_finish(&hwe->kernel_lrc);
272 
273 	hwe->gt = NULL;
274 }
275 
276 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
277 				   u32 val)
278 {
279 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
280 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
281 
282 	reg.addr += hwe->mmio_base;
283 
284 	xe_mmio_write32(hwe->gt, reg, val);
285 }
286 
287 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
288 {
289 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
290 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
291 
292 	reg.addr += hwe->mmio_base;
293 
294 	return xe_mmio_read32(hwe->gt, reg);
295 }
296 
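/*
 * Usage sketch, not part of the original file: the two helpers above take
 * engine-relative register definitions (e.g. RING_MI_MODE(0)) and offset
 * them by hwe->mmio_base, so the same sequence works for any engine as long
 * as the caller holds the engine's forcewake domain.  The function name
 * below is made up for illustration.
 */
static inline void example_stop_ring(struct xe_hw_engine *hwe)
{
	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
			       _MASKED_BIT_ENABLE(STOP_RING));
	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));	/* posting read */
}
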
297 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
298 {
299 	u32 ccs_mask =
300 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
301 
302 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
303 		xe_mmio_write32(hwe->gt, RCU_MODE,
304 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
305 
306 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
307 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
308 			       xe_bo_ggtt_addr(hwe->hwsp));
309 	hw_engine_mmio_write32(hwe, RING_MODE(0),
310 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
311 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
312 			       _MASKED_BIT_DISABLE(STOP_RING));
313 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
314 }
315 
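/*
 * Note, added for illustration: RING_MODE, RING_MI_MODE and RCU_MODE are
 * "masked" registers - the upper 16 bits of the written value select which
 * of the lower 16 bits take effect - which is why the writes above never
 * need a read-modify-write.  Roughly:
 *
 *	_MASKED_BIT_ENABLE(bit)  expands to (bit << 16) | bit
 *	_MASKED_BIT_DISABLE(bit) expands to (bit << 16)
 */
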
316 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
317 						 const struct xe_hw_engine *hwe)
318 {
319 	return xe_gt_ccs_mode_enabled(gt) &&
320 	       xe_rtp_match_first_render_or_compute(gt, hwe);
321 }
322 
323 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
324 				      const struct xe_hw_engine *hwe)
325 {
326 	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
327 		return false;
328 
329 	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
330 	    hwe->class != XE_ENGINE_CLASS_RENDER)
331 		return false;
332 
333 	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
334 }
335 
336 void
337 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
338 {
339 	struct xe_gt *gt = hwe->gt;
340 	const u8 mocs_write_idx = gt->mocs.uc_index;
341 	const u8 mocs_read_idx = gt->mocs.uc_index;
342 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
343 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
344 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
345 	const struct xe_rtp_entry_sr lrc_was[] = {
346 		/*
347 		 * Some blitter commands do not have a field for MOCS; those
348 		 * commands use the MOCS index pointed to by BLIT_CCTL, so the
349 		 * BLIT_CCTL register needs to be programmed to an uncached entry.
350 		 */
351 		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
352 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
353 			       ENGINE_CLASS(COPY)),
354 		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
355 				 BLIT_CCTL_DST_MOCS_MASK |
356 				 BLIT_CCTL_SRC_MOCS_MASK,
357 				 blit_cctl_val,
358 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
359 		},
360 		/* Use Fixed slice CCS mode */
361 		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
362 		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
363 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
364 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
365 		},
366 		/* Disable WMTP if HW doesn't support it */
367 		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
368 		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
369 		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
370 					   PREEMPT_GPGPU_LEVEL_MASK,
371 					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
372 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
373 		},
374 		{}
375 	};
376 
377 	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
378 }
379 
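/*
 * Note, added for illustration: each entry in the table above is applied
 * only when all of its XE_RTP_RULES() match the current GT/engine; the
 * matching FIELD_SET() actions are accumulated into hwe->reg_lrc and are
 * later emitted into the engine's default LRC image rather than being
 * written directly to hardware here.
 */
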
380 static void
381 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
382 {
383 	struct xe_gt *gt = hwe->gt;
384 	struct xe_device *xe = gt_to_xe(gt);
385 	/*
386 	 * RING_CMD_CCTL specifies the default MOCS entry that will be
387 	 * used by the command streamer when executing commands that
388 	 * don't have a way to explicitly specify a MOCS setting.
389 	 * The default should usually reference whichever MOCS entry
390 	 * corresponds to uncached behavior, although use of a WB cached
391 	 * entry is recommended by the spec in certain circumstances on
392 	 * specific platforms.
393 	 * Bspec: 72161
394 	 */
395 	const u8 mocs_write_idx = gt->mocs.uc_index;
396 	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
397 				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
398 				 gt->mocs.wb_index : gt->mocs.uc_index;
399 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
400 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
401 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
402 	const struct xe_rtp_entry_sr engine_entries[] = {
403 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
404 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
405 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
406 					   CMD_CCTL_WRITE_OVERRIDE_MASK |
407 					   CMD_CCTL_READ_OVERRIDE_MASK,
408 					   ring_cmd_cctl_val,
409 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
410 		},
411 		/*
412 		 * To allow the GSC engine to go idle on MTL we need to enable
413 		 * idle messaging and set the hysteresis value (we use 0xA=5us
414 		 * as recommended in spec). On platforms after MTL this is
415 		 * enabled by default.
416 		 */
417 		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
418 		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
419 		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
420 				     IDLE_MSG_DISABLE,
421 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
422 				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
423 					   IDLE_WAIT_TIME,
424 					   0xA,
425 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
426 		},
427 		{}
428 	};
429 
430 	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
431 }
432 
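/*
 * Worked example, added for illustration: per the selection above, on PVC
 * or Xe2+ a compute engine's default reads use the WB MOCS entry while its
 * default writes stay uncached; every other engine/platform combination
 * uses the uncached entry for both overrides.
 */
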
433 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
434 				 enum xe_hw_engine_id id)
435 {
436 	const struct engine_info *info;
437 
438 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
439 		return;
440 
441 	if (!(gt->info.engine_mask & BIT(id)))
442 		return;
443 
444 	info = &engine_infos[id];
445 
446 	xe_gt_assert(gt, !hwe->gt);
447 
448 	hwe->gt = gt;
449 	hwe->class = info->class;
450 	hwe->instance = info->instance;
451 	hwe->mmio_base = info->mmio_base;
452 	hwe->irq_offset = info->irq_offset;
453 	hwe->domain = info->domain;
454 	hwe->name = info->name;
455 	hwe->fence_irq = &gt->fence_irq[info->class];
456 	hwe->engine_id = id;
457 
458 	hwe->eclass = &gt->eclass[hwe->class];
459 	if (!hwe->eclass->sched_props.job_timeout_ms) {
460 		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
461 		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
462 		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
463 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
464 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
465 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
466 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
467 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
468 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
469 
470 		/*
471 		 * The GSC engine can accept submissions while the GSC shim is
472 		 * being reset, during which time the submission is stalled. In
473 		 * the worst case, the shim reset can take up to the maximum GSC
474 		 * command execution time (250ms), so the request start can be
475 		 * delayed by that much; the request itself can take that long
476 		 * without being preemptible, which means worst case it can
477 		 * theoretically take up to 500ms for a preemption to go through
478 		 * on the GSC engine. Adding to that an extra 100ms as a safety
479 		 * margin, we get a minimum recommended timeout of 600ms.
480 		 * The preempt_timeout value can't be tuned for OTHER_CLASS
481 		 * because the class is reserved for kernel usage, so we just
482 		 * need to make sure that the starting value is above that
483 		 * threshold; since our default value (640ms) is greater than
484 		 * 600ms, the only way we can go below is via a kconfig setting.
485 		 * If that happens, log it in dmesg and update the value.
486 		 */
487 		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
488 			const u32 min_preempt_timeout = 600 * 1000;
489 			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
490 				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
491 				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
492 			}
493 		}
494 
495 		/* Record default props */
496 		hwe->eclass->defaults = hwe->eclass->sched_props;
497 	}
498 
499 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
500 	xe_tuning_process_engine(hwe);
501 	xe_wa_process_engine(hwe);
502 	hw_engine_setup_default_state(hwe);
503 
504 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
505 	xe_reg_whitelist_process_engine(hwe);
506 }
507 
508 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
509 			  enum xe_hw_engine_id id)
510 {
511 	struct xe_device *xe = gt_to_xe(gt);
512 	struct xe_tile *tile = gt_to_tile(gt);
513 	int err;
514 
515 	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
516 	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
517 
518 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
519 	xe_reg_sr_apply_whitelist(hwe);
520 
521 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
522 						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
523 						 XE_BO_FLAG_GGTT |
524 						 XE_BO_FLAG_GGTT_INVALIDATE);
525 	if (IS_ERR(hwe->hwsp)) {
526 		err = PTR_ERR(hwe->hwsp);
527 		goto err_name;
528 	}
529 
530 	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
531 	if (err)
532 		goto err_hwsp;
533 
534 	if (!xe_device_uc_enabled(xe)) {
535 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
536 		if (IS_ERR(hwe->exl_port)) {
537 			err = PTR_ERR(hwe->exl_port);
538 			goto err_kernel_lrc;
539 		}
540 	}
541 
542 	if (xe_device_uc_enabled(xe)) {
543 		/* GSCCS has a special interrupt for reset */
544 		if (hwe->class == XE_ENGINE_CLASS_OTHER)
545 			hwe->irq_handler = xe_gsc_hwe_irq_handler;
546 
547 		xe_hw_engine_enable_ring(hwe);
548 	}
549 
550 	/* We reserve the highest BCS instance for USM */
551 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
552 		gt->usm.reserved_bcs_instance = hwe->instance;
553 
554 	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
555 
556 err_kernel_lrc:
557 	xe_lrc_finish(&hwe->kernel_lrc);
558 err_hwsp:
559 	xe_bo_unpin_map_no_vm(hwe->hwsp);
560 err_name:
561 	hwe->name = NULL;
562 
563 	return err;
564 }
565 
566 static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
567 {
568 	int class;
569 
570 	/* FIXME: Doing a simple logical mapping that works for most hardware */
571 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
572 		struct xe_hw_engine *hwe;
573 		enum xe_hw_engine_id id;
574 		int logical_instance = 0;
575 
576 		for_each_hw_engine(hwe, gt, id)
577 			if (hwe->class == class)
578 				hwe->logical_instance = logical_instance++;
579 	}
580 }
581 
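/*
 * Example, added for illustration: with the simple mapping above, fused-off
 * engines simply disappear from the numbering - e.g. if vcs1 is fused off,
 * the remaining vcs0 and vcs2 become logical instances 0 and 1 of the
 * video-decode class.
 */
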
582 static void read_media_fuses(struct xe_gt *gt)
583 {
584 	struct xe_device *xe = gt_to_xe(gt);
585 	u32 media_fuse;
586 	u16 vdbox_mask;
587 	u16 vebox_mask;
588 	int i, j;
589 
590 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
591 
592 	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
593 
594 	/*
595 	 * Pre-Xe_HP platforms had register bits representing absent engines,
596 	 * whereas Xe_HP and beyond have bits representing present engines.
597 	 * Invert the polarity on old platforms so that we can use common
598 	 * handling below.
599 	 */
600 	if (GRAPHICS_VERx100(xe) < 1250)
601 		media_fuse = ~media_fuse;
602 
603 	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
604 	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
605 
606 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
607 		if (!(gt->info.engine_mask & BIT(i)))
608 			continue;
609 
610 		if (!(BIT(j) & vdbox_mask)) {
611 			gt->info.engine_mask &= ~BIT(i);
612 			drm_info(&xe->drm, "vcs%u fused off\n", j);
613 		}
614 	}
615 
616 	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
617 		if (!(gt->info.engine_mask & BIT(i)))
618 			continue;
619 
620 		if (!(BIT(j) & vebox_mask)) {
621 			gt->info.engine_mask &= ~BIT(i);
622 			drm_info(&xe->drm, "vecs%u fused off\n", j);
623 		}
624 	}
625 }
626 
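/*
 * Worked example, added for illustration: on a pre-Xe_HP part the fuse
 * register reports "1 = disabled", so a raw value with VDBOX bit 1 set
 * means vcs1 is absent; after the ~media_fuse inversion above, bit 1 is
 * clear in vdbox_mask and XE_HW_ENGINE_VCS1 is dropped from
 * gt->info.engine_mask, matching the Xe_HP+ "1 = present" convention.
 */
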
627 static void read_copy_fuses(struct xe_gt *gt)
628 {
629 	struct xe_device *xe = gt_to_xe(gt);
630 	u32 bcs_mask;
631 
632 	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
633 		return;
634 
635 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
636 
637 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
638 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
639 
640 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
641 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
642 		if (!(gt->info.engine_mask & BIT(i)))
643 			continue;
644 
645 		if (!(BIT(j / 2) & bcs_mask)) {
646 			gt->info.engine_mask &= ~BIT(i);
647 			drm_info(&xe->drm, "bcs%u fused off\n", j);
648 		}
649 	}
650 }
651 
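/*
 * Note, added for illustration: the BIT(j / 2) test above maps each pair of
 * copy engines to one MEML3 fuse bit - bcs1/bcs2 depend on bit 0, bcs3/bcs4
 * on bit 1, and so on - so a single cleared bit removes two BCS engines.
 */
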
652 static void read_compute_fuses_from_dss(struct xe_gt *gt)
653 {
654 	struct xe_device *xe = gt_to_xe(gt);
655 
656 	/*
657 	 * CCS fusing based on DSS masks only applies to platforms that can
658 	 * have more than one CCS.
659 	 */
660 	if (hweight64(gt->info.engine_mask &
661 		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
662 		return;
663 
664 	/*
665 	 * CCS availability on Xe_HP is inferred from the presence of DSS in
666 	 * each quadrant.
667 	 */
668 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
669 		if (!(gt->info.engine_mask & BIT(i)))
670 			continue;
671 
672 		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
673 			gt->info.engine_mask &= ~BIT(i);
674 			drm_info(&xe->drm, "ccs%u fused off\n", j);
675 		}
676 	}
677 }
678 
679 static void read_compute_fuses_from_reg(struct xe_gt *gt)
680 {
681 	struct xe_device *xe = gt_to_xe(gt);
682 	u32 ccs_mask;
683 
684 	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
685 	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
686 
687 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
688 		if (!(gt->info.engine_mask & BIT(i)))
689 			continue;
690 
691 		if ((ccs_mask & BIT(j)) == 0) {
692 			gt->info.engine_mask &= ~BIT(i);
693 			drm_info(&xe->drm, "ccs%u fused off\n", j);
694 		}
695 	}
696 }
697 
698 static void read_compute_fuses(struct xe_gt *gt)
699 {
700 	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
701 		read_compute_fuses_from_reg(gt);
702 	else
703 		read_compute_fuses_from_dss(gt);
704 }
705 
706 static void check_gsc_availability(struct xe_gt *gt)
707 {
708 	struct xe_device *xe = gt_to_xe(gt);
709 
710 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
711 		return;
712 
713 	/*
714 	 * The GSCCS is only used to communicate with the GSC FW, so if the FW
715 	 * is not available there is nothing we need the engine for, and we can
716 	 * therefore skip its initialization.
717 	 */
718 	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
719 		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
720 
721 		/* interrupts were previously enabled, so turn them off */
722 		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
723 		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);
724 
725 		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
726 	}
727 }
728 
729 int xe_hw_engines_init_early(struct xe_gt *gt)
730 {
731 	int i;
732 
733 	read_media_fuses(gt);
734 	read_copy_fuses(gt);
735 	read_compute_fuses(gt);
736 	check_gsc_availability(gt);
737 
738 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
739 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
740 
741 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
742 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
743 
744 	return 0;
745 }
746 
747 int xe_hw_engines_init(struct xe_gt *gt)
748 {
749 	int err;
750 	struct xe_hw_engine *hwe;
751 	enum xe_hw_engine_id id;
752 
753 	for_each_hw_engine(hwe, gt, id) {
754 		err = hw_engine_init(gt, hwe, id);
755 		if (err)
756 			return err;
757 	}
758 
759 	hw_engine_setup_logical_mapping(gt);
760 
761 	return 0;
762 }
763 
764 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
765 {
766 	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
767 
768 	if (hwe->irq_handler)
769 		hwe->irq_handler(hwe, intr_vec);
770 
771 	if (intr_vec & GT_RENDER_USER_INTERRUPT)
772 		xe_hw_fence_irq_run(hwe->fence_irq);
773 }
774 
775 static bool
776 is_slice_common_per_gslice(struct xe_device *xe)
777 {
778 	return GRAPHICS_VERx100(xe) >= 1255;
779 }
780 
781 static void
782 xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
783 				       struct xe_hw_engine_snapshot *snapshot)
784 {
785 	struct xe_gt *gt = hwe->gt;
786 	struct xe_device *xe = gt_to_xe(gt);
787 	unsigned int dss;
788 	u16 group, instance;
789 
790 	snapshot->reg.instdone.ring = hw_engine_mmio_read32(hwe, RING_INSTDONE(0));
791 
792 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
793 		return;
794 
795 	if (!is_slice_common_per_gslice(xe)) {
796 		snapshot->reg.instdone.slice_common[0] =
797 			xe_mmio_read32(gt, SC_INSTDONE);
798 		snapshot->reg.instdone.slice_common_extra[0] =
799 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
800 		snapshot->reg.instdone.slice_common_extra2[0] =
801 			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
802 	} else {
803 		for_each_geometry_dss(dss, gt, group, instance) {
804 			snapshot->reg.instdone.slice_common[dss] =
805 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
806 			snapshot->reg.instdone.slice_common_extra[dss] =
807 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
808 			snapshot->reg.instdone.slice_common_extra2[dss] =
809 				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
810 		}
811 	}
812 
813 	for_each_geometry_dss(dss, gt, group, instance) {
814 		snapshot->reg.instdone.sampler[dss] =
815 			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
816 		snapshot->reg.instdone.row[dss] =
817 			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);
818 
819 		if (GRAPHICS_VERx100(xe) >= 1255)
820 			snapshot->reg.instdone.geom_svg[dss] =
821 				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
822 						       group, instance);
823 	}
824 }
825 
826 /**
827  * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
828  * @hwe: Xe HW Engine.
829  *
830  * The snapshot can be printed out at a later stage, for example during
831  * dev_coredump analysis.
832  *
833  * Returns: a Xe HW Engine snapshot object that must be freed by the
834  * caller, using `xe_hw_engine_snapshot_free`.
835  */
836 struct xe_hw_engine_snapshot *
837 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
838 {
839 	struct xe_hw_engine_snapshot *snapshot;
840 	size_t len;
841 	u64 val;
842 
843 	if (!xe_hw_engine_is_valid(hwe))
844 		return NULL;
845 
846 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
847 
848 	if (!snapshot)
849 		return NULL;
850 
851 	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h, which in
852 	 * turn includes xe_hw_engine_types.h, the lengths of these register
853 	 * arrays can't be fixed in struct xe_hw_engine_snapshot, so they are
854 	 * allocated separately here.
855 	 */
856 	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
857 	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
858 	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
859 	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
860 	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
861 	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
862 	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
863 	if (!snapshot->reg.instdone.slice_common ||
864 	    !snapshot->reg.instdone.slice_common_extra ||
865 	    !snapshot->reg.instdone.slice_common_extra2 ||
866 	    !snapshot->reg.instdone.sampler ||
867 	    !snapshot->reg.instdone.row ||
868 	    !snapshot->reg.instdone.geom_svg) {
869 		xe_hw_engine_snapshot_free(snapshot);
870 		return NULL;
871 	}
872 
873 	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
874 	snapshot->hwe = hwe;
875 	snapshot->logical_instance = hwe->logical_instance;
876 	snapshot->forcewake.domain = hwe->domain;
877 	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
878 						    hwe->domain);
879 	snapshot->mmio_base = hwe->mmio_base;
880 
881 	/* no more VF accessible data below this point */
882 	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
883 		return snapshot;
884 
885 	snapshot->reg.ring_execlist_status =
886 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
887 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
888 	snapshot->reg.ring_execlist_status |= val << 32;
889 
890 	snapshot->reg.ring_execlist_sq_contents =
891 		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
892 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
893 	snapshot->reg.ring_execlist_sq_contents |= val << 32;
894 
895 	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
896 	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
897 	snapshot->reg.ring_acthd |= val << 32;
898 
899 	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
900 	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
901 	snapshot->reg.ring_bbaddr |= val << 32;
902 
903 	snapshot->reg.ring_dma_fadd =
904 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
905 	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
906 	snapshot->reg.ring_dma_fadd |= val << 32;
907 
908 	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
909 	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
910 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
911 	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
912 		val = hw_engine_mmio_read32(hwe, RING_START_UDW(0));
913 		snapshot->reg.ring_start |= val << 32;
914 	}
915 	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
916 		snapshot->reg.indirect_ring_state =
917 			hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
918 	}
919 
920 	snapshot->reg.ring_head =
921 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
922 	snapshot->reg.ring_tail =
923 		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
924 	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
925 	snapshot->reg.ring_mi_mode =
926 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
927 	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
928 	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
929 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
930 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
931 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
932 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
933 	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);
934 
935 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
936 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
937 
938 	return snapshot;
939 }
940 
941 static void
942 xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
943 {
944 	struct xe_gt *gt = snapshot->hwe->gt;
945 	struct xe_device *xe = gt_to_xe(gt);
946 	u16 group, instance;
947 	unsigned int dss;
948 
949 	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);
950 
951 	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
952 		return;
953 
954 	if (!is_slice_common_per_gslice(xe)) {
955 		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
956 			   snapshot->reg.instdone.slice_common[0]);
957 		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
958 			   snapshot->reg.instdone.slice_common_extra[0]);
959 		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
960 			   snapshot->reg.instdone.slice_common_extra2[0]);
961 	} else {
962 		for_each_geometry_dss(dss, gt, group, instance) {
963 			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
964 				   snapshot->reg.instdone.slice_common[dss]);
965 			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
966 				   snapshot->reg.instdone.slice_common_extra[dss]);
967 			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
968 				   snapshot->reg.instdone.slice_common_extra2[dss]);
969 		}
970 	}
971 
972 	for_each_geometry_dss(dss, gt, group, instance) {
973 		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
974 			   snapshot->reg.instdone.sampler[dss]);
975 		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
976 			   snapshot->reg.instdone.row[dss]);
977 
978 		if (GRAPHICS_VERx100(xe) >= 1255)
979 			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
980 				   dss, snapshot->reg.instdone.geom_svg[dss]);
981 	}
982 }
983 
984 /**
985  * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
986  * @snapshot: Xe HW Engine snapshot object.
987  * @p: drm_printer where it will be printed out.
988  *
989  * This function prints out a given Xe HW Engine snapshot object.
990  */
991 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
992 				 struct drm_printer *p)
993 {
994 	if (!snapshot)
995 		return;
996 
997 	drm_printf(p, "%s (physical), logical instance=%d\n",
998 		   snapshot->name ? snapshot->name : "",
999 		   snapshot->logical_instance);
1000 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
1001 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
1002 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
1003 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
1004 	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
1005 		   snapshot->reg.ring_execlist_status);
1006 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
1007 		   snapshot->reg.ring_execlist_sq_contents);
1008 	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
1009 	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
1010 	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
1011 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
1012 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
1013 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
1014 		   snapshot->reg.ring_mode);
1015 	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
1016 	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
1017 	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
1018 	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
1019 	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
1020 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
1021 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
1022 	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
1023 		   snapshot->reg.indirect_ring_state);
1024 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
1025 	xe_hw_engine_snapshot_instdone_print(snapshot, p);
1026 
1027 	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
1028 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
1029 			   snapshot->reg.rcu_mode);
1030 	drm_puts(p, "\n");
1031 }
1032 
1033 /**
1034  * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
1035  * @snapshot: Xe HW Engine snapshot object.
1036  *
1037  * This function frees all the memory that was allocated at capture
1038  * time.
1039  */
1040 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
1041 {
1042 	if (!snapshot)
1043 		return;
1044 
1045 	kfree(snapshot->reg.instdone.slice_common);
1046 	kfree(snapshot->reg.instdone.slice_common_extra);
1047 	kfree(snapshot->reg.instdone.slice_common_extra2);
1048 	kfree(snapshot->reg.instdone.sampler);
1049 	kfree(snapshot->reg.instdone.row);
1050 	kfree(snapshot->reg.instdone.geom_svg);
1051 	kfree(snapshot->name);
1052 	kfree(snapshot);
1053 }
1054 
1055 /**
1056  * xe_hw_engine_print - Xe HW Engine Print.
1057  * @hwe: Hardware Engine.
1058  * @p: drm_printer.
1059  *
1060  * This function quickly captures a snapshot and immediately prints it out.
1061  */
1062 void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
1063 {
1064 	struct xe_hw_engine_snapshot *snapshot;
1065 
1066 	snapshot = xe_hw_engine_snapshot_capture(hwe);
1067 	xe_hw_engine_snapshot_print(snapshot, p);
1068 	xe_hw_engine_snapshot_free(snapshot);
1069 }
1070 
1071 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
1072 				enum xe_engine_class engine_class)
1073 {
1074 	u32 mask = 0;
1075 	enum xe_hw_engine_id id;
1076 
1077 	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
1078 		if (engine_infos[id].class == engine_class &&
1079 		    gt->info.engine_mask & BIT(id))
1080 			mask |= BIT(engine_infos[id].instance);
1081 	}
1082 	return mask;
1083 }
1084 
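/*
 * Usage sketch, not part of the original file: the returned mask is keyed
 * by hardware instance, so counting the engines of a class is a popcount,
 * e.g.:
 *
 *	u32 ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
 *	int num_ccs = hweight32(ccs_mask);
 */
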
1085 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
1086 {
1087 	struct xe_gt *gt = hwe->gt;
1088 	struct xe_device *xe = gt_to_xe(gt);
1089 
1090 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
1091 		return true;
1092 
1093 	/* Check for engines disabled by ccs_mode setting */
1094 	if (xe_gt_ccs_mode_enabled(gt) &&
1095 	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
1096 	    hwe->logical_instance >= gt->ccs_mode)
1097 		return true;
1098 
1099 	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
1100 		hwe->instance == gt->usm.reserved_bcs_instance;
1101 }
1102 
1103 const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
1104 {
1105 	switch (class) {
1106 	case XE_ENGINE_CLASS_RENDER:
1107 		return "rcs";
1108 	case XE_ENGINE_CLASS_VIDEO_DECODE:
1109 		return "vcs";
1110 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
1111 		return "vecs";
1112 	case XE_ENGINE_CLASS_COPY:
1113 		return "bcs";
1114 	case XE_ENGINE_CLASS_OTHER:
1115 		return "other";
1116 	case XE_ENGINE_CLASS_COMPUTE:
1117 		return "ccs";
1118 	case XE_ENGINE_CLASS_MAX:
1119 		break;
1120 	}
1121 
1122 	return NULL;
1123 }
1124 
1125 u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
1126 {
1127 	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
1128 }
1129