// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

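/*
 * Note on irq_offset (inferred from the table below): it is the bit
 * position of the engine's interrupt within the GT interrupt identity,
 * hence the ilog2() of the per-engine INTR_* bit; the media engines'
 * bits live in the upper half of the 64-bit identity, hence the
 * "32 +" in the VCS/VECS entries.
 */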
static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);
	xe_lrc_put(hwe->kernel_lrc);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function writes val into an engine-specific register, offsetting
 * reg by the engine's MMIO base. Forcewake must be held by the caller.
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function reads from an engine-specific register, offsetting reg
 * by the engine's MMIO base. Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}
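
/*
 * Illustrative usage sketch for the accessors above (not driver code):
 * the caller takes forcewake for the engine's domain around the access,
 * e.g.:
 *
 *	int err = xe_force_wake_get(gt_to_fw(hwe->gt), hwe->domain);
 *
 *	if (!err) {
 *		u32 mode = xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
 *
 *		xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0), mode);
 *		xe_force_wake_put(gt_to_fw(hwe->gt), hwe->domain);
 *	}
 */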

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL, so the
		 * BLIT_CCTL registers need to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
				 BLIT_CCTL_DST_MOCS_MASK |
				 BLIT_CCTL_SRC_MOCS_MASK,
				 blit_cctl_val,
				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
}
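
/*
 * Illustrative sketch (not driver code): a FIELD_SET action such as the
 * BLIT_CCTL entry above records a (mask, value) pair, and applying the
 * resulting save/restore entry is equivalent to the masked update
 *
 *	u32 old = xe_hw_engine_mmio_read32(hwe, BLIT_CCTL(0));
 *	u32 new = (old & ~(BLIT_CCTL_DST_MOCS_MASK |
 *			   BLIT_CCTL_SRC_MOCS_MASK)) | blit_cctl_val;
 *
 * except that entries gathered into &hwe->reg_lrc are patched into the
 * default LRC state rather than written through MMIO.
 */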

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K);
	if (IS_ERR(hwe->kernel_lrc)) {
		err = PTR_ERR(hwe->kernel_lrc);
		goto err_hwsp;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_kernel_lrc;
		}
	}

	if (xe_device_uc_enabled(xe)) {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_kernel_lrc:
	xe_lrc_put(hwe->kernel_lrc);
err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}
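
/*
 * Worked example: if fusing leaves only vcs0 and vcs2 in
 * gt->info.engine_mask, the loop above assigns them logical_instance 0
 * and 1 respectively, so each class always gets a contiguous logical
 * numbering regardless of which physical instances survived.
 */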

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
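	/*
	 * Each MEML3_EN bit gates a pair of copy engines, hence the
	 * j / 2 below: bit 0 covers bcs1/bcs2, bit 1 covers bcs3/bcs4,
	 * and so on.
	 */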
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);

		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
	}
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

static bool
is_slice_common_per_gslice(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) >= 1255;
}

static void
xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
				       struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int dss;
	u16 group, instance;

	snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (!is_slice_common_per_gslice(xe)) {
		snapshot->reg.instdone.slice_common[0] =
			xe_mmio_read32(gt, SC_INSTDONE);
		snapshot->reg.instdone.slice_common_extra[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
		snapshot->reg.instdone.slice_common_extra2[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			snapshot->reg.instdone.slice_common[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
			snapshot->reg.instdone.slice_common_extra[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
			snapshot->reg.instdone.slice_common_extra2[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		snapshot->reg.instdone.sampler[dss] =
			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
		snapshot->reg.instdone.row[dss] =
			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);

		if (GRAPHICS_VERx100(xe) >= 1255)
			snapshot->reg.instdone.geom_svg[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
						       group, instance);
	}
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * This snapshot can be printed out at a later stage, for example during
 * dev_coredump analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using xe_hw_engine_snapshot_free().
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	size_t len;
	u64 val;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	/*
	 * Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h and it
	 * includes xe_hw_engine_types.h, the lengths of these three registers
	 * can't be set in struct xe_hw_engine_snapshot, so do the additional
	 * allocations here.
	 */
	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
	if (!snapshot->reg.instdone.slice_common ||
	    !snapshot->reg.instdone.slice_common_extra ||
	    !snapshot->reg.instdone.slice_common_extra2 ||
	    !snapshot->reg.instdone.sampler ||
	    !snapshot->reg.instdone.row ||
	    !snapshot->reg.instdone.geom_svg) {
		xe_hw_engine_snapshot_free(snapshot);
		return NULL;
	}

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	snapshot->reg.ring_execlist_status =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_status |= val << 32;

	snapshot->reg.ring_execlist_sq_contents =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_execlist_sq_contents |= val << 32;

	snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd |= val << 32;

	snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr |= val << 32;

	snapshot->reg.ring_dma_fadd =
		xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd |= val << 32;

	snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
	snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
		val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
		snapshot->reg.ring_start |= val << 32;
	}
	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
		snapshot->reg.indirect_ring_state =
			xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
	}

	snapshot->reg.ring_head =
		xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

static void
xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u16 group, instance;
	unsigned int dss;

	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (!is_slice_common_per_gslice(xe)) {
		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra2[0]);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra2[dss]);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.sampler[dss]);
		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.row[dss]);

		if (GRAPHICS_VERx100(xe) >= 1255)
			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
				   dss, snapshot->reg.instdone.geom_svg[dss]);
	}
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_status);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_sq_contents);
	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
		   snapshot->reg.indirect_ring_state);
	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
	xe_hw_engine_snapshot_instdone_print(snapshot, p);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
	drm_puts(p, "\n");
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->reg.instdone.slice_common);
	kfree(snapshot->reg.instdone.slice_common_extra);
	kfree(snapshot->reg.instdone.slice_common_extra2);
	kfree(snapshot->reg.instdone.sampler);
	kfree(snapshot->reg.instdone.row);
	kfree(snapshot->reg.instdone.geom_svg);
	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Capture and print a HW engine snapshot.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}
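
/*
 * Illustrative example: on a part where only ccs1 and ccs3 survive
 * fusing, xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE)
 * returns BIT(1) | BIT(3) = 0xa; note the result is a mask of hardware
 * *instances*, not of xe_hw_engine_id values.
 */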

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for the given engine
 * class and instance.
 *
 * Return: the xe_hw_engine pointer if found, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}
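
/*
 * Illustrative usage sketch (not driver code): resolving an engine
 * description passed in from userspace, e.g. in an ioctl handler:
 *
 *	struct drm_xe_engine_class_instance eci = {
 *		.engine_class = DRM_XE_ENGINE_CLASS_COPY,
 *		.engine_instance = 0,
 *		.gt_id = 0,
 *	};
 *	struct xe_hw_engine *hwe = xe_hw_engine_lookup(xe, eci);
 *
 *	if (!hwe)
 *		return -EINVAL;
 */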