// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <drm/drm_managed.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_tuning.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};
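
/*
 * Illustrative sketch (not driver code): engine_infos is indexed by
 * enum xe_hw_engine_id, so resolving an engine's static description is
 * a direct table lookup guarded by the GT's engine mask:
 *
 *	if (gt->info.engine_mask & BIT(XE_HW_ENGINE_VCS1))
 *		base = engine_infos[XE_HW_ENGINE_VCS1].mmio_base;
 *
 * hw_engine_init_early() below does exactly this kind of lookup; "base"
 * is just a hypothetical local here.
 */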

static void hw_engine_fini(struct drm_device *drm, void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);
	xe_lrc_finish(&hwe->kernel_lrc);

	xe_bo_unpin_map_no_vm(hwe->hwsp);

	hwe->gt = NULL;
}

static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
				   u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}

static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}
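
/*
 * Worked example for the helpers above: engine register macros such as
 * RING_HEAD(base) are defined against an engine base of 0 and the
 * helpers add hwe->mmio_base at access time. Assuming RING_HEAD(0).addr
 * is 0x34, reading RING_HEAD on vcs1 (mmio_base == BSD2_RING_BASE)
 * becomes an MMIO read of BSD2_RING_BASE + 0x34. The asserts catch
 * registers whose offset already overlaps the engine base bits, i.e.
 * absolute registers passed in by mistake.
 */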

void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
			       xe_bo_ggtt_addr(hwe->hwsp));
	hw_engine_mmio_write32(hwe, RING_MODE(0),
			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
			       _MASKED_BIT_DISABLE(STOP_RING));
	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
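
/*
 * Note on the masked writes above: RING_MODE and RING_MI_MODE are
 * "masked" registers whose upper 16 bits select which of the lower 16
 * bits the write may change. For example, _MASKED_BIT_ENABLE(BIT(3))
 * expands to 0x00080008 (mask bit + value bit) and
 * _MASKED_BIT_DISABLE(BIT(3)) to 0x00080000 (mask bit only), so other
 * fields in the register are left untouched. The trailing read of
 * RING_MI_MODE acts as a posting read to flush the writes.
 */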

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_was[] = {
		/*
		 * Some blitter commands do not have a field for MOCS; those
		 * commands use the MOCS index pointed to by BLIT_CCTL.
		 * BLIT_CCTL therefore needs to be programmed to an uncached
		 * index.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
				 BLIT_CCTL_DST_MOCS_MASK |
				 BLIT_CCTL_SRC_MOCS_MASK,
				 blit_cctl_val,
				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
}
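
/*
 * A rough sketch (an assumption, not the RTP implementation) of what
 * the FIELD_SET() action above amounts to once applied: a
 * read-modify-write recorded in the hwe->reg_lrc save/restore table
 * rather than an immediate MMIO write, conceptually
 *
 *	new = (old & ~(BLIT_CCTL_DST_MOCS_MASK | BLIT_CCTL_SRC_MOCS_MASK)) |
 *	      blit_cctl_val;
 *
 * with XE_RTP_ACTION_FLAG(ENGINE_BASE) making BLIT_CCTL(0) relative to
 * hwe->mmio_base, like the mmio helpers earlier in this file.
 */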

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}
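
/*
 * Worked example for the REG_FIELD_PREP() composition above, with
 * illustrative (not actual) field definitions: if
 * CMD_CCTL_WRITE_OVERRIDE_MASK were GENMASK(14, 8) and mocs_write_idx
 * were 3, REG_FIELD_PREP() would shift the index into place to give
 * 0x300; OR-ing in the read field the same way yields the complete
 * ring_cmd_cctl_val that FIELD_SET() applies under the combined mask.
 */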

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
					 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
					 XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
	if (err)
		goto err_hwsp;

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_kernel_lrc;
		}
	}

	if (xe_device_uc_enabled(xe))
		xe_hw_engine_enable_ring(hwe);

	/* We reserve the highest BCS instance for USM */
	if (xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
	if (err)
		return err;

	return 0;

err_kernel_lrc:
	xe_lrc_finish(&hwe->kernel_lrc);
err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}
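
/*
 * Example of the mapping above: if vcs0 and vcs2 exist but vcs1 was
 * fused off, the surviving engines are packed densely (vcs0 -> logical
 * 0, vcs2 -> logical 1), so per-class iteration over logical instances
 * never sees holes regardless of which physical engines are present.
 */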

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}
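
/*
 * Polarity example for the inversion above: on a pre-Xe_HP part a raw
 * vdbox fuse field of 0b0010 means "vcs1 absent". After media_fuse is
 * inverted, bit 1 reads as 0 under the common "set bit means present"
 * test, so vcs1 is cleared from gt->info.engine_mask and reported
 * fused off.
 */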

static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j + 1);
		}
	}
}
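
/*
 * The j / 2 above encodes that link copy engines are enabled in pairs
 * by the MEML3 fuse bits: bcs1/bcs2 (j == 0, 1) depend on bit 0,
 * bcs3/bcs4 on bit 1, and so on. E.g. with bcs_mask == 0x1, only bcs1
 * and bcs2 (plus the always-present bcs0) survive.
 */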

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * The captured snapshot can be printed out at a later stage, e.g. during
 * dev_coredump analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using xe_hw_engine_snapshot_free().
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	int len;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
	if (!snapshot)
		return NULL;

	len = strlen(hwe->name) + 1;
	snapshot->name = kzalloc(len, GFP_ATOMIC);
	if (snapshot->name)
		strscpy(snapshot->name, hwe->name, len);

	snapshot->class = hwe->class;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
							   RING_HWS_PGA(0));
	snapshot->reg.ring_execlist_status_lo =
		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	snapshot->reg.ring_execlist_status_hi =
		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_sq_contents_lo =
		hw_engine_mmio_read32(hwe,
				      RING_EXECLIST_SQ_CONTENTS_LO(0));
	snapshot->reg.ring_execlist_sq_contents_hi =
		hw_engine_mmio_read32(hwe,
				      RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
	snapshot->reg.ring_head =
		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ring_acthd_udw =
		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	snapshot->reg.ring_bbaddr_udw =
		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	snapshot->reg.ring_dma_fadd_udw =
		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd =
		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));

	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
		   snapshot->reg.ring_execlist_status_lo);
	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
		   snapshot->reg.ring_execlist_status_hi);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
		   snapshot->reg.ring_execlist_sq_contents_lo);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
		   snapshot->reg.ring_execlist_sq_contents_hi);
	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR:   0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR:   0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR:   0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD:  0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
		   snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
		   snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
		   snapshot->reg.ring_dma_fadd_udw,
		   snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot and immediately prints it out.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}
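
/*
 * Example: on a GT exposing only ccs0 and ccs2,
 * xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE) returns 0x5.
 * The bits are per engine *instance* within the class, not per
 * xe_hw_engine_id, which is the form xe_hw_engine_enable_ring() checks
 * before enabling compute via RCU_MODE.
 */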

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
		hwe->instance == gt->usm.reserved_bcs_instance;
}
847