xref: /linux/drivers/gpu/drm/xe/xe_hw_engine.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_hw_engine.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_bo.h"
14 #include "xe_device.h"
15 #include "xe_execlist.h"
16 #include "xe_force_wake.h"
17 #include "xe_gt.h"
18 #include "xe_gt_ccs_mode.h"
19 #include "xe_gt_topology.h"
20 #include "xe_hw_fence.h"
21 #include "xe_irq.h"
22 #include "xe_lrc.h"
23 #include "xe_macros.h"
24 #include "xe_mmio.h"
25 #include "xe_reg_sr.h"
26 #include "xe_rtp.h"
27 #include "xe_sched_job.h"
28 #include "xe_sriov.h"
29 #include "xe_tuning.h"
30 #include "xe_uc_fw.h"
31 #include "xe_wa.h"
32 
33 #define MAX_MMIO_BASES 3
34 struct engine_info {
35 	const char *name;
36 	unsigned int class : 8;
37 	unsigned int instance : 8;
38 	unsigned int irq_offset : 8;
39 	enum xe_force_wake_domains domain;
40 	u32 mmio_base;
41 };
42 
43 static const struct engine_info engine_infos[] = {
44 	[XE_HW_ENGINE_RCS0] = {
45 		.name = "rcs0",
46 		.class = XE_ENGINE_CLASS_RENDER,
47 		.instance = 0,
48 		.irq_offset = ilog2(INTR_RCS0),
49 		.domain = XE_FW_RENDER,
50 		.mmio_base = RENDER_RING_BASE,
51 	},
52 	[XE_HW_ENGINE_BCS0] = {
53 		.name = "bcs0",
54 		.class = XE_ENGINE_CLASS_COPY,
55 		.instance = 0,
56 		.irq_offset = ilog2(INTR_BCS(0)),
57 		.domain = XE_FW_RENDER,
58 		.mmio_base = BLT_RING_BASE,
59 	},
60 	[XE_HW_ENGINE_BCS1] = {
61 		.name = "bcs1",
62 		.class = XE_ENGINE_CLASS_COPY,
63 		.instance = 1,
64 		.irq_offset = ilog2(INTR_BCS(1)),
65 		.domain = XE_FW_RENDER,
66 		.mmio_base = XEHPC_BCS1_RING_BASE,
67 	},
68 	[XE_HW_ENGINE_BCS2] = {
69 		.name = "bcs2",
70 		.class = XE_ENGINE_CLASS_COPY,
71 		.instance = 2,
72 		.irq_offset = ilog2(INTR_BCS(2)),
73 		.domain = XE_FW_RENDER,
74 		.mmio_base = XEHPC_BCS2_RING_BASE,
75 	},
76 	[XE_HW_ENGINE_BCS3] = {
77 		.name = "bcs3",
78 		.class = XE_ENGINE_CLASS_COPY,
79 		.instance = 3,
80 		.irq_offset = ilog2(INTR_BCS(3)),
81 		.domain = XE_FW_RENDER,
82 		.mmio_base = XEHPC_BCS3_RING_BASE,
83 	},
84 	[XE_HW_ENGINE_BCS4] = {
85 		.name = "bcs4",
86 		.class = XE_ENGINE_CLASS_COPY,
87 		.instance = 4,
88 		.irq_offset = ilog2(INTR_BCS(4)),
89 		.domain = XE_FW_RENDER,
90 		.mmio_base = XEHPC_BCS4_RING_BASE,
91 	},
92 	[XE_HW_ENGINE_BCS5] = {
93 		.name = "bcs5",
94 		.class = XE_ENGINE_CLASS_COPY,
95 		.instance = 5,
96 		.irq_offset = ilog2(INTR_BCS(5)),
97 		.domain = XE_FW_RENDER,
98 		.mmio_base = XEHPC_BCS5_RING_BASE,
99 	},
100 	[XE_HW_ENGINE_BCS6] = {
101 		.name = "bcs6",
102 		.class = XE_ENGINE_CLASS_COPY,
103 		.instance = 6,
104 		.irq_offset = ilog2(INTR_BCS(6)),
105 		.domain = XE_FW_RENDER,
106 		.mmio_base = XEHPC_BCS6_RING_BASE,
107 	},
108 	[XE_HW_ENGINE_BCS7] = {
109 		.name = "bcs7",
110 		.class = XE_ENGINE_CLASS_COPY,
111 		.irq_offset = ilog2(INTR_BCS(7)),
112 		.instance = 7,
113 		.domain = XE_FW_RENDER,
114 		.mmio_base = XEHPC_BCS7_RING_BASE,
115 	},
116 	[XE_HW_ENGINE_BCS8] = {
117 		.name = "bcs8",
118 		.class = XE_ENGINE_CLASS_COPY,
119 		.instance = 8,
120 		.irq_offset = ilog2(INTR_BCS8),
121 		.domain = XE_FW_RENDER,
122 		.mmio_base = XEHPC_BCS8_RING_BASE,
123 	},
124 
125 	[XE_HW_ENGINE_VCS0] = {
126 		.name = "vcs0",
127 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
128 		.instance = 0,
129 		.irq_offset = 32 + ilog2(INTR_VCS(0)),
130 		.domain = XE_FW_MEDIA_VDBOX0,
131 		.mmio_base = BSD_RING_BASE,
132 	},
133 	[XE_HW_ENGINE_VCS1] = {
134 		.name = "vcs1",
135 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
136 		.instance = 1,
137 		.irq_offset = 32 + ilog2(INTR_VCS(1)),
138 		.domain = XE_FW_MEDIA_VDBOX1,
139 		.mmio_base = BSD2_RING_BASE,
140 	},
141 	[XE_HW_ENGINE_VCS2] = {
142 		.name = "vcs2",
143 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
144 		.instance = 2,
145 		.irq_offset = 32 + ilog2(INTR_VCS(2)),
146 		.domain = XE_FW_MEDIA_VDBOX2,
147 		.mmio_base = BSD3_RING_BASE,
148 	},
149 	[XE_HW_ENGINE_VCS3] = {
150 		.name = "vcs3",
151 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
152 		.instance = 3,
153 		.irq_offset = 32 + ilog2(INTR_VCS(3)),
154 		.domain = XE_FW_MEDIA_VDBOX3,
155 		.mmio_base = BSD4_RING_BASE,
156 	},
157 	[XE_HW_ENGINE_VCS4] = {
158 		.name = "vcs4",
159 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
160 		.instance = 4,
161 		.irq_offset = 32 + ilog2(INTR_VCS(4)),
162 		.domain = XE_FW_MEDIA_VDBOX4,
163 		.mmio_base = XEHP_BSD5_RING_BASE,
164 	},
165 	[XE_HW_ENGINE_VCS5] = {
166 		.name = "vcs5",
167 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
168 		.instance = 5,
169 		.irq_offset = 32 + ilog2(INTR_VCS(5)),
170 		.domain = XE_FW_MEDIA_VDBOX5,
171 		.mmio_base = XEHP_BSD6_RING_BASE,
172 	},
173 	[XE_HW_ENGINE_VCS6] = {
174 		.name = "vcs6",
175 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
176 		.instance = 6,
177 		.irq_offset = 32 + ilog2(INTR_VCS(6)),
178 		.domain = XE_FW_MEDIA_VDBOX6,
179 		.mmio_base = XEHP_BSD7_RING_BASE,
180 	},
181 	[XE_HW_ENGINE_VCS7] = {
182 		.name = "vcs7",
183 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
184 		.instance = 7,
185 		.irq_offset = 32 + ilog2(INTR_VCS(7)),
186 		.domain = XE_FW_MEDIA_VDBOX7,
187 		.mmio_base = XEHP_BSD8_RING_BASE,
188 	},
189 	[XE_HW_ENGINE_VECS0] = {
190 		.name = "vecs0",
191 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
192 		.instance = 0,
193 		.irq_offset = 32 + ilog2(INTR_VECS(0)),
194 		.domain = XE_FW_MEDIA_VEBOX0,
195 		.mmio_base = VEBOX_RING_BASE,
196 	},
197 	[XE_HW_ENGINE_VECS1] = {
198 		.name = "vecs1",
199 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
200 		.instance = 1,
201 		.irq_offset = 32 + ilog2(INTR_VECS(1)),
202 		.domain = XE_FW_MEDIA_VEBOX1,
203 		.mmio_base = VEBOX2_RING_BASE,
204 	},
205 	[XE_HW_ENGINE_VECS2] = {
206 		.name = "vecs2",
207 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
208 		.instance = 2,
209 		.irq_offset = 32 + ilog2(INTR_VECS(2)),
210 		.domain = XE_FW_MEDIA_VEBOX2,
211 		.mmio_base = XEHP_VEBOX3_RING_BASE,
212 	},
213 	[XE_HW_ENGINE_VECS3] = {
214 		.name = "vecs3",
215 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
216 		.instance = 3,
217 		.irq_offset = 32 + ilog2(INTR_VECS(3)),
218 		.domain = XE_FW_MEDIA_VEBOX3,
219 		.mmio_base = XEHP_VEBOX4_RING_BASE,
220 	},
221 	[XE_HW_ENGINE_CCS0] = {
222 		.name = "ccs0",
223 		.class = XE_ENGINE_CLASS_COMPUTE,
224 		.instance = 0,
225 		.irq_offset = ilog2(INTR_CCS(0)),
226 		.domain = XE_FW_RENDER,
227 		.mmio_base = COMPUTE0_RING_BASE,
228 	},
229 	[XE_HW_ENGINE_CCS1] = {
230 		.name = "ccs1",
231 		.class = XE_ENGINE_CLASS_COMPUTE,
232 		.instance = 1,
233 		.irq_offset = ilog2(INTR_CCS(1)),
234 		.domain = XE_FW_RENDER,
235 		.mmio_base = COMPUTE1_RING_BASE,
236 	},
237 	[XE_HW_ENGINE_CCS2] = {
238 		.name = "ccs2",
239 		.class = XE_ENGINE_CLASS_COMPUTE,
240 		.instance = 2,
241 		.irq_offset = ilog2(INTR_CCS(2)),
242 		.domain = XE_FW_RENDER,
243 		.mmio_base = COMPUTE2_RING_BASE,
244 	},
245 	[XE_HW_ENGINE_CCS3] = {
246 		.name = "ccs3",
247 		.class = XE_ENGINE_CLASS_COMPUTE,
248 		.instance = 3,
249 		.irq_offset = ilog2(INTR_CCS(3)),
250 		.domain = XE_FW_RENDER,
251 		.mmio_base = COMPUTE3_RING_BASE,
252 	},
253 	[XE_HW_ENGINE_GSCCS0] = {
254 		.name = "gsccs0",
255 		.class = XE_ENGINE_CLASS_OTHER,
256 		.instance = OTHER_GSC_INSTANCE,
257 		.domain = XE_FW_GSC,
258 		.mmio_base = GSCCS_RING_BASE,
259 	},
260 };
261 
262 static void hw_engine_fini(struct drm_device *drm, void *arg)
263 {
264 	struct xe_hw_engine *hwe = arg;
265 
266 	if (hwe->exl_port)
267 		xe_execlist_port_destroy(hwe->exl_port);
268 	xe_lrc_finish(&hwe->kernel_lrc);
269 
270 	hwe->gt = NULL;
271 }
272 
273 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
274 				   u32 val)
275 {
276 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
277 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
278 
279 	reg.addr += hwe->mmio_base;
280 
281 	xe_mmio_write32(hwe->gt, reg, val);
282 }
283 
284 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
285 {
286 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
287 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
288 
289 	reg.addr += hwe->mmio_base;
290 
291 	return xe_mmio_read32(hwe->gt, reg);
292 }
293 
294 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
295 {
296 	u32 ccs_mask =
297 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
298 
299 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
300 		xe_mmio_write32(hwe->gt, RCU_MODE,
301 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
302 
303 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
304 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
305 			       xe_bo_ggtt_addr(hwe->hwsp));
306 	hw_engine_mmio_write32(hwe, RING_MODE(0),
307 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
308 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
309 			       _MASKED_BIT_DISABLE(STOP_RING));
310 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
311 }
312 
313 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
314 						 const struct xe_hw_engine *hwe)
315 {
316 	return xe_gt_ccs_mode_enabled(gt) &&
317 	       xe_rtp_match_first_render_or_compute(gt, hwe);
318 }
319 
320 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
321 				      const struct xe_hw_engine *hwe)
322 {
323 	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
324 		return false;
325 
326 	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
327 	    hwe->class != XE_ENGINE_CLASS_RENDER)
328 		return false;
329 
330 	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
331 }
332 
333 void
334 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
335 {
336 	struct xe_gt *gt = hwe->gt;
337 	const u8 mocs_write_idx = gt->mocs.uc_index;
338 	const u8 mocs_read_idx = gt->mocs.uc_index;
339 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
340 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
341 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
342 	const struct xe_rtp_entry_sr lrc_was[] = {
343 		/*
344 		 * Some blitter commands do not have a field for MOCS, those
345 		 * commands will use MOCS index pointed by BLIT_CCTL.
346 		 * BLIT_CCTL registers are needed to be programmed to un-cached.
347 		 */
348 		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
349 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
350 			       ENGINE_CLASS(COPY)),
351 		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
352 				 BLIT_CCTL_DST_MOCS_MASK |
353 				 BLIT_CCTL_SRC_MOCS_MASK,
354 				 blit_cctl_val,
355 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
356 		},
357 		/* Use Fixed slice CCS mode */
358 		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
359 		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
360 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
361 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
362 		},
363 		/* Disable WMTP if HW doesn't support it */
364 		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
365 		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
366 		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
367 					   PREEMPT_GPGPU_LEVEL_MASK,
368 					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
369 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
370 		},
371 		{}
372 	};
373 
374 	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
375 }
376 
377 static void
378 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
379 {
380 	struct xe_gt *gt = hwe->gt;
381 	struct xe_device *xe = gt_to_xe(gt);
382 	/*
383 	 * RING_CMD_CCTL specifies the default MOCS entry that will be
384 	 * used by the command streamer when executing commands that
385 	 * don't have a way to explicitly specify a MOCS setting.
386 	 * The default should usually reference whichever MOCS entry
387 	 * corresponds to uncached behavior, although use of a WB cached
388 	 * entry is recommended by the spec in certain circumstances on
389 	 * specific platforms.
390 	 * Bspec: 72161
391 	 */
392 	const u8 mocs_write_idx = gt->mocs.uc_index;
393 	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
394 				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
395 				 gt->mocs.wb_index : gt->mocs.uc_index;
396 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
397 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
398 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
399 	const struct xe_rtp_entry_sr engine_entries[] = {
400 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
401 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
402 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
403 					   CMD_CCTL_WRITE_OVERRIDE_MASK |
404 					   CMD_CCTL_READ_OVERRIDE_MASK,
405 					   ring_cmd_cctl_val,
406 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
407 		},
408 		/*
409 		 * To allow the GSC engine to go idle on MTL we need to enable
410 		 * idle messaging and set the hysteresis value (we use 0xA=5us
411 		 * as recommended in spec). On platforms after MTL this is
412 		 * enabled by default.
413 		 */
414 		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
415 		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
416 		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
417 				     IDLE_MSG_DISABLE,
418 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
419 				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
420 					   IDLE_WAIT_TIME,
421 					   0xA,
422 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
423 		},
424 		{}
425 	};
426 
427 	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
428 }
429 
430 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
431 				 enum xe_hw_engine_id id)
432 {
433 	const struct engine_info *info;
434 
435 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
436 		return;
437 
438 	if (!(gt->info.engine_mask & BIT(id)))
439 		return;
440 
441 	info = &engine_infos[id];
442 
443 	xe_gt_assert(gt, !hwe->gt);
444 
445 	hwe->gt = gt;
446 	hwe->class = info->class;
447 	hwe->instance = info->instance;
448 	hwe->mmio_base = info->mmio_base;
449 	hwe->irq_offset = info->irq_offset;
450 	hwe->domain = info->domain;
451 	hwe->name = info->name;
452 	hwe->fence_irq = &gt->fence_irq[info->class];
453 	hwe->engine_id = id;
454 
455 	hwe->eclass = &gt->eclass[hwe->class];
456 	if (!hwe->eclass->sched_props.job_timeout_ms) {
457 		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
458 		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
459 		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
460 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
461 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
462 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
463 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
464 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
465 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
466 		/* Record default props */
467 		hwe->eclass->defaults = hwe->eclass->sched_props;
468 	}
469 
470 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
471 	xe_tuning_process_engine(hwe);
472 	xe_wa_process_engine(hwe);
473 	hw_engine_setup_default_state(hwe);
474 
475 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
476 	xe_reg_whitelist_process_engine(hwe);
477 }
478 
479 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
480 			  enum xe_hw_engine_id id)
481 {
482 	struct xe_device *xe = gt_to_xe(gt);
483 	struct xe_tile *tile = gt_to_tile(gt);
484 	int err;
485 
486 	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
487 	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
488 
489 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
490 	xe_reg_sr_apply_whitelist(hwe);
491 
492 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
493 						 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
494 						 XE_BO_CREATE_GGTT_BIT);
495 	if (IS_ERR(hwe->hwsp)) {
496 		err = PTR_ERR(hwe->hwsp);
497 		goto err_name;
498 	}
499 
500 	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
501 	if (err)
502 		goto err_hwsp;
503 
504 	if (!xe_device_uc_enabled(xe)) {
505 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
506 		if (IS_ERR(hwe->exl_port)) {
507 			err = PTR_ERR(hwe->exl_port);
508 			goto err_kernel_lrc;
509 		}
510 	}
511 
512 	if (xe_device_uc_enabled(xe))
513 		xe_hw_engine_enable_ring(hwe);
514 
515 	/* We reserve the highest BCS instance for USM */
516 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
517 		gt->usm.reserved_bcs_instance = hwe->instance;
518 
519 	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
520 	if (err)
521 		return err;
522 
523 	return 0;
524 
525 err_kernel_lrc:
526 	xe_lrc_finish(&hwe->kernel_lrc);
527 err_hwsp:
528 	xe_bo_unpin_map_no_vm(hwe->hwsp);
529 err_name:
530 	hwe->name = NULL;
531 
532 	return err;
533 }
534 
535 static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
536 {
537 	int class;
538 
539 	/* FIXME: Doing a simple logical mapping that works for most hardware */
540 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
541 		struct xe_hw_engine *hwe;
542 		enum xe_hw_engine_id id;
543 		int logical_instance = 0;
544 
545 		for_each_hw_engine(hwe, gt, id)
546 			if (hwe->class == class)
547 				hwe->logical_instance = logical_instance++;
548 	}
549 }
550 
551 static void read_media_fuses(struct xe_gt *gt)
552 {
553 	struct xe_device *xe = gt_to_xe(gt);
554 	u32 media_fuse;
555 	u16 vdbox_mask;
556 	u16 vebox_mask;
557 	int i, j;
558 
559 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
560 
561 	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
562 
563 	/*
564 	 * Pre-Xe_HP platforms had register bits representing absent engines,
565 	 * whereas Xe_HP and beyond have bits representing present engines.
566 	 * Invert the polarity on old platforms so that we can use common
567 	 * handling below.
568 	 */
569 	if (GRAPHICS_VERx100(xe) < 1250)
570 		media_fuse = ~media_fuse;
571 
572 	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
573 	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
574 
575 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
576 		if (!(gt->info.engine_mask & BIT(i)))
577 			continue;
578 
579 		if (!(BIT(j) & vdbox_mask)) {
580 			gt->info.engine_mask &= ~BIT(i);
581 			drm_info(&xe->drm, "vcs%u fused off\n", j);
582 		}
583 	}
584 
585 	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
586 		if (!(gt->info.engine_mask & BIT(i)))
587 			continue;
588 
589 		if (!(BIT(j) & vebox_mask)) {
590 			gt->info.engine_mask &= ~BIT(i);
591 			drm_info(&xe->drm, "vecs%u fused off\n", j);
592 		}
593 	}
594 }
595 
596 static void read_copy_fuses(struct xe_gt *gt)
597 {
598 	struct xe_device *xe = gt_to_xe(gt);
599 	u32 bcs_mask;
600 
601 	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
602 		return;
603 
604 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
605 
606 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
607 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
608 
609 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
610 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
611 		if (!(gt->info.engine_mask & BIT(i)))
612 			continue;
613 
614 		if (!(BIT(j / 2) & bcs_mask)) {
615 			gt->info.engine_mask &= ~BIT(i);
616 			drm_info(&xe->drm, "bcs%u fused off\n", j);
617 		}
618 	}
619 }
620 
621 static void read_compute_fuses_from_dss(struct xe_gt *gt)
622 {
623 	struct xe_device *xe = gt_to_xe(gt);
624 
625 	/*
626 	 * CCS fusing based on DSS masks only applies to platforms that can
627 	 * have more than one CCS.
628 	 */
629 	if (hweight64(gt->info.engine_mask &
630 		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
631 		return;
632 
633 	/*
634 	 * CCS availability on Xe_HP is inferred from the presence of DSS in
635 	 * each quadrant.
636 	 */
637 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
638 		if (!(gt->info.engine_mask & BIT(i)))
639 			continue;
640 
641 		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
642 			gt->info.engine_mask &= ~BIT(i);
643 			drm_info(&xe->drm, "ccs%u fused off\n", j);
644 		}
645 	}
646 }
647 
648 static void read_compute_fuses_from_reg(struct xe_gt *gt)
649 {
650 	struct xe_device *xe = gt_to_xe(gt);
651 	u32 ccs_mask;
652 
653 	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
654 	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
655 
656 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
657 		if (!(gt->info.engine_mask & BIT(i)))
658 			continue;
659 
660 		if ((ccs_mask & BIT(j)) == 0) {
661 			gt->info.engine_mask &= ~BIT(i);
662 			drm_info(&xe->drm, "ccs%u fused off\n", j);
663 		}
664 	}
665 }
666 
/*
 * Pick the compute fuse source: the fuse register for graphics version 20
 * and newer, the DSS topology otherwise.
 */
static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20) {
		read_compute_fuses_from_dss(gt);
		return;
	}

	read_compute_fuses_from_reg(gt);
}
674 
675 static void check_gsc_availability(struct xe_gt *gt)
676 {
677 	struct xe_device *xe = gt_to_xe(gt);
678 
679 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
680 		return;
681 
682 	/*
683 	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
684 	 * have the FW there is nothing we need the engine for and can therefore
685 	 * skip its initialization.
686 	 */
687 	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
688 		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
689 		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
690 	}
691 }
692 
693 int xe_hw_engines_init_early(struct xe_gt *gt)
694 {
695 	int i;
696 
697 	read_media_fuses(gt);
698 	read_copy_fuses(gt);
699 	read_compute_fuses(gt);
700 	check_gsc_availability(gt);
701 
702 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
703 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
704 
705 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
706 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
707 
708 	return 0;
709 }
710 
711 int xe_hw_engines_init(struct xe_gt *gt)
712 {
713 	int err;
714 	struct xe_hw_engine *hwe;
715 	enum xe_hw_engine_id id;
716 
717 	for_each_hw_engine(hwe, gt, id) {
718 		err = hw_engine_init(gt, hwe, id);
719 		if (err)
720 			return err;
721 	}
722 
723 	hw_engine_setup_logical_mapping(gt);
724 
725 	return 0;
726 }
727 
728 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
729 {
730 	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
731 
732 	if (hwe->irq_handler)
733 		hwe->irq_handler(hwe, intr_vec);
734 
735 	if (intr_vec & GT_RENDER_USER_INTERRUPT)
736 		xe_hw_fence_irq_run(hwe->fence_irq);
737 }
738 
739 /**
740  * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
741  * @hwe: Xe HW Engine.
742  *
743  * This can be printed out in a later stage like during dev_coredump
744  * analysis.
745  *
746  * Returns: a Xe HW Engine snapshot object that must be freed by the
747  * caller, using `xe_hw_engine_snapshot_free`.
748  */
749 struct xe_hw_engine_snapshot *
750 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
751 {
752 	struct xe_hw_engine_snapshot *snapshot;
753 	u64 val;
754 
755 	if (!xe_hw_engine_is_valid(hwe))
756 		return NULL;
757 
758 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
759 
760 	if (!snapshot)
761 		return NULL;
762 
763 	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
764 	snapshot->class = hwe->class;
765 	snapshot->logical_instance = hwe->logical_instance;
766 	snapshot->forcewake.domain = hwe->domain;
767 	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
768 						    hwe->domain);
769 	snapshot->mmio_base = hwe->mmio_base;
770 
771 	/* no more VF accessible data below this point */
772 	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
773 		return snapshot;
774 
775 	snapshot->reg.ring_execlist_status =
776 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
777 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
778 	snapshot->reg.ring_execlist_status |= val << 32;
779 
780 	snapshot->reg.ring_execlist_sq_contents =
781 		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
782 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
783 	snapshot->reg.ring_execlist_sq_contents |= val << 32;
784 
785 	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
786 	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
787 	snapshot->reg.ring_acthd |= val << 32;
788 
789 	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
790 	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
791 	snapshot->reg.ring_bbaddr |= val << 32;
792 
793 	snapshot->reg.ring_dma_fadd =
794 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
795 	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
796 	snapshot->reg.ring_dma_fadd |= val << 32;
797 
798 	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
799 	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
800 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
801 	snapshot->reg.ring_head =
802 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
803 	snapshot->reg.ring_tail =
804 		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
805 	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
806 	snapshot->reg.ring_mi_mode =
807 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
808 	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
809 	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
810 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
811 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
812 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
813 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
814 
815 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
816 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
817 
818 	return snapshot;
819 }
820 
821 /**
822  * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
823  * @snapshot: Xe HW Engine snapshot object.
824  * @p: drm_printer where it will be printed out.
825  *
826  * This function prints out a given Xe HW Engine snapshot object.
827  */
828 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
829 				 struct drm_printer *p)
830 {
831 	if (!snapshot)
832 		return;
833 
834 	drm_printf(p, "%s (physical), logical instance=%d\n",
835 		   snapshot->name ? snapshot->name : "",
836 		   snapshot->logical_instance);
837 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
838 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
839 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
840 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
841 	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
842 		   snapshot->reg.ring_execlist_status);
843 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
844 		   snapshot->reg.ring_execlist_sq_contents);
845 	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
846 	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
847 	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
848 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
849 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
850 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
851 		   snapshot->reg.ring_mode);
852 	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
853 	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
854 	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
855 	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
856 	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
857 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
858 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
859 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
860 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
861 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
862 			   snapshot->reg.rcu_mode);
863 }
864 
865 /**
866  * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
867  * @snapshot: Xe HW Engine snapshot object.
868  *
869  * This function free all the memory that needed to be allocated at capture
870  * time.
871  */
872 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
873 {
874 	if (!snapshot)
875 		return;
876 
877 	kfree(snapshot->name);
878 	kfree(snapshot);
879 }
880 
/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * Captures a snapshot of @hwe, prints it to @p right away and frees it
 * again.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snap = xe_hw_engine_snapshot_capture(hwe);

	/* Both helpers below accept a NULL snapshot */
	xe_hw_engine_snapshot_print(snap, p);
	xe_hw_engine_snapshot_free(snap);
}
896 
897 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
898 				enum xe_engine_class engine_class)
899 {
900 	u32 mask = 0;
901 	enum xe_hw_engine_id id;
902 
903 	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
904 		if (engine_infos[id].class == engine_class &&
905 		    gt->info.engine_mask & BIT(id))
906 			mask |= BIT(engine_infos[id].instance);
907 	}
908 	return mask;
909 }
910 
911 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
912 {
913 	struct xe_gt *gt = hwe->gt;
914 	struct xe_device *xe = gt_to_xe(gt);
915 
916 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
917 		return true;
918 
919 	/* Check for engines disabled by ccs_mode setting */
920 	if (xe_gt_ccs_mode_enabled(gt) &&
921 	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
922 	    hwe->logical_instance >= gt->ccs_mode)
923 		return true;
924 
925 	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
926 		hwe->instance == gt->usm.reserved_bcs_instance;
927 }
928