xref: /linux/drivers/gpu/drm/xe/xe_hw_engine.c (revision 68c402fe5c5e5aa9a04c8bba9d99feb08a68afa7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_hw_engine.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gt_regs.h"
12 #include "xe_assert.h"
13 #include "xe_bo.h"
14 #include "xe_device.h"
15 #include "xe_execlist.h"
16 #include "xe_force_wake.h"
17 #include "xe_gsc.h"
18 #include "xe_gt.h"
19 #include "xe_gt_ccs_mode.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_topology.h"
22 #include "xe_hw_fence.h"
23 #include "xe_irq.h"
24 #include "xe_lrc.h"
25 #include "xe_macros.h"
26 #include "xe_mmio.h"
27 #include "xe_reg_sr.h"
28 #include "xe_rtp.h"
29 #include "xe_sched_job.h"
30 #include "xe_sriov.h"
31 #include "xe_tuning.h"
32 #include "xe_uc_fw.h"
33 #include "xe_wa.h"
34 
35 #define MAX_MMIO_BASES 3
36 struct engine_info {
37 	const char *name;
38 	unsigned int class : 8;
39 	unsigned int instance : 8;
40 	unsigned int irq_offset : 8;
41 	enum xe_force_wake_domains domain;
42 	u32 mmio_base;
43 };
44 
45 static const struct engine_info engine_infos[] = {
46 	[XE_HW_ENGINE_RCS0] = {
47 		.name = "rcs0",
48 		.class = XE_ENGINE_CLASS_RENDER,
49 		.instance = 0,
50 		.irq_offset = ilog2(INTR_RCS0),
51 		.domain = XE_FW_RENDER,
52 		.mmio_base = RENDER_RING_BASE,
53 	},
54 	[XE_HW_ENGINE_BCS0] = {
55 		.name = "bcs0",
56 		.class = XE_ENGINE_CLASS_COPY,
57 		.instance = 0,
58 		.irq_offset = ilog2(INTR_BCS(0)),
59 		.domain = XE_FW_RENDER,
60 		.mmio_base = BLT_RING_BASE,
61 	},
62 	[XE_HW_ENGINE_BCS1] = {
63 		.name = "bcs1",
64 		.class = XE_ENGINE_CLASS_COPY,
65 		.instance = 1,
66 		.irq_offset = ilog2(INTR_BCS(1)),
67 		.domain = XE_FW_RENDER,
68 		.mmio_base = XEHPC_BCS1_RING_BASE,
69 	},
70 	[XE_HW_ENGINE_BCS2] = {
71 		.name = "bcs2",
72 		.class = XE_ENGINE_CLASS_COPY,
73 		.instance = 2,
74 		.irq_offset = ilog2(INTR_BCS(2)),
75 		.domain = XE_FW_RENDER,
76 		.mmio_base = XEHPC_BCS2_RING_BASE,
77 	},
78 	[XE_HW_ENGINE_BCS3] = {
79 		.name = "bcs3",
80 		.class = XE_ENGINE_CLASS_COPY,
81 		.instance = 3,
82 		.irq_offset = ilog2(INTR_BCS(3)),
83 		.domain = XE_FW_RENDER,
84 		.mmio_base = XEHPC_BCS3_RING_BASE,
85 	},
86 	[XE_HW_ENGINE_BCS4] = {
87 		.name = "bcs4",
88 		.class = XE_ENGINE_CLASS_COPY,
89 		.instance = 4,
90 		.irq_offset = ilog2(INTR_BCS(4)),
91 		.domain = XE_FW_RENDER,
92 		.mmio_base = XEHPC_BCS4_RING_BASE,
93 	},
94 	[XE_HW_ENGINE_BCS5] = {
95 		.name = "bcs5",
96 		.class = XE_ENGINE_CLASS_COPY,
97 		.instance = 5,
98 		.irq_offset = ilog2(INTR_BCS(5)),
99 		.domain = XE_FW_RENDER,
100 		.mmio_base = XEHPC_BCS5_RING_BASE,
101 	},
102 	[XE_HW_ENGINE_BCS6] = {
103 		.name = "bcs6",
104 		.class = XE_ENGINE_CLASS_COPY,
105 		.instance = 6,
106 		.irq_offset = ilog2(INTR_BCS(6)),
107 		.domain = XE_FW_RENDER,
108 		.mmio_base = XEHPC_BCS6_RING_BASE,
109 	},
110 	[XE_HW_ENGINE_BCS7] = {
111 		.name = "bcs7",
112 		.class = XE_ENGINE_CLASS_COPY,
113 		.instance = 7,
114 		.irq_offset = ilog2(INTR_BCS(7)),
115 		.domain = XE_FW_RENDER,
116 		.mmio_base = XEHPC_BCS7_RING_BASE,
117 	},
118 	[XE_HW_ENGINE_BCS8] = {
119 		.name = "bcs8",
120 		.class = XE_ENGINE_CLASS_COPY,
121 		.instance = 8,
122 		.irq_offset = ilog2(INTR_BCS8),
123 		.domain = XE_FW_RENDER,
124 		.mmio_base = XEHPC_BCS8_RING_BASE,
125 	},
126 
127 	[XE_HW_ENGINE_VCS0] = {
128 		.name = "vcs0",
129 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
130 		.instance = 0,
131 		.irq_offset = 32 + ilog2(INTR_VCS(0)),
132 		.domain = XE_FW_MEDIA_VDBOX0,
133 		.mmio_base = BSD_RING_BASE,
134 	},
135 	[XE_HW_ENGINE_VCS1] = {
136 		.name = "vcs1",
137 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
138 		.instance = 1,
139 		.irq_offset = 32 + ilog2(INTR_VCS(1)),
140 		.domain = XE_FW_MEDIA_VDBOX1,
141 		.mmio_base = BSD2_RING_BASE,
142 	},
143 	[XE_HW_ENGINE_VCS2] = {
144 		.name = "vcs2",
145 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
146 		.instance = 2,
147 		.irq_offset = 32 + ilog2(INTR_VCS(2)),
148 		.domain = XE_FW_MEDIA_VDBOX2,
149 		.mmio_base = BSD3_RING_BASE,
150 	},
151 	[XE_HW_ENGINE_VCS3] = {
152 		.name = "vcs3",
153 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
154 		.instance = 3,
155 		.irq_offset = 32 + ilog2(INTR_VCS(3)),
156 		.domain = XE_FW_MEDIA_VDBOX3,
157 		.mmio_base = BSD4_RING_BASE,
158 	},
159 	[XE_HW_ENGINE_VCS4] = {
160 		.name = "vcs4",
161 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
162 		.instance = 4,
163 		.irq_offset = 32 + ilog2(INTR_VCS(4)),
164 		.domain = XE_FW_MEDIA_VDBOX4,
165 		.mmio_base = XEHP_BSD5_RING_BASE,
166 	},
167 	[XE_HW_ENGINE_VCS5] = {
168 		.name = "vcs5",
169 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
170 		.instance = 5,
171 		.irq_offset = 32 + ilog2(INTR_VCS(5)),
172 		.domain = XE_FW_MEDIA_VDBOX5,
173 		.mmio_base = XEHP_BSD6_RING_BASE,
174 	},
175 	[XE_HW_ENGINE_VCS6] = {
176 		.name = "vcs6",
177 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
178 		.instance = 6,
179 		.irq_offset = 32 + ilog2(INTR_VCS(6)),
180 		.domain = XE_FW_MEDIA_VDBOX6,
181 		.mmio_base = XEHP_BSD7_RING_BASE,
182 	},
183 	[XE_HW_ENGINE_VCS7] = {
184 		.name = "vcs7",
185 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
186 		.instance = 7,
187 		.irq_offset = 32 + ilog2(INTR_VCS(7)),
188 		.domain = XE_FW_MEDIA_VDBOX7,
189 		.mmio_base = XEHP_BSD8_RING_BASE,
190 	},
191 	[XE_HW_ENGINE_VECS0] = {
192 		.name = "vecs0",
193 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
194 		.instance = 0,
195 		.irq_offset = 32 + ilog2(INTR_VECS(0)),
196 		.domain = XE_FW_MEDIA_VEBOX0,
197 		.mmio_base = VEBOX_RING_BASE,
198 	},
199 	[XE_HW_ENGINE_VECS1] = {
200 		.name = "vecs1",
201 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
202 		.instance = 1,
203 		.irq_offset = 32 + ilog2(INTR_VECS(1)),
204 		.domain = XE_FW_MEDIA_VEBOX1,
205 		.mmio_base = VEBOX2_RING_BASE,
206 	},
207 	[XE_HW_ENGINE_VECS2] = {
208 		.name = "vecs2",
209 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
210 		.instance = 2,
211 		.irq_offset = 32 + ilog2(INTR_VECS(2)),
212 		.domain = XE_FW_MEDIA_VEBOX2,
213 		.mmio_base = XEHP_VEBOX3_RING_BASE,
214 	},
215 	[XE_HW_ENGINE_VECS3] = {
216 		.name = "vecs3",
217 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
218 		.instance = 3,
219 		.irq_offset = 32 + ilog2(INTR_VECS(3)),
220 		.domain = XE_FW_MEDIA_VEBOX3,
221 		.mmio_base = XEHP_VEBOX4_RING_BASE,
222 	},
223 	[XE_HW_ENGINE_CCS0] = {
224 		.name = "ccs0",
225 		.class = XE_ENGINE_CLASS_COMPUTE,
226 		.instance = 0,
227 		.irq_offset = ilog2(INTR_CCS(0)),
228 		.domain = XE_FW_RENDER,
229 		.mmio_base = COMPUTE0_RING_BASE,
230 	},
231 	[XE_HW_ENGINE_CCS1] = {
232 		.name = "ccs1",
233 		.class = XE_ENGINE_CLASS_COMPUTE,
234 		.instance = 1,
235 		.irq_offset = ilog2(INTR_CCS(1)),
236 		.domain = XE_FW_RENDER,
237 		.mmio_base = COMPUTE1_RING_BASE,
238 	},
239 	[XE_HW_ENGINE_CCS2] = {
240 		.name = "ccs2",
241 		.class = XE_ENGINE_CLASS_COMPUTE,
242 		.instance = 2,
243 		.irq_offset = ilog2(INTR_CCS(2)),
244 		.domain = XE_FW_RENDER,
245 		.mmio_base = COMPUTE2_RING_BASE,
246 	},
247 	[XE_HW_ENGINE_CCS3] = {
248 		.name = "ccs3",
249 		.class = XE_ENGINE_CLASS_COMPUTE,
250 		.instance = 3,
251 		.irq_offset = ilog2(INTR_CCS(3)),
252 		.domain = XE_FW_RENDER,
253 		.mmio_base = COMPUTE3_RING_BASE,
254 	},
255 	[XE_HW_ENGINE_GSCCS0] = {
256 		.name = "gsccs0",
257 		.class = XE_ENGINE_CLASS_OTHER,
258 		.instance = OTHER_GSC_INSTANCE,
259 		.domain = XE_FW_GSC,
260 		.mmio_base = GSCCS_RING_BASE,
261 	},
262 };
263 
264 static void hw_engine_fini(struct drm_device *drm, void *arg)
265 {
266 	struct xe_hw_engine *hwe = arg;
267 
268 	if (hwe->exl_port)
269 		xe_execlist_port_destroy(hwe->exl_port);
270 	xe_lrc_finish(&hwe->kernel_lrc);
271 
272 	hwe->gt = NULL;
273 }
274 
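/*
 * Engine-relative MMIO helpers: callers pass a register offset relative to
 * the engine's ring base and hwe->mmio_base is added before the access.
 * The asserts check that the offset is engine-relative (i.e. it does not
 * already include the mmio base) and that the engine's forcewake domain is
 * held.
 */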
275 static void hw_engine_mmio_write32(struct xe_hw_engine *hwe, struct xe_reg reg,
276 				   u32 val)
277 {
278 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
279 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
280 
281 	reg.addr += hwe->mmio_base;
282 
283 	xe_mmio_write32(hwe->gt, reg, val);
284 }
285 
286 static u32 hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
287 {
288 	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
289 	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);
290 
291 	reg.addr += hwe->mmio_base;
292 
293 	return xe_mmio_read32(hwe->gt, reg);
294 }
295 
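/*
 * Program the per-engine registers needed before submission: RING_HWSTAM,
 * RING_HWS_PGA (GGTT address of the hardware status page), execlist
 * (non-legacy) ring mode via GFX_DISABLE_LEGACY_MODE, and clearing of the
 * STOP_RING bit in RING_MI_MODE. For compute engines the GT-level RCU_MODE
 * CCS-enable bit is set first; the final read of RING_MI_MODE serves as a
 * posting read.
 */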
296 void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
297 {
298 	u32 ccs_mask =
299 		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
300 
301 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
302 		xe_mmio_write32(hwe->gt, RCU_MODE,
303 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
304 
305 	hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
306 	hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
307 			       xe_bo_ggtt_addr(hwe->hwsp));
308 	hw_engine_mmio_write32(hwe, RING_MODE(0),
309 			       _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
310 	hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
311 			       _MASKED_BIT_DISABLE(STOP_RING));
312 	hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
313 }
314 
315 static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
316 						 const struct xe_hw_engine *hwe)
317 {
318 	return xe_gt_ccs_mode_enabled(gt) &&
319 	       xe_rtp_match_first_render_or_compute(gt, hwe);
320 }
321 
322 static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
323 				      const struct xe_hw_engine *hwe)
324 {
325 	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
326 		return false;
327 
328 	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
329 	    hwe->class != XE_ENGINE_CLASS_RENDER)
330 		return false;
331 
332 	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
333 }
334 
335 void
336 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
337 {
338 	struct xe_gt *gt = hwe->gt;
339 	const u8 mocs_write_idx = gt->mocs.uc_index;
340 	const u8 mocs_read_idx = gt->mocs.uc_index;
341 	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
342 			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
343 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
344 	const struct xe_rtp_entry_sr lrc_was[] = {
345 		/*
346 		 * Some blitter commands do not have a field for MOCS, those
347 		 * Some blitter commands do not have a field for MOCS; those
348 		 * commands will use the MOCS index pointed to by BLIT_CCTL.
349 		 * BLIT_CCTL registers need to be programmed to un-cached.
350 		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
351 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
352 			       ENGINE_CLASS(COPY)),
353 		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
354 				 BLIT_CCTL_DST_MOCS_MASK |
355 				 BLIT_CCTL_SRC_MOCS_MASK,
356 				 blit_cctl_val,
357 				 XE_RTP_ACTION_FLAG(ENGINE_BASE)))
358 		},
359 		/* Use Fixed slice CCS mode */
360 		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
361 		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
362 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
363 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
364 		},
365 		/* Disable WMTP if HW doesn't support it */
366 		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
367 		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
368 		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
369 					   PREEMPT_GPGPU_LEVEL_MASK,
370 					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
371 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
372 		},
373 		{}
374 	};
375 
376 	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
377 }
378 
379 static void
380 hw_engine_setup_default_state(struct xe_hw_engine *hwe)
381 {
382 	struct xe_gt *gt = hwe->gt;
383 	struct xe_device *xe = gt_to_xe(gt);
384 	/*
385 	 * RING_CMD_CCTL specifies the default MOCS entry that will be
386 	 * used by the command streamer when executing commands that
387 	 * don't have a way to explicitly specify a MOCS setting.
388 	 * The default should usually reference whichever MOCS entry
389 	 * corresponds to uncached behavior, although use of a WB cached
390 	 * entry is recommended by the spec in certain circumstances on
391 	 * specific platforms.
392 	 * Bspec: 72161
393 	 */
394 	const u8 mocs_write_idx = gt->mocs.uc_index;
395 	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
396 				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
397 				 gt->mocs.wb_index : gt->mocs.uc_index;
398 	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
399 				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
400 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
401 	const struct xe_rtp_entry_sr engine_entries[] = {
402 		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
403 		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
404 		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
405 					   CMD_CCTL_WRITE_OVERRIDE_MASK |
406 					   CMD_CCTL_READ_OVERRIDE_MASK,
407 					   ring_cmd_cctl_val,
408 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
409 		},
410 		/*
411 		 * To allow the GSC engine to go idle on MTL we need to enable
412 		 * idle messaging and set the hysteresis value (we use 0xA=5us
413 		 * as recommended in spec). On platforms after MTL this is
414 		 * as recommended in the spec). On platforms after MTL this is
415 		 */
416 		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
417 		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
418 		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
419 				     IDLE_MSG_DISABLE,
420 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
421 				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
422 					   IDLE_WAIT_TIME,
423 					   0xA,
424 					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
425 		},
426 		{}
427 	};
428 
429 	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
430 }
431 
432 static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
433 				 enum xe_hw_engine_id id)
434 {
435 	const struct engine_info *info;
436 
437 	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
438 		return;
439 
440 	if (!(gt->info.engine_mask & BIT(id)))
441 		return;
442 
443 	info = &engine_infos[id];
444 
445 	xe_gt_assert(gt, !hwe->gt);
446 
447 	hwe->gt = gt;
448 	hwe->class = info->class;
449 	hwe->instance = info->instance;
450 	hwe->mmio_base = info->mmio_base;
451 	hwe->irq_offset = info->irq_offset;
452 	hwe->domain = info->domain;
453 	hwe->name = info->name;
454 	hwe->fence_irq = &gt->fence_irq[info->class];
455 	hwe->engine_id = id;
456 
457 	hwe->eclass = &gt->eclass[hwe->class];
458 	if (!hwe->eclass->sched_props.job_timeout_ms) {
459 		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
460 		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
461 		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
462 		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
463 		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
464 		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
465 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
466 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
467 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
468 
469 		/*
470 		 * The GSC engine can accept submissions while the GSC shim is
471 		 * being reset, during which time the submission is stalled. In
472 		 * the worst case, the shim reset can take up to the maximum GSC
473 		 * command execution time (250ms), so the request start can be
474 		 * delayed by that much; the request itself can take that long
475 		 * without being preemptible, which means worst case it can
476 		 * theoretically take up to 500ms for a preemption to go through
477 		 * on the GSC engine. Adding to that an extra 100ms as a safety
478 		 * margin, we get a minimum recommended timeout of 600ms.
479 		 * The preempt_timeout value can't be tuned for OTHER_CLASS
480 		 * because the class is reserved for kernel usage, so we just
481 		 * need to make sure that the starting value is above that
482 		 * threshold; since our default value (640ms) is greater than
483 		 * 600ms, the only way we can go below is via a kconfig setting.
484 		 * If that happens, log it in dmesg and update the value.
485 		 */
486 		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
487 			const u32 min_preempt_timeout = 600 * 1000;
488 			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
489 				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
490 				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
491 			}
492 		}
493 
494 		/* Record default props */
495 		hwe->eclass->defaults = hwe->eclass->sched_props;
496 	}
497 
498 	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
499 	xe_tuning_process_engine(hwe);
500 	xe_wa_process_engine(hwe);
501 	hw_engine_setup_default_state(hwe);
502 
503 	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
504 	xe_reg_whitelist_process_engine(hwe);
505 }
506 
507 static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
508 			  enum xe_hw_engine_id id)
509 {
510 	struct xe_device *xe = gt_to_xe(gt);
511 	struct xe_tile *tile = gt_to_tile(gt);
512 	int err;
513 
514 	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
515 	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));
516 
517 	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
518 	xe_reg_sr_apply_whitelist(hwe);
519 
520 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
521 						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
522 						 XE_BO_FLAG_GGTT |
523 						 XE_BO_FLAG_GGTT_INVALIDATE);
524 	if (IS_ERR(hwe->hwsp)) {
525 		err = PTR_ERR(hwe->hwsp);
526 		goto err_name;
527 	}
528 
529 	err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
530 	if (err)
531 		goto err_hwsp;
532 
533 	if (!xe_device_uc_enabled(xe)) {
534 		hwe->exl_port = xe_execlist_port_create(xe, hwe);
535 		if (IS_ERR(hwe->exl_port)) {
536 			err = PTR_ERR(hwe->exl_port);
537 			goto err_kernel_lrc;
538 		}
539 	}
540 
541 	if (xe_device_uc_enabled(xe)) {
542 		/* GSCCS has a special interrupt for reset */
543 		if (hwe->class == XE_ENGINE_CLASS_OTHER)
544 			hwe->irq_handler = xe_gsc_hwe_irq_handler;
545 
546 		xe_hw_engine_enable_ring(hwe);
547 	}
548 
549 	/* We reserve the highest BCS instance for USM */
550 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
551 		gt->usm.reserved_bcs_instance = hwe->instance;
552 
553 	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
554 
555 err_kernel_lrc:
556 	xe_lrc_finish(&hwe->kernel_lrc);
557 err_hwsp:
558 	xe_bo_unpin_map_no_vm(hwe->hwsp);
559 err_name:
560 	hwe->name = NULL;
561 
562 	return err;
563 }
564 
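/*
 * Assign logical instance numbers: within each engine class, the engines
 * that survived fusing are numbered contiguously from 0 in enumeration
 * order. For example, if only vcs0 and vcs2 are present, they become
 * logical instances 0 and 1 respectively.
 */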
565 static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
566 {
567 	int class;
568 
569 	/* FIXME: Doing a simple logical mapping that works for most hardware */
570 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
571 		struct xe_hw_engine *hwe;
572 		enum xe_hw_engine_id id;
573 		int logical_instance = 0;
574 
575 		for_each_hw_engine(hwe, gt, id)
576 			if (hwe->class == class)
577 				hwe->logical_instance = logical_instance++;
578 	}
579 }
580 
581 static void read_media_fuses(struct xe_gt *gt)
582 {
583 	struct xe_device *xe = gt_to_xe(gt);
584 	u32 media_fuse;
585 	u16 vdbox_mask;
586 	u16 vebox_mask;
587 	int i, j;
588 
589 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
590 
591 	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);
592 
593 	/*
594 	 * Pre-Xe_HP platforms had register bits representing absent engines,
595 	 * whereas Xe_HP and beyond have bits representing present engines.
596 	 * Invert the polarity on old platforms so that we can use common
597 	 * handling below.
598 	 */
599 	if (GRAPHICS_VERx100(xe) < 1250)
600 		media_fuse = ~media_fuse;
601 
602 	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
603 	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);
604 
605 	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
606 		if (!(gt->info.engine_mask & BIT(i)))
607 			continue;
608 
609 		if (!(BIT(j) & vdbox_mask)) {
610 			gt->info.engine_mask &= ~BIT(i);
611 			drm_info(&xe->drm, "vcs%u fused off\n", j);
612 		}
613 	}
614 
615 	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
616 		if (!(gt->info.engine_mask & BIT(i)))
617 			continue;
618 
619 		if (!(BIT(j) & vebox_mask)) {
620 			gt->info.engine_mask &= ~BIT(i);
621 			drm_info(&xe->drm, "vecs%u fused off\n", j);
622 		}
623 	}
624 }
625 
626 static void read_copy_fuses(struct xe_gt *gt)
627 {
628 	struct xe_device *xe = gt_to_xe(gt);
629 	u32 bcs_mask;
630 
631 	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
632 		return;
633 
634 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
635 
636 	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
637 	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
638 
639 	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
640 	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
641 		if (!(gt->info.engine_mask & BIT(i)))
642 			continue;
643 
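		/*
		 * Each MEML3 enable bit covers a pair of link copy engines
		 * (hence the j / 2), so two consecutive BCS instances are
		 * fused off together.
		 */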
644 		if (!(BIT(j / 2) & bcs_mask)) {
645 			gt->info.engine_mask &= ~BIT(i);
646 			drm_info(&xe->drm, "bcs%u fused off\n", j);
647 		}
648 	}
649 }
650 
651 static void read_compute_fuses_from_dss(struct xe_gt *gt)
652 {
653 	struct xe_device *xe = gt_to_xe(gt);
654 
655 	/*
656 	 * CCS fusing based on DSS masks only applies to platforms that can
657 	 * have more than one CCS.
658 	 */
659 	if (hweight64(gt->info.engine_mask &
660 		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
661 		return;
662 
663 	/*
664 	 * CCS availability on Xe_HP is inferred from the presence of DSS in
665 	 * each quadrant.
666 	 */
667 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
668 		if (!(gt->info.engine_mask & BIT(i)))
669 			continue;
670 
671 		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
672 			gt->info.engine_mask &= ~BIT(i);
673 			drm_info(&xe->drm, "ccs%u fused off\n", j);
674 		}
675 	}
676 }
677 
678 static void read_compute_fuses_from_reg(struct xe_gt *gt)
679 {
680 	struct xe_device *xe = gt_to_xe(gt);
681 	u32 ccs_mask;
682 
683 	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
684 	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);
685 
686 	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
687 		if (!(gt->info.engine_mask & BIT(i)))
688 			continue;
689 
690 		if ((ccs_mask & BIT(j)) == 0) {
691 			gt->info.engine_mask &= ~BIT(i);
692 			drm_info(&xe->drm, "ccs%u fused off\n", j);
693 		}
694 	}
695 }
696 
697 static void read_compute_fuses(struct xe_gt *gt)
698 {
699 	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
700 		read_compute_fuses_from_reg(gt);
701 	else
702 		read_compute_fuses_from_dss(gt);
703 }
704 
705 static void check_gsc_availability(struct xe_gt *gt)
706 {
707 	struct xe_device *xe = gt_to_xe(gt);
708 
709 	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
710 		return;
711 
712 	/*
713 	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
714 	 * have the FW there is nothing we need the engine for and can therefore
715 	 * skip its initialization.
716 	 */
717 	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
718 		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);
719 		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
720 	}
721 }
722 
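/**
 * xe_hw_engines_init_early - Pre-initialize the GT's hardware engines
 * @gt: GT structure
 *
 * Trim the engine mask according to the media/copy/compute fuses and GSC
 * firmware availability, then set up the software-only state of each
 * remaining engine (scheduling defaults, workaround/tuning save-restore
 * lists and register whitelist).
 *
 * Return: 0 (the function currently cannot fail).
 */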
723 int xe_hw_engines_init_early(struct xe_gt *gt)
724 {
725 	int i;
726 
727 	read_media_fuses(gt);
728 	read_copy_fuses(gt);
729 	read_compute_fuses(gt);
730 	check_gsc_availability(gt);
731 
732 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
733 	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
734 
735 	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
736 		hw_engine_init_early(gt, &gt->hw_engines[i], i);
737 
738 	return 0;
739 }
740 
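/**
 * xe_hw_engines_init - Finish initialization of the GT's hardware engines
 * @gt: GT structure
 *
 * For each enabled engine, apply the save-restore and whitelist registers,
 * allocate the HWSP and kernel LRC, create an execlist port when uC
 * submission is not available and enable the ring otherwise, then compute
 * the logical engine mapping.
 *
 * Return: 0 on success, negative error code on failure.
 */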
741 int xe_hw_engines_init(struct xe_gt *gt)
742 {
743 	int err;
744 	struct xe_hw_engine *hwe;
745 	enum xe_hw_engine_id id;
746 
747 	for_each_hw_engine(hwe, gt, id) {
748 		err = hw_engine_init(gt, hwe, id);
749 		if (err)
750 			return err;
751 	}
752 
753 	hw_engine_setup_logical_mapping(gt);
754 
755 	return 0;
756 }
757 
758 void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
759 {
760 	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);
761 
762 	if (hwe->irq_handler)
763 		hwe->irq_handler(hwe, intr_vec);
764 
765 	if (intr_vec & GT_RENDER_USER_INTERRUPT)
766 		xe_hw_fence_irq_run(hwe->fence_irq);
767 }
768 
769 /**
770  * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
771  * @hwe: Xe HW Engine.
772  *
773  * This snapshot can be printed out at a later stage, for example during
774  * dev_coredump analysis.
775  *
776  * Returns: a Xe HW Engine snapshot object that must be freed by the
777  * caller, using `xe_hw_engine_snapshot_free`.
778  */
779 struct xe_hw_engine_snapshot *
780 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
781 {
782 	struct xe_hw_engine_snapshot *snapshot;
783 	u64 val;
784 
785 	if (!xe_hw_engine_is_valid(hwe))
786 		return NULL;
787 
788 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
789 
790 	if (!snapshot)
791 		return NULL;
792 
793 	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
794 	snapshot->class = hwe->class;
795 	snapshot->logical_instance = hwe->logical_instance;
796 	snapshot->forcewake.domain = hwe->domain;
797 	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
798 						    hwe->domain);
799 	snapshot->mmio_base = hwe->mmio_base;
800 
801 	/* no more VF accessible data below this point */
802 	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
803 		return snapshot;
804 
805 	snapshot->reg.ring_execlist_status =
806 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
807 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
808 	snapshot->reg.ring_execlist_status |= val << 32;
809 
810 	snapshot->reg.ring_execlist_sq_contents =
811 		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
812 	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
813 	snapshot->reg.ring_execlist_sq_contents |= val << 32;
814 
815 	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
816 	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
817 	snapshot->reg.ring_acthd |= val << 32;
818 
819 	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
820 	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
821 	snapshot->reg.ring_bbaddr |= val << 32;
822 
823 	snapshot->reg.ring_dma_fadd =
824 		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
825 	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
826 	snapshot->reg.ring_dma_fadd |= val << 32;
827 
828 	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
829 	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
830 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
831 	snapshot->reg.ring_head =
832 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
833 	snapshot->reg.ring_tail =
834 		hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
835 	snapshot->reg.ring_ctl = hw_engine_mmio_read32(hwe, RING_CTL(0));
836 	snapshot->reg.ring_mi_mode =
837 		hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
838 	snapshot->reg.ring_mode = hw_engine_mmio_read32(hwe, RING_MODE(0));
839 	snapshot->reg.ring_imr = hw_engine_mmio_read32(hwe, RING_IMR(0));
840 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
841 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
842 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
843 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
844 
845 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
846 		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);
847 
848 	return snapshot;
849 }
850 
851 /**
852  * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
853  * @snapshot: Xe HW Engine snapshot object.
854  * @p: drm_printer where it will be printed out.
855  *
856  * This function prints out a given Xe HW Engine snapshot object.
857  */
858 void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
859 				 struct drm_printer *p)
860 {
861 	if (!snapshot)
862 		return;
863 
864 	drm_printf(p, "%s (physical), logical instance=%d\n",
865 		   snapshot->name ? snapshot->name : "",
866 		   snapshot->logical_instance);
867 	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
868 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
869 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
870 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
871 	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
872 		   snapshot->reg.ring_execlist_status);
873 	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
874 		   snapshot->reg.ring_execlist_sq_contents);
875 	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
876 	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
877 	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
878 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
879 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
880 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
881 		   snapshot->reg.ring_mode);
882 	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
883 	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
884 	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
885 	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
886 	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
887 	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
888 	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
889 	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
890 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
891 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
892 			   snapshot->reg.rcu_mode);
893 }
894 
895 /**
896  * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
897  * @snapshot: Xe HW Engine snapshot object.
898  *
899  * This function frees all the memory that was allocated at capture
900  * time.
901  */
902 void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
903 {
904 	if (!snapshot)
905 		return;
906 
907 	kfree(snapshot->name);
908 	kfree(snapshot);
909 }
910 
911 /**
912  * xe_hw_engine_print - Capture and print the state of a HW Engine.
913  * @hwe: Hardware Engine.
914  * @p: drm_printer.
915  *
916  * This function quickly captures a snapshot and immediately prints it out.
917  */
918 void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
919 {
920 	struct xe_hw_engine_snapshot *snapshot;
921 
922 	snapshot = xe_hw_engine_snapshot_capture(hwe);
923 	xe_hw_engine_snapshot_print(snapshot, p);
924 	xe_hw_engine_snapshot_free(snapshot);
925 }
926 
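/**
 * xe_hw_engine_mask_per_class - Build a per-class engine instance mask
 * @gt: GT structure
 * @engine_class: engine class to query
 *
 * Return: bitmask with one bit set per hardware instance of @engine_class
 * present in @gt's engine mask.
 */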
927 u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
928 				enum xe_engine_class engine_class)
929 {
930 	u32 mask = 0;
931 	enum xe_hw_engine_id id;
932 
933 	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
934 		if (engine_infos[id].class == engine_class &&
935 		    gt->info.engine_mask & BIT(id))
936 			mask |= BIT(engine_infos[id].instance);
937 	}
938 	return mask;
939 }
940 
941 bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
942 {
943 	struct xe_gt *gt = hwe->gt;
944 	struct xe_device *xe = gt_to_xe(gt);
945 
946 	if (hwe->class == XE_ENGINE_CLASS_OTHER)
947 		return true;
948 
949 	/* Check for engines disabled by ccs_mode setting */
950 	if (xe_gt_ccs_mode_enabled(gt) &&
951 	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
952 	    hwe->logical_instance >= gt->ccs_mode)
953 		return true;
954 
955 	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
956 		hwe->instance == gt->usm.reserved_bcs_instance;
957 }
958