// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_hw_engine.h"

#include <linux/nospec.h>

#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"

#define MAX_MMIO_BASES 3
struct engine_info {
	const char *name;
	unsigned int class : 8;
	unsigned int instance : 8;
	unsigned int irq_offset : 8;
	enum xe_force_wake_domains domain;
	u32 mmio_base;
};

static const struct engine_info engine_infos[] = {
	[XE_HW_ENGINE_RCS0] = {
		.name = "rcs0",
		.class = XE_ENGINE_CLASS_RENDER,
		.instance = 0,
		.irq_offset = ilog2(INTR_RCS0),
		.domain = XE_FW_RENDER,
		.mmio_base = RENDER_RING_BASE,
	},
	[XE_HW_ENGINE_BCS0] = {
		.name = "bcs0",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 0,
		.irq_offset = ilog2(INTR_BCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = BLT_RING_BASE,
	},
	[XE_HW_ENGINE_BCS1] = {
		.name = "bcs1",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 1,
		.irq_offset = ilog2(INTR_BCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS1_RING_BASE,
	},
	[XE_HW_ENGINE_BCS2] = {
		.name = "bcs2",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 2,
		.irq_offset = ilog2(INTR_BCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS2_RING_BASE,
	},
	[XE_HW_ENGINE_BCS3] = {
		.name = "bcs3",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 3,
		.irq_offset = ilog2(INTR_BCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS3_RING_BASE,
	},
	[XE_HW_ENGINE_BCS4] = {
		.name = "bcs4",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 4,
		.irq_offset = ilog2(INTR_BCS(4)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS4_RING_BASE,
	},
	[XE_HW_ENGINE_BCS5] = {
		.name = "bcs5",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 5,
		.irq_offset = ilog2(INTR_BCS(5)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS5_RING_BASE,
	},
	[XE_HW_ENGINE_BCS6] = {
		.name = "bcs6",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 6,
		.irq_offset = ilog2(INTR_BCS(6)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS6_RING_BASE,
	},
	[XE_HW_ENGINE_BCS7] = {
		.name = "bcs7",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 7,
		.irq_offset = ilog2(INTR_BCS(7)),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS7_RING_BASE,
	},
	[XE_HW_ENGINE_BCS8] = {
		.name = "bcs8",
		.class = XE_ENGINE_CLASS_COPY,
		.instance = 8,
		.irq_offset = ilog2(INTR_BCS8),
		.domain = XE_FW_RENDER,
		.mmio_base = XEHPC_BCS8_RING_BASE,
	},

	[XE_HW_ENGINE_VCS0] = {
		.name = "vcs0",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VCS(0)),
		.domain = XE_FW_MEDIA_VDBOX0,
		.mmio_base = BSD_RING_BASE,
	},
	[XE_HW_ENGINE_VCS1] = {
		.name = "vcs1",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VCS(1)),
		.domain = XE_FW_MEDIA_VDBOX1,
		.mmio_base = BSD2_RING_BASE,
	},
	[XE_HW_ENGINE_VCS2] = {
		.name = "vcs2",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VCS(2)),
		.domain = XE_FW_MEDIA_VDBOX2,
		.mmio_base = BSD3_RING_BASE,
	},
	[XE_HW_ENGINE_VCS3] = {
		.name = "vcs3",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VCS(3)),
		.domain = XE_FW_MEDIA_VDBOX3,
		.mmio_base = BSD4_RING_BASE,
	},
	[XE_HW_ENGINE_VCS4] = {
		.name = "vcs4",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 4,
		.irq_offset = 32 + ilog2(INTR_VCS(4)),
		.domain = XE_FW_MEDIA_VDBOX4,
		.mmio_base = XEHP_BSD5_RING_BASE,
	},
	[XE_HW_ENGINE_VCS5] = {
		.name = "vcs5",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 5,
		.irq_offset = 32 + ilog2(INTR_VCS(5)),
		.domain = XE_FW_MEDIA_VDBOX5,
		.mmio_base = XEHP_BSD6_RING_BASE,
	},
	[XE_HW_ENGINE_VCS6] = {
		.name = "vcs6",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 6,
		.irq_offset = 32 + ilog2(INTR_VCS(6)),
		.domain = XE_FW_MEDIA_VDBOX6,
		.mmio_base = XEHP_BSD7_RING_BASE,
	},
	[XE_HW_ENGINE_VCS7] = {
		.name = "vcs7",
		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
		.instance = 7,
		.irq_offset = 32 + ilog2(INTR_VCS(7)),
		.domain = XE_FW_MEDIA_VDBOX7,
		.mmio_base = XEHP_BSD8_RING_BASE,
	},
	[XE_HW_ENGINE_VECS0] = {
		.name = "vecs0",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 0,
		.irq_offset = 32 + ilog2(INTR_VECS(0)),
		.domain = XE_FW_MEDIA_VEBOX0,
		.mmio_base = VEBOX_RING_BASE,
	},
	[XE_HW_ENGINE_VECS1] = {
		.name = "vecs1",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 1,
		.irq_offset = 32 + ilog2(INTR_VECS(1)),
		.domain = XE_FW_MEDIA_VEBOX1,
		.mmio_base = VEBOX2_RING_BASE,
	},
	[XE_HW_ENGINE_VECS2] = {
		.name = "vecs2",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 2,
		.irq_offset = 32 + ilog2(INTR_VECS(2)),
		.domain = XE_FW_MEDIA_VEBOX2,
		.mmio_base = XEHP_VEBOX3_RING_BASE,
	},
	[XE_HW_ENGINE_VECS3] = {
		.name = "vecs3",
		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
		.instance = 3,
		.irq_offset = 32 + ilog2(INTR_VECS(3)),
		.domain = XE_FW_MEDIA_VEBOX3,
		.mmio_base = XEHP_VEBOX4_RING_BASE,
	},
	[XE_HW_ENGINE_CCS0] = {
		.name = "ccs0",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 0,
		.irq_offset = ilog2(INTR_CCS(0)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE0_RING_BASE,
	},
	[XE_HW_ENGINE_CCS1] = {
		.name = "ccs1",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 1,
		.irq_offset = ilog2(INTR_CCS(1)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE1_RING_BASE,
	},
	[XE_HW_ENGINE_CCS2] = {
		.name = "ccs2",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 2,
		.irq_offset = ilog2(INTR_CCS(2)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE2_RING_BASE,
	},
	[XE_HW_ENGINE_CCS3] = {
		.name = "ccs3",
		.class = XE_ENGINE_CLASS_COMPUTE,
		.instance = 3,
		.irq_offset = ilog2(INTR_CCS(3)),
		.domain = XE_FW_RENDER,
		.mmio_base = COMPUTE3_RING_BASE,
	},
	[XE_HW_ENGINE_GSCCS0] = {
		.name = "gsccs0",
		.class = XE_ENGINE_CLASS_OTHER,
		.instance = OTHER_GSC_INSTANCE,
		.domain = XE_FW_GSC,
		.mmio_base = GSCCS_RING_BASE,
	},
};

static void hw_engine_fini(void *arg)
{
	struct xe_hw_engine *hwe = arg;

	if (hwe->exl_port)
		xe_execlist_port_destroy(hwe->exl_port);

	hwe->gt = NULL;
}

/**
 * xe_hw_engine_mmio_write32() - Write engine register
 * @hwe: engine
 * @reg: register to write into
 * @val: desired 32-bit value to write
 *
 * This function will write val into an engine specific register.
 * Forcewake must be held by the caller.
 *
 */
void xe_hw_engine_mmio_write32(struct xe_hw_engine *hwe,
			       struct xe_reg reg, u32 val)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	xe_mmio_write32(hwe->gt, reg, val);
}
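
/*
 * Illustrative usage of xe_hw_engine_mmio_write32()/xe_hw_engine_mmio_read32()
 * (a sketch, not called from this file; it assumes the xe_force_wake_get()/
 * xe_force_wake_put() helpers with their 0-on-success convention):
 *
 *	err = xe_force_wake_get(gt_to_fw(hwe->gt), hwe->domain);
 *	if (!err) {
 *		xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
 *		xe_force_wake_put(gt_to_fw(hwe->gt), hwe->domain);
 *	}
 */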

/**
 * xe_hw_engine_mmio_read32() - Read engine register
 * @hwe: engine
 * @reg: register to read from
 *
 * This function will read from an engine specific register.
 * Forcewake must be held by the caller.
 *
 * Return: value of the 32-bit register.
 */
u32 xe_hw_engine_mmio_read32(struct xe_hw_engine *hwe, struct xe_reg reg)
{
	xe_gt_assert(hwe->gt, !(reg.addr & hwe->mmio_base));
	xe_force_wake_assert_held(gt_to_fw(hwe->gt), hwe->domain);

	reg.addr += hwe->mmio_base;

	return xe_mmio_read32(hwe->gt, reg);
}

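/*
 * Note on the _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() writes below:
 * RCU_MODE, RING_MODE and RING_MI_MODE are "masked" registers, where the
 * upper 16 bits of the written value select which of the lower 16 bits
 * actually get updated. The final read back of RING_MI_MODE acts as a
 * posting read to make sure the writes have landed.
 */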
void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
{
	u32 ccs_mask =
		xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
	xe_hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
				  xe_bo_ggtt_addr(hwe->hwsp));
	xe_hw_engine_mmio_write32(hwe, RING_MODE(0),
				  _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
	xe_hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
				  _MASKED_BIT_DISABLE(STOP_RING));
	xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
						 const struct xe_hw_engine *hwe)
{
	return xe_gt_ccs_mode_enabled(gt) &&
	       xe_rtp_match_first_render_or_compute(gt, hwe);
}

static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
				      const struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
		return false;

	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
	    hwe->class != XE_ENGINE_CLASS_RENDER)
		return false;

	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
}

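/*
 * The tables below use the xe_rtp (register table processing) helpers:
 * each entry pairs matching rules (graphics version ranges, engine class,
 * or FUNC() callbacks such as the two helpers above) with register field
 * updates. xe_rtp_process_to_sr() walks a table and records the actions
 * that match this engine into a save/restore list, which is later applied
 * either to the default LRC state (reg_lrc) or to the engine registers
 * themselves (reg_sr).
 */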
void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = gt->mocs.uc_index;
	u32 blit_cctl_val = REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, mocs_write_idx) |
			    REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr lrc_setup[] = {
		/*
		 * Some blitter commands do not have a field for MOCS, those
		 * commands will use MOCS index pointed by BLIT_CCTL.
		 * BLIT_CCTL registers are needed to be programmed to un-cached.
		 */
		{ XE_RTP_NAME("BLIT_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED),
			       ENGINE_CLASS(COPY)),
		  XE_RTP_ACTIONS(FIELD_SET(BLIT_CCTL(0),
					   BLIT_CCTL_DST_MOCS_MASK |
					   BLIT_CCTL_SRC_MOCS_MASK,
					   blit_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Use Fixed slice CCS mode */
		{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
		  XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
					   RCU_MODE_FIXED_SLICE_CCS_MODE))
		},
		/* Disable WMTP if HW doesn't support it */
		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
					   PREEMPT_GPGPU_LEVEL_MASK,
					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
}

static void
hw_engine_setup_default_state(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be
	 * used by the command streamer when executing commands that
	 * don't have a way to explicitly specify a MOCS setting.
	 * The default should usually reference whichever MOCS entry
	 * corresponds to uncached behavior, although use of a WB cached
	 * entry is recommended by the spec in certain circumstances on
	 * specific platforms.
	 * Bspec: 72161
	 */
	const u8 mocs_write_idx = gt->mocs.uc_index;
	const u8 mocs_read_idx = hwe->class == XE_ENGINE_CLASS_COMPUTE &&
				 (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC) ?
				 gt->mocs.wb_index : gt->mocs.uc_index;
	u32 ring_cmd_cctl_val = REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, mocs_write_idx) |
				REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, mocs_read_idx);
	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
	const struct xe_rtp_entry_sr engine_entries[] = {
		{ XE_RTP_NAME("RING_CMD_CCTL_default_MOCS"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(FIELD_SET(RING_CMD_CCTL(0),
					   CMD_CCTL_WRITE_OVERRIDE_MASK |
					   CMD_CCTL_READ_OVERRIDE_MASK,
					   ring_cmd_cctl_val,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/*
		 * To allow the GSC engine to go idle on MTL we need to enable
		 * idle messaging and set the hysteresis value (we use 0xA=5us
		 * as recommended in spec). On platforms after MTL this is
		 * enabled by default.
		 */
		{ XE_RTP_NAME("MTL GSCCS IDLE MSG enable"),
		  XE_RTP_RULES(MEDIA_VERSION(1300), ENGINE_CLASS(OTHER)),
		  XE_RTP_ACTIONS(CLR(RING_PSMI_CTL(0),
				     IDLE_MSG_DISABLE,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)),
				 FIELD_SET(RING_PWRCTX_MAXCNT(0),
					   IDLE_WAIT_TIME,
					   0xA,
					   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		/* Enable Priority Mem Read */
		{ XE_RTP_NAME("Priority_Mem_Read"),
		  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
		},
		{}
	};

	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
}

static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
				 enum xe_hw_engine_id id)
{
	const struct engine_info *info;

	if (WARN_ON(id >= ARRAY_SIZE(engine_infos) || !engine_infos[id].name))
		return;

	if (!(gt->info.engine_mask & BIT(id)))
		return;

	info = &engine_infos[id];

	xe_gt_assert(gt, !hwe->gt);

	hwe->gt = gt;
	hwe->class = info->class;
	hwe->instance = info->instance;
	hwe->mmio_base = info->mmio_base;
	hwe->irq_offset = info->irq_offset;
	hwe->domain = info->domain;
	hwe->name = info->name;
	hwe->fence_irq = &gt->fence_irq[info->class];
	hwe->engine_id = id;

	hwe->eclass = &gt->eclass[hwe->class];
	if (!hwe->eclass->sched_props.job_timeout_ms) {
		hwe->eclass->sched_props.job_timeout_ms = 5 * 1000;
		hwe->eclass->sched_props.job_timeout_min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
		hwe->eclass->sched_props.job_timeout_max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
		hwe->eclass->sched_props.timeslice_us = 1 * 1000;
		hwe->eclass->sched_props.timeslice_min = XE_HW_ENGINE_TIMESLICE_MIN;
		hwe->eclass->sched_props.timeslice_max = XE_HW_ENGINE_TIMESLICE_MAX;
		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;

		/*
		 * The GSC engine can accept submissions while the GSC shim is
		 * being reset, during which time the submission is stalled. In
		 * the worst case, the shim reset can take up to the maximum GSC
		 * command execution time (250ms), so the request start can be
		 * delayed by that much; the request itself can take that long
		 * without being preemptible, which means worst case it can
		 * theoretically take up to 500ms for a preemption to go through
		 * on the GSC engine. Adding to that an extra 100ms as a safety
		 * margin, we get a minimum recommended timeout of 600ms.
		 * The preempt_timeout value can't be tuned for OTHER_CLASS
		 * because the class is reserved for kernel usage, so we just
		 * need to make sure that the starting value is above that
		 * threshold; since our default value (640ms) is greater than
		 * 600ms, the only way we can go below is via a kconfig setting.
		 * If that happens, log it in dmesg and update the value.
		 */
		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
			const u32 min_preempt_timeout = 600 * 1000;

			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
			}
		}

		/* Record default props */
		hwe->eclass->defaults = hwe->eclass->sched_props;
	}

	xe_reg_sr_init(&hwe->reg_sr, hwe->name, gt_to_xe(gt));
	xe_tuning_process_engine(hwe);
	xe_wa_process_engine(hwe);
	hw_engine_setup_default_state(hwe);

	xe_reg_sr_init(&hwe->reg_whitelist, hwe->name, gt_to_xe(gt));
	xe_reg_whitelist_process_engine(hwe);
}

static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
			  enum xe_hw_engine_id id)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	int err;

	xe_gt_assert(gt, id < ARRAY_SIZE(engine_infos) && engine_infos[id].name);
	xe_gt_assert(gt, gt->info.engine_mask & BIT(id));

	xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
	xe_reg_sr_apply_whitelist(hwe);

	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
						 XE_BO_FLAG_GGTT |
						 XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(hwe->hwsp)) {
		err = PTR_ERR(hwe->hwsp);
		goto err_name;
	}

	if (!xe_device_uc_enabled(xe)) {
		hwe->exl_port = xe_execlist_port_create(xe, hwe);
		if (IS_ERR(hwe->exl_port)) {
			err = PTR_ERR(hwe->exl_port);
			goto err_hwsp;
		}
	} else {
		/* GSCCS has a special interrupt for reset */
		if (hwe->class == XE_ENGINE_CLASS_OTHER)
			hwe->irq_handler = xe_gsc_hwe_irq_handler;

		if (!IS_SRIOV_VF(xe))
			xe_hw_engine_enable_ring(hwe);
	}

	/* We reserve the highest BCS instance for USM */
	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
		gt->usm.reserved_bcs_instance = hwe->instance;

	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);

err_hwsp:
	xe_bo_unpin_map_no_vm(hwe->hwsp);
err_name:
	hwe->name = NULL;

	return err;
}

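/*
 * Logical instances are numbered densely per engine class across the
 * engines that survived fusing. For example, if only vcs0 and vcs2 remain
 * on a GT, they get logical instances 0 and 1 respectively, while their
 * physical instances stay 0 and 2.
 */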
static void hw_engine_setup_logical_mapping(struct xe_gt *gt)
{
	int class;

	/* FIXME: Doing a simple logical mapping that works for most hardware */
	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;
		int logical_instance = 0;

		for_each_hw_engine(hwe, gt, id)
			if (hwe->class == class)
				hwe->logical_instance = logical_instance++;
	}
}

static void read_media_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 media_fuse;
	u16 vdbox_mask;
	u16 vebox_mask;
	int i, j;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	media_fuse = xe_mmio_read32(gt, GT_VEBOX_VDBOX_DISABLE);

	/*
	 * Pre-Xe_HP platforms had register bits representing absent engines,
	 * whereas Xe_HP and beyond have bits representing present engines.
	 * Invert the polarity on old platforms so that we can use common
	 * handling below.
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		media_fuse = ~media_fuse;

	vdbox_mask = REG_FIELD_GET(GT_VDBOX_DISABLE_MASK, media_fuse);
	vebox_mask = REG_FIELD_GET(GT_VEBOX_DISABLE_MASK, media_fuse);

	for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vdbox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vcs%u fused off\n", j);
		}
	}

	for (i = XE_HW_ENGINE_VECS0, j = 0; i <= XE_HW_ENGINE_VECS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j) & vebox_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "vecs%u fused off\n", j);
		}
	}
}

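/*
 * Only platforms with graphics version 12.60..12.69 (currently PVC) expose
 * the extra copy engines gated by the MEML3 fuse; each fuse bit covers a
 * pair of consecutive BCS engines, hence the BIT(j / 2) test below.
 */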
static void read_copy_fuses(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 bcs_mask;

	if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
		return;

	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);

	bcs_mask = xe_mmio_read32(gt, MIRROR_FUSE3);
	bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);

	/* BCS0 is always present; only BCS1-BCS8 may be fused off */
	for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!(BIT(j / 2) & bcs_mask)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "bcs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * CCS fusing based on DSS masks only applies to platforms that can
	 * have more than one CCS.
	 */
	if (hweight64(gt->info.engine_mask &
		      GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
		return;

	/*
	 * CCS availability on Xe_HP is inferred from the presence of DSS in
	 * each quadrant.
	 */
	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 ccs_mask;

	ccs_mask = xe_mmio_read32(gt, XEHP_FUSE4);
	ccs_mask = REG_FIELD_GET(CCS_EN_MASK, ccs_mask);

	for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
		if (!(gt->info.engine_mask & BIT(i)))
			continue;

		if ((ccs_mask & BIT(j)) == 0) {
			gt->info.engine_mask &= ~BIT(i);
			drm_info(&xe->drm, "ccs%u fused off\n", j);
		}
	}
}

static void read_compute_fuses(struct xe_gt *gt)
{
	if (GRAPHICS_VER(gt_to_xe(gt)) >= 20)
		read_compute_fuses_from_reg(gt);
	else
		read_compute_fuses_from_dss(gt);
}

static void check_gsc_availability(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
		return;

	/*
	 * The GSCCS is only used to communicate with the GSC FW, so if we don't
	 * have the FW there is nothing we need the engine for and can therefore
	 * skip its initialization.
	 */
	if (!xe_uc_fw_is_available(&gt->uc.gsc.fw)) {
		gt->info.engine_mask &= ~BIT(XE_HW_ENGINE_GSCCS0);

		/* interrupts were previously enabled, so turn them off */
		xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, 0);
		xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~0);

		drm_info(&xe->drm, "gsccs disabled due to lack of FW\n");
	}
}

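/*
 * Some of the fuse helpers above assert that the XE_FW_GT forcewake domain
 * is already held, so xe_hw_engines_init_early() is expected to run with
 * that domain taken by the caller during GT initialization.
 */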
int xe_hw_engines_init_early(struct xe_gt *gt)
{
	int i;

	read_media_fuses(gt);
	read_copy_fuses(gt);
	read_compute_fuses(gt);
	check_gsc_availability(gt);

	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);

	for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
		hw_engine_init_early(gt, &gt->hw_engines[i], i);

	return 0;
}

int xe_hw_engines_init(struct xe_gt *gt)
{
	int err;
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;

	for_each_hw_engine(hwe, gt, id) {
		err = hw_engine_init(gt, hwe, id);
		if (err)
			return err;
	}

	hw_engine_setup_logical_mapping(gt);
	err = xe_hw_engine_setup_groups(gt);
	if (err)
		return err;

	return 0;
}

void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
{
	wake_up_all(&gt_to_xe(hwe->gt)->ufence_wq);

	if (hwe->irq_handler)
		hwe->irq_handler(hwe, intr_vec);

	if (intr_vec & GT_RENDER_USER_INTERRUPT)
		xe_hw_fence_irq_run(hwe->fence_irq);
}

static bool
is_slice_common_per_gslice(struct xe_device *xe)
{
	return GRAPHICS_VERx100(xe) >= 1255;
}

static void
xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe,
				       struct xe_hw_engine_snapshot *snapshot)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	unsigned int dss;
	u16 group, instance;

	snapshot->reg.instdone.ring = xe_hw_engine_mmio_read32(hwe, RING_INSTDONE(0));

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		snapshot->reg.instdone.slice_common[0] =
			xe_mmio_read32(gt, SC_INSTDONE);
		snapshot->reg.instdone.slice_common_extra[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA);
		snapshot->reg.instdone.slice_common_extra2[0] =
			xe_mmio_read32(gt, SC_INSTDONE_EXTRA2);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			snapshot->reg.instdone.slice_common[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE, group, instance);
			snapshot->reg.instdone.slice_common_extra[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA, group, instance);
			snapshot->reg.instdone.slice_common_extra2[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_SC_INSTDONE_EXTRA2, group, instance);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		snapshot->reg.instdone.sampler[dss] =
			xe_gt_mcr_unicast_read(gt, SAMPLER_INSTDONE, group, instance);
		snapshot->reg.instdone.row[dss] =
			xe_gt_mcr_unicast_read(gt, ROW_INSTDONE, group, instance);

		if (GRAPHICS_VERx100(xe) >= 1255)
			snapshot->reg.instdone.geom_svg[dss] =
				xe_gt_mcr_unicast_read(gt, XEHPG_INSTDONE_GEOM_SVGUNIT,
						       group, instance);
	}
}

/**
 * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine.
 * @hwe: Xe HW Engine.
 *
 * This can be printed out in a later stage like during dev_coredump
 * analysis.
 *
 * Returns: a Xe HW Engine snapshot object that must be freed by the
 * caller, using `xe_hw_engine_snapshot_free`.
 */
struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
	struct xe_hw_engine_snapshot *snapshot;
	size_t len;
	u64 val;

	if (!xe_hw_engine_is_valid(hwe))
		return NULL;

	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);

	if (!snapshot)
		return NULL;

	/* Because XE_MAX_DSS_FUSE_BITS is defined in xe_gt_types.h, which in
	 * turn includes xe_hw_engine_types.h, these per-DSS arrays can't be
	 * sized statically in struct xe_hw_engine_snapshot, so they are
	 * allocated separately here.
	 */
	len = (XE_MAX_DSS_FUSE_BITS * sizeof(u32));
	snapshot->reg.instdone.slice_common = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.slice_common_extra2 = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.sampler = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.row = kzalloc(len, GFP_ATOMIC);
	snapshot->reg.instdone.geom_svg = kzalloc(len, GFP_ATOMIC);
	if (!snapshot->reg.instdone.slice_common ||
	    !snapshot->reg.instdone.slice_common_extra ||
	    !snapshot->reg.instdone.slice_common_extra2 ||
	    !snapshot->reg.instdone.sampler ||
	    !snapshot->reg.instdone.row ||
	    !snapshot->reg.instdone.geom_svg) {
		xe_hw_engine_snapshot_free(snapshot);
		return NULL;
	}

	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
	snapshot->hwe = hwe;
	snapshot->logical_instance = hwe->logical_instance;
	snapshot->forcewake.domain = hwe->domain;
	snapshot->forcewake.ref = xe_force_wake_ref(gt_to_fw(hwe->gt),
						    hwe->domain);
	snapshot->mmio_base = hwe->mmio_base;

	/* no more VF accessible data below this point */
	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
		return snapshot;

	snapshot->reg.ring_execlist_status =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
	snapshot->reg.ring_execlist_status |= val << 32;

	snapshot->reg.ring_execlist_sq_contents =
		xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
	snapshot->reg.ring_execlist_sq_contents |= val << 32;

	snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
	snapshot->reg.ring_acthd |= val << 32;

	snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
	snapshot->reg.ring_bbaddr |= val << 32;

	snapshot->reg.ring_dma_fadd =
		xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
	val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
	snapshot->reg.ring_dma_fadd |= val << 32;

	snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
	snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
	snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0));
	if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) {
		val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0));
		snapshot->reg.ring_start |= val << 32;
	}
	if (xe_gt_has_indirect_ring_state(hwe->gt)) {
		snapshot->reg.indirect_ring_state =
			xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0));
	}

	snapshot->reg.ring_head =
		xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
	snapshot->reg.ring_tail =
		xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR;
	snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0));
	snapshot->reg.ring_mi_mode =
		xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
	snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0));
	snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0));
	snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0));
	snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0));
	snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0));
	snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0));
	xe_hw_engine_snapshot_instdone_capture(hwe, snapshot);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		snapshot->reg.rcu_mode = xe_mmio_read32(hwe->gt, RCU_MODE);

	return snapshot;
}

static void
xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
{
	struct xe_gt *gt = snapshot->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u16 group, instance;
	unsigned int dss;

	drm_printf(p, "\tRING_INSTDONE: 0x%08x\n", snapshot->reg.instdone.ring);

	if (snapshot->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	if (is_slice_common_per_gslice(xe) == false) {
		drm_printf(p, "\tSC_INSTDONE[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra[0]);
		drm_printf(p, "\tSC_INSTDONE_EXTRA2[0]: 0x%08x\n",
			   snapshot->reg.instdone.slice_common_extra2[0]);
	} else {
		for_each_geometry_dss(dss, gt, group, instance) {
			drm_printf(p, "\tSC_INSTDONE[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra[dss]);
			drm_printf(p, "\tSC_INSTDONE_EXTRA2[%u]: 0x%08x\n", dss,
				   snapshot->reg.instdone.slice_common_extra2[dss]);
		}
	}

	for_each_geometry_dss(dss, gt, group, instance) {
		drm_printf(p, "\tSAMPLER_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.sampler[dss]);
		drm_printf(p, "\tROW_INSTDONE[%u]: 0x%08x\n", dss,
			   snapshot->reg.instdone.row[dss]);

		if (GRAPHICS_VERx100(xe) >= 1255)
			drm_printf(p, "\tINSTDONE_GEOM_SVGUNIT[%u]: 0x%08x\n",
				   dss, snapshot->reg.instdone.geom_svg[dss]);
	}
}

/**
 * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 * @p: drm_printer where it will be printed out.
 *
 * This function prints out a given Xe HW Engine snapshot object.
 */
void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
				 struct drm_printer *p)
{
	if (!snapshot)
		return;

	drm_printf(p, "%s (physical), logical instance=%d\n",
		   snapshot->name ? snapshot->name : "",
		   snapshot->logical_instance);
	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
		   snapshot->forcewake.domain, snapshot->forcewake.ref);
	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_status);
	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
		   snapshot->reg.ring_execlist_sq_contents);
	drm_printf(p, "\tRING_START: 0x%016llx\n", snapshot->reg.ring_start);
	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
	drm_printf(p, "\tRING_MODE: 0x%08x\n",
		   snapshot->reg.ring_mode);
	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
	drm_printf(p, "\tINDIRECT_RING_STATE: 0x%08x\n",
		   snapshot->reg.indirect_ring_state);
	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
	xe_hw_engine_snapshot_instdone_print(snapshot, p);

	if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE)
		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
			   snapshot->reg.rcu_mode);
	drm_puts(p, "\n");
}

/**
 * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot.
 * @snapshot: Xe HW Engine snapshot object.
 *
 * This function frees all the memory that was allocated at capture time.
 */
void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kfree(snapshot->reg.instdone.slice_common);
	kfree(snapshot->reg.instdone.slice_common_extra);
	kfree(snapshot->reg.instdone.slice_common_extra2);
	kfree(snapshot->reg.instdone.sampler);
	kfree(snapshot->reg.instdone.row);
	kfree(snapshot->reg.instdone.geom_svg);
	kfree(snapshot->name);
	kfree(snapshot);
}

/**
 * xe_hw_engine_print - Xe HW Engine Print.
 * @hwe: Hardware Engine.
 * @p: drm_printer.
 *
 * This function quickly captures a snapshot, immediately prints it out and
 * then frees it.
 */
void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p)
{
	struct xe_hw_engine_snapshot *snapshot;

	snapshot = xe_hw_engine_snapshot_capture(hwe);
	xe_hw_engine_snapshot_print(snapshot, p);
	xe_hw_engine_snapshot_free(snapshot);
}

u32 xe_hw_engine_mask_per_class(struct xe_gt *gt,
				enum xe_engine_class engine_class)
{
	u32 mask = 0;
	enum xe_hw_engine_id id;

	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
		if (engine_infos[id].class == engine_class &&
		    gt->info.engine_mask & BIT(id))
			mask |= BIT(engine_infos[id].instance);
	}
	return mask;
}

bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);

	if (hwe->class == XE_ENGINE_CLASS_OTHER)
		return true;

	/* Check for engines disabled by ccs_mode setting */
	if (xe_gt_ccs_mode_enabled(gt) &&
	    hwe->class == XE_ENGINE_CLASS_COMPUTE &&
	    hwe->logical_instance >= gt->ccs_mode)
		return true;

	return xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
	       hwe->instance == gt->usm.reserved_bcs_instance;
}

const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		return "rcs";
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		return "vcs";
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return "vecs";
	case XE_ENGINE_CLASS_COPY:
		return "bcs";
	case XE_ENGINE_CLASS_OTHER:
		return "other";
	case XE_ENGINE_CLASS_COMPUTE:
		return "ccs";
	case XE_ENGINE_CLASS_MAX:
		break;
	}

	return NULL;
}

u64 xe_hw_engine_read_timestamp(struct xe_hw_engine *hwe)
{
	return xe_mmio_read64_2x32(hwe->gt, RING_TIMESTAMP(hwe->mmio_base));
}

enum xe_force_wake_domains xe_hw_engine_to_fw_domain(struct xe_hw_engine *hwe)
{
	return engine_infos[hwe->engine_id].domain;
}

static const enum xe_engine_class user_to_xe_engine_class[] = {
	[DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
	[DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
	[DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
	[DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
	[DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
};

/**
 * xe_hw_engine_lookup() - Lookup hardware engine for class:instance
 * @xe: xe device
 * @eci: engine class and instance
 *
 * This function will find a hardware engine for given engine
 * class and instance.
 *
 * Return: If found xe_hw_engine pointer, NULL otherwise.
 */
struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
		    struct drm_xe_engine_class_instance eci)
{
	unsigned int idx;

	if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
		return NULL;

	if (eci.gt_id >= xe->info.gt_count)
		return NULL;

	idx = array_index_nospec(eci.engine_class,
				 ARRAY_SIZE(user_to_xe_engine_class));

	return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id),
			       user_to_xe_engine_class[idx],
			       eci.engine_instance, true);
}
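
/*
 * Illustrative use from an ioctl path (a sketch only; the real callers live
 * elsewhere in the driver): a struct drm_xe_engine_class_instance coming
 * from userspace would typically be resolved and validated like this:
 *
 *	struct xe_hw_engine *hwe = xe_hw_engine_lookup(xe, eci);
 *
 *	if (!hwe || xe_hw_engine_is_reserved(hwe))
 *		return -EINVAL;
 */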