xref: /linux/drivers/gpu/drm/xe/xe_lrc.c (revision 26ac2df47d4c58f17210b7a59037e40f7eca693e)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_lrc.h"
7 
8 #include "instructions/xe_mi_commands.h"
9 #include "instructions/xe_gfxpipe_commands.h"
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gpu_commands.h"
12 #include "regs/xe_lrc_layout.h"
13 #include "xe_bb.h"
14 #include "xe_bo.h"
15 #include "xe_device.h"
16 #include "xe_drm_client.h"
17 #include "xe_exec_queue_types.h"
18 #include "xe_gt.h"
19 #include "xe_gt_printk.h"
20 #include "xe_hw_fence.h"
21 #include "xe_map.h"
22 #include "xe_memirq.h"
23 #include "xe_sriov.h"
24 #include "xe_vm.h"
25 
26 #define LRC_VALID				(1 << 0)
27 #define LRC_PRIVILEGE				(1 << 8)
28 #define LRC_ADDRESSING_MODE_SHIFT		3
29 #define LRC_LEGACY_64B_CONTEXT			3
30 
31 #define ENGINE_CLASS_SHIFT			61
32 #define ENGINE_INSTANCE_SHIFT			48
33 
/* Resolve the owning xe device of an LRC via its fence context's GT. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}
39 
40 size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
41 {
42 	switch (class) {
43 	case XE_ENGINE_CLASS_RENDER:
44 		if (GRAPHICS_VER(xe) >= 20)
45 			return 4 * SZ_4K;
46 		else
47 			return 14 * SZ_4K;
48 	case XE_ENGINE_CLASS_COMPUTE:
49 		/* 14 pages since graphics_ver == 11 */
50 		if (GRAPHICS_VER(xe) >= 20)
51 			return 3 * SZ_4K;
52 		else
53 			return 14 * SZ_4K;
54 	default:
55 		WARN(1, "Unknown engine class: %d", class);
56 		fallthrough;
57 	case XE_ENGINE_CLASS_COPY:
58 	case XE_ENGINE_CLASS_VIDEO_DECODE:
59 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
60 	case XE_ENGINE_CLASS_OTHER:
61 		return 2 * SZ_4K;
62 	}
63 }
64 
65 /*
66  * The per-platform tables are u8-encoded in @data. Decode @data and set the
67  * addresses' offset and commands in @regs. The following encoding is used
68  * for each byte. There are 2 steps: decoding commands and decoding addresses.
69  *
70  * Commands:
71  * [7]: create NOPs - number of NOPs are set in lower bits
72  * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
73  *      MI_LRI_FORCE_POSTED
74  * [5:0]: Number of NOPs or registers to set values to in case of
75  *        MI_LOAD_REGISTER_IMM
76  *
77  * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
78  * number of registers. They are set by using the REG/REG16 macros: the former
79  * is used for offsets smaller than 0x200 while the latter is for values bigger
80  * than that. Those macros already set all the bits documented below correctly:
81  *
82  * [7]: When a register offset needs more than 6 bits, use additional bytes, to
83  *      follow, for the lower bits
84  * [6:0]: Register offset, without considering the engine base.
85  *
86  * This function only tweaks the commands and register offsets. Values are not
87  * filled out.
88  */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	/* A zero byte terminates the encoded table. */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			/* NOP(n): leave the next n dwords of @regs untouched */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		/* LRI(count, flags) header byte */
		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Variable-length offset: 7 bits per byte, MSB = "more" */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write the register address; the value dword is left 0 */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	/* Terminate the register state with a batch-buffer end marker. */
	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
140 
/*
 * Xe_LP (gen12) context register layout for non-render engines.
 * Consumed by set_offsets(); see the encoding description above it.
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
172 
/*
 * DG2 (Xe_HPG/Xe_HPM) context register layout for non-render engines.
 * Adds the 0x120/0x124 pair to the first LRI compared to gen12.
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};
206 
/*
 * Xe_LP (gen12) context register layout for the render engine.  Includes
 * the extra scheduling, R_PWR_CLK_STATE and per-subslice state registers
 * on top of the common engine registers.
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};
302 
/* Xe_HP (graphics 12.50) render-engine context register layout. */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
343 
/* DG2 (graphics 12.55+) render-engine context register layout. */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
386 
/*
 * Meteor Lake (graphics 12.70+) render-engine context register layout.
 * Differs from DG2 in the scheduling-attribute block (NOP(2) + 2-reg LRI
 * instead of a 3-reg LRI including 0x1b0).
 */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};
429 
/*
 * Common prefix of the Xe2 (graphics version 20+) context register layout,
 * shared by all engine classes.  Bracketed values are the dword index of
 * each entry within the register state.
 */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */
460 
/* Xe2 render-engine layout: common prefix plus scheduling/power state. */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	0
};
475 
/* Xe2 copy-engine layout: common prefix plus blitter control registers. */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};
486 
/* Xe2 layout for all remaining engine classes: just the common prefix. */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};
492 
493 #undef REG16
494 #undef REG
495 #undef LRI
496 #undef NOP
497 
498 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
499 {
500 	if (class == XE_ENGINE_CLASS_RENDER) {
501 		if (GRAPHICS_VER(xe) >= 20)
502 			return xe2_rcs_offsets;
503 		else if (GRAPHICS_VERx100(xe) >= 1270)
504 			return mtl_rcs_offsets;
505 		else if (GRAPHICS_VERx100(xe) >= 1255)
506 			return dg2_rcs_offsets;
507 		else if (GRAPHICS_VERx100(xe) >= 1250)
508 			return xehp_rcs_offsets;
509 		else
510 			return gen12_rcs_offsets;
511 	} else if (class == XE_ENGINE_CLASS_COPY) {
512 		if (GRAPHICS_VER(xe) >= 20)
513 			return xe2_bcs_offsets;
514 		else
515 			return gen12_xcs_offsets;
516 	} else {
517 		if (GRAPHICS_VER(xe) >= 20)
518 			return xe2_xcs_offsets;
519 		else if (GRAPHICS_VERx100(xe) >= 1255)
520 			return dg2_xcs_offsets;
521 		else
522 			return gen12_xcs_offsets;
523 	}
524 }
525 
/*
 * Initialize CTX_CONTEXT_CONTROL in the context image: inhibit synchronous
 * context switches and clear the restore-inhibit bit (masked writes).
 *
 * NOTE(review): the trailing raw OR of CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT
 * sets the very value bit that the preceding _MASKED_BIT_DISABLE() cleared,
 * so the combination behaves like _MASKED_BIT_ENABLE() for that bit —
 * confirm this is intentional against the HW programming docs.
 */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;

	/* TODO: Timestamp */
}
534 
/*
 * Program memory-based interrupt delivery into the context image.  Only
 * applies to SR-IOV VFs on devices with memirq support; a no-op otherwise.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	/* MI_LOAD_REGISTER_MEM: reload RING_IMR from the memirq enable page */
	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* MI_LOAD_REGISTER_IMM: point status/source report regs at memirq pages */
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}
555 
556 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
557 {
558 	struct xe_device *xe = gt_to_xe(hwe->gt);
559 
560 	if (GRAPHICS_VERx100(xe) >= 1250)
561 		return 0x70;
562 	else
563 		return 0x60;
564 }
565 
566 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
567 {
568 	int x;
569 
570 	x = lrc_ring_mi_mode(hwe);
571 	regs[x + 1] &= ~STOP_RING;
572 	regs[x + 1] |= STOP_RING << 16;
573 }
574 
/* The ring buffer occupies the very start of the LRC BO. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}
579 
/* The per-process HW status page immediately follows the ring buffer. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}
584 
585 /* Make the magic macros work */
586 #define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
587 
588 #define LRC_SEQNO_PPHWSP_OFFSET 512
589 #define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
590 #define LRC_PARALLEL_PPHWSP_OFFSET 2048
591 #define LRC_PPHWSP_SIZE SZ_4K
592 
593 static size_t lrc_reg_size(struct xe_device *xe)
594 {
595 	if (GRAPHICS_VERx100(xe) >= 1250)
596 		return 96 * sizeof(u32);
597 	else
598 		return 80 * sizeof(u32);
599 }
600 
/* Bytes at the start of the context image to skip: PPHWSP + register state. */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}
605 
/* Byte offset of the fence seqno within the LRC BO. */
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}
611 
/* Byte offset of the start seqno within the LRC BO. */
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}
617 
/* Byte offset of the parallel-submission scratch area within the LRC BO. */
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
623 
/* Byte offset of the context register state (just past the PPHWSP). */
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
628 
/*
 * For each LRC element declare two accessors built on the matching
 * __xe_lrc_<elem>_offset() helper:
 *   __xe_lrc_<elem>_map()       - iosys_map pointing at the element
 *   __xe_lrc_<elem>_ggtt_addr() - GGTT address of the element
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS
651 
/* GGTT address of the LRC; hardware addresses a context by its PPHWSP. */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
656 
/* Read dword @reg_nr of the context register state. */
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}
666 
/* Write @val to dword @reg_nr of the context register state. */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}
676 
/*
 * Build a default ("empty") context image for @hwe: a zeroed PPHWSP
 * followed by the decoded register state.  Returns a kzalloc'ed buffer
 * the caller must free, or NULL on allocation failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}
696 
/* Point the context's PDP0 entry at the VM's page-directory descriptor. */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}
704 
705 #define PVC_CTX_ASID		(0x2e + 1)
706 #define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
707 
/**
 * xe_lrc_init - Initialize a logical ring context
 * @lrc: the LRC to initialize
 * @hwe: hardware engine this context will run on
 * @q: exec queue (currently unused here)
 * @vm: VM to bind the context to, may be NULL
 * @ring_size: size in bytes of the ring buffer placed at the start of the BO
 *
 * Allocates and pins the backing BO (ring + context image), seeds the image
 * from the GT's default LRC (or builds one if this is the first context of
 * its class), programs ring registers and the context descriptor, and
 * initializes the fence/seqno state.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default LRC captured yet for this class: build one from scratch */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	/* Program ring location/size into the context image */
	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	/* Pre-12.50 hardware wants engine class/instance in the descriptor */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	/* Enable arbitration as the first instruction in the ring */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Seed both seqnos to "previous fence already signaled" */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}
809 
/* Tear down an LRC: finish its fence context and unpin/release the BO. */
void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}
818 
/* Set the ring head in the context image. */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}
823 
/* Current ring head from the context image, masked to the address bits. */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}
828 
829 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
830 {
831 	const u32 head = xe_lrc_ring_head(lrc);
832 	const u32 tail = lrc->ring.tail;
833 	const u32 size = lrc->ring.size;
834 
835 	return ((head - tail - 1) & (size - 1)) + 1;
836 }
837 
/*
 * Copy @size bytes to the current ring tail and advance it modulo the
 * (power-of-two) ring size.  The caller guarantees the copy itself does
 * not cross the end of the ring.
 */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}
847 
/*
 * Write @size bytes (must be dword-aligned) into the ring, splitting the
 * copy if it would wrap past the end, and pad with an MI_NOOP so the tail
 * always stays qword (8-byte) aligned.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	/* rhs = bytes remaining before the end of the ring buffer */
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	/* size was 4-aligned, so at most one MI_NOOP of padding is needed */
	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}
875 
/* Full context descriptor: flag bits combined with the GGTT address. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
880 
/* GGTT address of the fence seqno location within the LRC. */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}
885 
/* Create a HW fence tracking this LRC's seqno location. */
struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}
891 
/* Read the last signaled seqno from the LRC's PPHWSP. */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
898 
/* Read the start seqno (last job begun) from the LRC's PPHWSP. */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
905 
/* GGTT address of the start seqno location within the LRC. */
u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}
910 
/* GGTT address of the parallel-submission scratch area within the LRC. */
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}
915 
/* iosys_map pointing at the parallel-submission scratch area. */
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
920 
/* Decode the total dword length of an instruction from its header dword. */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
935 
/*
 * Pretty-print one MI command at @dw and return the number of dwords it
 * consumed (MI_BATCH_BUFFER_END consumes all of @remaining_dw).
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Coalesce a run of NOPs into a single line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		/*
		 * NOTE(review): a malformed LRI with an even numdw would read
		 * dw[numdw] (one past the instruction) on the last iteration —
		 * well-formed LRIs always have odd numdw.
		 */
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}
1005 
/*
 * Pretty-print one GFXPIPE command at @dw and return the number of dwords
 * it consumed.  Known commands are matched by their full pipeline/opcode/
 * subopcode signature; unknown ones fall through to a generic line.
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
/* Print a matched command's name and dword count, then consume it. */
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}
1153 
1154 void xe_lrc_dump_default(struct drm_printer *p,
1155 			 struct xe_gt *gt,
1156 			 enum xe_engine_class hwe_class)
1157 {
1158 	u32 *dw;
1159 	int remaining_dw, num_dw;
1160 
1161 	if (!gt->default_lrc[hwe_class]) {
1162 		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1163 		return;
1164 	}
1165 
1166 	/*
1167 	 * Skip the beginning of the LRC since it contains the per-process
1168 	 * hardware status page.
1169 	 */
1170 	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1171 	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
1172 
1173 	while (remaining_dw > 0) {
1174 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1175 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1176 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1177 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1178 		} else {
1179 			num_dw = min(instr_dw(*dw), remaining_dw);
1180 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1181 				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1182 				   num_dw);
1183 		}
1184 
1185 		dw += num_dw;
1186 		remaining_dw -= num_dw;
1187 	}
1188 }
1189 
/*
 * One entry of a non-register ("state") instruction table: the GFXPIPE
 * instruction header dword to emit plus the total instruction length.
 */
struct instr_state {
	u32 instr;	/* GFXPIPE instruction header (command type/opcode bits) */
	u16 num_dw;	/* total length in dwords, including the header dword */
};
1194 
/*
 * SVG (setup/vertex/geometry) non-register state emitted for render engines
 * on graphics versions 12.55 and 12.70..20.04 (see
 * xe_lrc_emit_hwe_state_instructions).  Entries are emitted in table order,
 * so the ordering below is significant — do not re-sort.  num_dw is the
 * total instruction length including the header dword.
 */
static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1247 
1248 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1249 {
1250 	struct xe_gt *gt = q->hwe->gt;
1251 	struct xe_device *xe = gt_to_xe(gt);
1252 	const struct instr_state *state_table = NULL;
1253 	int state_table_size = 0;
1254 
1255 	/*
1256 	 * At the moment we only need to emit non-register state for the RCS
1257 	 * engine.
1258 	 */
1259 	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
1260 		return;
1261 
1262 	switch (GRAPHICS_VERx100(xe)) {
1263 	case 1255:
1264 	case 1270 ... 2004:
1265 		state_table = xe_hpg_svg_state;
1266 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1267 		break;
1268 	default:
1269 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1270 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1271 		return;
1272 	}
1273 
1274 	for (int i = 0; i < state_table_size; i++) {
1275 		u32 instr = state_table[i].instr;
1276 		u16 num_dw = state_table[i].num_dw;
1277 		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1278 
1279 		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1280 		xe_gt_assert(gt, num_dw != 0);
1281 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1282 
1283 		/*
1284 		 * Xe2's SVG context is the same as the one on DG2 / MTL
1285 		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1286 		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1287 		 * Just make the replacement here rather than defining a
1288 		 * whole separate table for the single trivial change.
1289 		 */
1290 		if (GRAPHICS_VER(xe) >= 20 &&
1291 		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1292 			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1293 
1294 		bb->cs[bb->len] = instr;
1295 		if (!is_single_dw)
1296 			bb->cs[bb->len] |= (num_dw - 2);
1297 
1298 		bb->len += num_dw;
1299 	}
1300 }
1301