xref: /linux/drivers/gpu/drm/xe/xe_lrc.c (revision 7cc9196675234d4de0e1e19b9da1a8b86ecfeedd)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_lrc.h"
7 
8 #include "instructions/xe_mi_commands.h"
9 #include "instructions/xe_gfxpipe_commands.h"
10 #include "regs/xe_engine_regs.h"
11 #include "regs/xe_gpu_commands.h"
12 #include "regs/xe_lrc_layout.h"
13 #include "xe_bb.h"
14 #include "xe_bo.h"
15 #include "xe_device.h"
16 #include "xe_drm_client.h"
17 #include "xe_exec_queue_types.h"
18 #include "xe_gt.h"
19 #include "xe_gt_printk.h"
20 #include "xe_hw_fence.h"
21 #include "xe_map.h"
22 #include "xe_memirq.h"
23 #include "xe_sriov.h"
24 #include "xe_vm.h"
25 
/* Context descriptor bits (low dword); OR'ed into lrc->desc at init time */
#define LRC_VALID				(1 << 0)
#define LRC_PRIVILEGE				(1 << 8)
#define LRC_ADDRESSING_MODE_SHIFT		3
#define LRC_LEGACY_64B_CONTEXT			3

/* Engine identity fields in the descriptor (only used when verx100 < 1250) */
#define ENGINE_CLASS_SHIFT			61
#define ENGINE_INSTANCE_SHIFT			48
33 
/* Resolve the owning xe_device via the fence context's GT back-pointer. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}
39 
/**
 * xe_lrc_size - Byte size of the HW context image for an engine class
 * @xe: device the context will run on
 * @class: engine class of the context
 *
 * The returned size covers the whole context image including the 4K PPHWSP
 * at its start (callers such as xe_lrc_dump_default() subtract
 * LRC_PPHWSP_SIZE to get at the register state).
 */
size_t xe_lrc_size(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return 4 * SZ_4K;
		else
			return 14 * SZ_4K;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			return 3 * SZ_4K;
		else
			return 14 * SZ_4K;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		return 2 * SZ_4K;
	}
}
64 
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - number of NOPs are set in lower bits
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of NOPs or registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 6 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
#define END 0
{
	const u32 base = hwe->mmio_base;

	/* A zero byte (END) terminates the table */
	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		/* Emit the LRI header covering the next "count" registers */
		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Accumulate 7 bits per byte while the continuation bit is set */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write only the register address; the value dword stays 0 */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
141 
/* gen12 non-render engine context layout; decoded by set_offsets() */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
173 
/* DG2 non-render engine context layout; decoded by set_offsets() */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	END
};
207 
/* gen12 render engine context layout; decoded by set_offsets() */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	END
};
303 
/* Xe_HP (verx100 >= 1250) render engine context layout */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
344 
/* DG2 render engine context layout */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
387 
/* MTL (verx100 >= 1270) render engine context layout */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	END
};
430 
/* Context register state shared by all Xe2 engine classes */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */
461 
/* Xe2 render engine: common state plus render-only registers */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	END
};
476 
/* Xe2 copy engine: common state plus blitter control registers */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	END
};
487 
/* Xe2 video/other engines: just the common state */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	END
};
493 
494 #undef END
495 #undef REG16
496 #undef REG
497 #undef LRI
498 #undef NOP
499 
500 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
501 {
502 	if (class == XE_ENGINE_CLASS_RENDER) {
503 		if (GRAPHICS_VER(xe) >= 20)
504 			return xe2_rcs_offsets;
505 		else if (GRAPHICS_VERx100(xe) >= 1270)
506 			return mtl_rcs_offsets;
507 		else if (GRAPHICS_VERx100(xe) >= 1255)
508 			return dg2_rcs_offsets;
509 		else if (GRAPHICS_VERx100(xe) >= 1250)
510 			return xehp_rcs_offsets;
511 		else
512 			return gen12_rcs_offsets;
513 	} else if (class == XE_ENGINE_CLASS_COPY) {
514 		if (GRAPHICS_VER(xe) >= 20)
515 			return xe2_bcs_offsets;
516 		else
517 			return gen12_xcs_offsets;
518 	} else {
519 		if (GRAPHICS_VER(xe) >= 20)
520 			return xe2_xcs_offsets;
521 		else if (GRAPHICS_VERx100(xe) >= 1255)
522 			return dg2_xcs_offsets;
523 		else
524 			return gen12_xcs_offsets;
525 	}
526 }
527 
528 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
529 {
530 	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
531 				    _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
532 				    CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
533 
534 	/* TODO: Timestamp */
535 }
536 
/*
 * For SR-IOV VFs with memory-based interrupts, patch the context image so
 * the interrupt enable/status pointers are (re)loaded from the VF's memirq
 * pages.  No-op on native devices or VFs without memirq support.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	/* MI_LOAD_REGISTER_MEM: load RING_IMR from the memirq "enable" page */
	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* MI_LOAD_REGISTER_IMM: point status/source report regs at memirq pages */
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}
557 
558 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
559 {
560 	struct xe_device *xe = gt_to_xe(hwe->gt);
561 
562 	if (GRAPHICS_VERx100(xe) >= 1250)
563 		return 0x70;
564 	else
565 		return 0x60;
566 }
567 
/*
 * Clear STOP_RING in the context's MI_MODE slot using the masked-write
 * format: the high 16 bits select which low bits the hardware applies.
 */
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;		/* value bit: 0 */
	regs[x + 1] |= STOP_RING << 16;		/* mask bit: apply the write */
}
576 
/* The ring buffer lives at the very start of the LRC BO. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}
581 
/* The PPHWSP immediately follows the ring buffer in the LRC BO. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}
586 
/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

/* Driver-defined scratch slots inside the 4K PPHWSP */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K
594 
595 static size_t lrc_reg_size(struct xe_device *xe)
596 {
597 	if (GRAPHICS_VERx100(xe) >= 1250)
598 		return 96 * sizeof(u32);
599 	else
600 		return 80 * sizeof(u32);
601 }
602 
/* Bytes at the head of an LRC image (PPHWSP + register state) to skip over. */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}
607 
/* BO offset of the fence seqno slot. */
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}
613 
/* BO offset of the start-seqno slot. */
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}
619 
/* BO offset of the parallel-submission scratch area. */
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
625 
/* BO offset of the context register state, right after the PPHWSP. */
static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
630 
/*
 * For each element above, generate __xe_lrc_<elem>_map() (an iosys_map into
 * the BO at the element's offset) and __xe_lrc_<elem>_ggtt_addr() (its GGTT
 * address), built on the matching __xe_lrc_<elem>_offset() helper.
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)

#undef DECL_MAP_ADDR_HELPERS
653 
/* GGTT address of the context image (the PPHWSP), used in the descriptor. */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
658 
/* Read dword @reg_nr from the context register state. */
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}
668 
/* Write @val into dword @reg_nr of the context register state. */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}
678 
/*
 * Build a from-scratch context image (used when no default LRC has been
 * recorded yet for this engine class).
 *
 * Return: kzalloc'ed buffer of xe_lrc_size() bytes that the caller must
 * kfree(), or NULL on allocation failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	void *data;
	u32 *regs;

	data = kzalloc(xe_lrc_size(xe, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);

	return data;
}
698 
/* Point the context's PDP0 entry at @vm's top-level page directory. */
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}
706 
/*
 * Context-image dword slots (table index + 1 to land on the value dword)
 * and field shifts used for ASID / access-counter programming.
 */
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
#define ACC_GRANULARITY_S       20
#define ACC_NOTIFY_S            16
711 
/**
 * xe_lrc_init - Initialize an LRC (ring buffer + context image)
 * @lrc: the LRC to initialize
 * @hwe: hardware engine the context will run on
 * @q: exec queue (source of USM access-counter settings)
 * @vm: VM to bind the context to, or NULL
 * @ring_size: bytes of ring buffer placed at the start of the BO
 *
 * Allocates and pins the backing BO, seeds the PPHWSP and register state
 * (copied from the per-class default LRC when one exists, otherwise built
 * from scratch), then programs ring registers and the context descriptor.
 *
 * Return: 0 on success, negative error code on failure.
 */
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	int err;

	lrc->flags = 0;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
				      ring_size + xe_lrc_size(xe, hwe->class),
				      ttm_bo_type_kernel,
				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
				      XE_BO_CREATE_GGTT_BIT);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No recorded default for this class yet: build the image from scratch */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_lrc_size(xe, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_lrc_size(xe, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
			     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
				     (q->usm.acc_granularity <<
				      ACC_GRANULARITY_S) | vm->usm.asid);
	if (xe->info.has_usm && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
				     (q->usm.acc_notify << ACC_NOTIFY_S) |
				     q->usm.acc_trigger);

	lrc->desc = LRC_VALID;
	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	/* Older platforms carry the engine identity in the descriptor itself */
	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Seed both seqno slots to "last completed" (next_seqno - 1) */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}
819 
/* Tear down an LRC: release the fence context, then unpin and drop the BO. */
void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}
828 
/* Set RING_HEAD in the context image. */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}
833 
/* Read RING_HEAD from the context image, masked to the address field. */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}
838 
/*
 * Free bytes in the ring between tail and head; the power-of-two mask
 * handles wrap-around, and the -1/+1 keeps one slot free so that a
 * completely empty ring reports "size" rather than 0.
 */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}
847 
/*
 * Copy @size bytes at the current tail and advance it; the mask wraps the
 * tail when the write ends exactly at the ring boundary.  Callers ensure
 * the copy itself does not cross the boundary.
 */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}
857 
/**
 * xe_lrc_write_ring - Emit @size bytes into the LRC's ring buffer
 * @lrc: LRC whose ring to write
 * @data: commands to copy
 * @size: byte count, must be dword (4-byte) aligned
 *
 * Splits the copy in two when it would cross the end of the ring, and pads
 * with a single MI_NOOP when @size is not qword (8-byte) aligned.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;	/* room before wrap */
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}
885 
/* Full context descriptor: flag bits OR'ed with the image's GGTT address. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
890 
/* GGTT address of the seqno slot (for HW writes/waits). */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}
895 
/* Create a HW fence tracked through this LRC's seqno slot. */
struct dma_fence *xe_lrc_create_seqno_fence(struct xe_lrc *lrc)
{
	return &xe_hw_fence_create(&lrc->fence_ctx,
				   __xe_lrc_seqno_map(lrc))->dma;
}
901 
/* Current seqno value as last written by the hardware/driver. */
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
908 
/* Current start-seqno value from the PPHWSP slot. */
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}
915 
/* GGTT address of the start-seqno slot. */
u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}
920 
/* GGTT address of the parallel-submission scratch area. */
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}
925 
/* CPU-accessible mapping of the parallel-submission scratch area. */
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}
930 
/* Decode the total dword length of an instruction from its header dword. */
static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}
945 
/*
 * Decode and print one MI_* command from an LRC image.
 *
 * Return: the number of dwords consumed.
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Coalesce a run of NOOPs into a single line of output */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		/* A well-formed LRM is 4 dwords: header, reg, addr lo/hi */
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}
1015 
/*
 * Decode and print one GFXPIPE (state/3D) command from an LRC image.
 *
 * Return: the number of dwords consumed.
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}
1163 
1164 void xe_lrc_dump_default(struct drm_printer *p,
1165 			 struct xe_gt *gt,
1166 			 enum xe_engine_class hwe_class)
1167 {
1168 	u32 *dw;
1169 	int remaining_dw, num_dw;
1170 
1171 	if (!gt->default_lrc[hwe_class]) {
1172 		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1173 		return;
1174 	}
1175 
1176 	/*
1177 	 * Skip the beginning of the LRC since it contains the per-process
1178 	 * hardware status page.
1179 	 */
1180 	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1181 	remaining_dw = (xe_lrc_size(gt_to_xe(gt), hwe_class) - LRC_PPHWSP_SIZE) / 4;
1182 
1183 	while (remaining_dw > 0) {
1184 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1185 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1186 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1187 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1188 		} else {
1189 			num_dw = min(instr_dw(*dw), remaining_dw);
1190 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1191 				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1192 				   num_dw);
1193 		}
1194 
1195 		dw += num_dw;
1196 		remaining_dw -= num_dw;
1197 	}
1198 }
1199 
/*
 * One entry of a per-platform non-register state table: a GFXPIPE
 * instruction header plus the instruction's total length, emitted by
 * xe_lrc_emit_hwe_state_instructions().
 */
struct instr_state {
	u32 instr;	/* GFXPIPE instruction header dword */
	u16 num_dw;	/* total instruction size in dwords, including the header */
};
1204 
/*
 * SVG (non-register 3D pipeline) state emitted for render engines on
 * Xe_HPG-based platforms (graphics versions 12.55 and 12.70..20.04; see
 * xe_lrc_emit_hwe_state_instructions()).  Each num_dw is the instruction's
 * total dword count, including the header dword.
 *
 * NOTE(review): entry order presumably mirrors the hardware's expected
 * state layout — do not reorder without checking the Bspec.
 */
static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1257 
1258 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1259 {
1260 	struct xe_gt *gt = q->hwe->gt;
1261 	struct xe_device *xe = gt_to_xe(gt);
1262 	const struct instr_state *state_table = NULL;
1263 	int state_table_size = 0;
1264 
1265 	/*
1266 	 * At the moment we only need to emit non-register state for the RCS
1267 	 * engine.
1268 	 */
1269 	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
1270 		return;
1271 
1272 	switch (GRAPHICS_VERx100(xe)) {
1273 	case 1255:
1274 	case 1270 ... 2004:
1275 		state_table = xe_hpg_svg_state;
1276 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1277 		break;
1278 	default:
1279 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1280 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1281 		return;
1282 	}
1283 
1284 	for (int i = 0; i < state_table_size; i++) {
1285 		u32 instr = state_table[i].instr;
1286 		u16 num_dw = state_table[i].num_dw;
1287 		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1288 
1289 		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1290 		xe_gt_assert(gt, num_dw != 0);
1291 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1292 
1293 		/*
1294 		 * Xe2's SVG context is the same as the one on DG2 / MTL
1295 		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1296 		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1297 		 * Just make the replacement here rather than defining a
1298 		 * whole separate table for the single trivial change.
1299 		 */
1300 		if (GRAPHICS_VER(xe) >= 20 &&
1301 		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1302 			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1303 
1304 		bb->cs[bb->len] = instr;
1305 		if (!is_single_dw)
1306 			bb->cs[bb->len] |= (num_dw - 2);
1307 
1308 		bb->len += num_dw;
1309 	}
1310 }
1311