xref: /linux/drivers/gpu/drm/xe/xe_lrc.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_lrc.h"
7 
8 #include <generated/xe_wa_oob.h>
9 
10 #include <linux/ascii85.h>
11 
12 #include "instructions/xe_mi_commands.h"
13 #include "instructions/xe_gfxpipe_commands.h"
14 #include "instructions/xe_gfx_state_commands.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_lrc_layout.h"
17 #include "xe_bb.h"
18 #include "xe_bo.h"
19 #include "xe_device.h"
20 #include "xe_drm_client.h"
21 #include "xe_exec_queue_types.h"
22 #include "xe_gt.h"
23 #include "xe_gt_printk.h"
24 #include "xe_hw_fence.h"
25 #include "xe_map.h"
26 #include "xe_memirq.h"
27 #include "xe_sriov.h"
28 #include "xe_vm.h"
29 #include "xe_wa.h"
30 
/*
 * Fields of the 64-bit LRC descriptor (lrc->desc) that xe_lrc_init()
 * assembles for each context.
 */
#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
/* Value programmed into the LRC_ADDRESSING_MODE field */
#define LRC_LEGACY_64B_CONTEXT			3

/* Only placed in the descriptor when GRAPHICS_VERx100 < 1250 */
#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

/* Size of the indirect ring state page appended to the context image */
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
40 
41 static struct xe_device *
42 lrc_to_xe(struct xe_lrc *lrc)
43 {
44 	return gt_to_xe(lrc->fence_ctx.gt);
45 }
46 
47 size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
48 {
49 	struct xe_device *xe = gt_to_xe(gt);
50 	size_t size;
51 
52 	switch (class) {
53 	case XE_ENGINE_CLASS_RENDER:
54 		if (GRAPHICS_VER(xe) >= 20)
55 			size = 4 * SZ_4K;
56 		else
57 			size = 14 * SZ_4K;
58 		break;
59 	case XE_ENGINE_CLASS_COMPUTE:
60 		/* 14 pages since graphics_ver == 11 */
61 		if (GRAPHICS_VER(xe) >= 20)
62 			size = 3 * SZ_4K;
63 		else
64 			size = 14 * SZ_4K;
65 		break;
66 	default:
67 		WARN(1, "Unknown engine class: %d", class);
68 		fallthrough;
69 	case XE_ENGINE_CLASS_COPY:
70 	case XE_ENGINE_CLASS_VIDEO_DECODE:
71 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
72 	case XE_ENGINE_CLASS_OTHER:
73 		size = 2 * SZ_4K;
74 	}
75 
76 	/* Add indirect ring state page */
77 	if (xe_gt_has_indirect_ring_state(gt))
78 		size += LRC_INDIRECT_RING_STATE_SIZE;
79 
80 	return size;
81 }
82 
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of dwords to skip is taken from bits [6:0]
 * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
 *      MI_LRI_FORCE_POSTED
 * [5:0]: Number of registers to set values to in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
 * number of registers. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for values bigger
 * than that. Those macros already set all the bits documented below correctly:
 *
 * [7]: When a register offset needs more than 7 bits, use additional bytes, to
 *      follow, for the lower bits
 * [6:0]: Register offset (in dwords), without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out. The table is terminated by a 0 byte, after which
 * MI_BATCH_BUFFER_END | BIT(0) is written at the current position in @regs.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
/* Table-building helpers; they produce the byte encoding described above. */
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		/* Emit the LRI header; MI_LRI_LRM_CS_MMIO is always set */
		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			/* Accumulate 7 bits per byte while bit 7 flags a continuation */
			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			/* Write the register address; the value dword is left untouched */
			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}
158 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the copy class when GRAPHICS_VER < 20 and for the other
 * non-render classes when GRAPHICS_VERx100 < 1255.
 */
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	/* Table terminator */
	0
};
190 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for classes other than render and copy when
 * GRAPHICS_VERx100 >= 1255 and GRAPHICS_VER < 20.
 */
static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	/* Table terminator */
	0
};
224 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the render class when GRAPHICS_VERx100 < 1250.
 */
static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	/* Table terminator */
	0
};
320 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the render class when 1250 <= GRAPHICS_VERx100 < 1255.
 */
static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	/* Table terminator */
	0
};
361 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the render class when 1255 <= GRAPHICS_VERx100 < 1270.
 */
static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	/* Table terminator */
	0
};
404 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the render class when GRAPHICS_VERx100 >= 1270 and
 * GRAPHICS_VER < 20.
 */
static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	/* Table terminator */
	0
};
447 
/*
 * Leading part of the encoded layout shared by the xe2_*_offsets tables.
 * The bracketed number on each entry is the dword index that entry lands on
 * in the decoded register state (a NOP(n) advances by n dwords, an LRI
 * header by one, each register by two).
 */
#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */
478 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the render class when GRAPHICS_VER >= 20. Bracketed numbers
 * are dword indices in the decoded register state.
 */
static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	/* Table terminator */
	0
};
493 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for the copy class when GRAPHICS_VER >= 20. Bracketed numbers
 * are dword indices in the decoded register state.
 */
static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	/* Table terminator */
	0
};
504 
/*
 * Encoded context register layout consumed by set_offsets(). reg_offsets()
 * selects it for classes other than render and copy when GRAPHICS_VER >= 20;
 * it consists of the common part only.
 */
static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	/* Table terminator */
	0
};
510 
/*
 * Encoded layout of the indirect ring state page. empty_lrc_data() decodes
 * it with set_offsets() into the last LRC_INDIRECT_RING_STATE_SIZE bytes of
 * the context image when the GT uses indirect ring state. Bracketed numbers
 * are dword indices within that page.
 */
static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                 /* [0x24] */

	/* Table terminator */
	0
};
536 
/*
 * The table-encoding helpers are only meant for the offset tables above.
 * NOTE(review): POSTED is not undefined here - confirm that is intentional.
 */
#undef REG16
#undef REG
#undef LRI
#undef NOP
541 
542 static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
543 {
544 	if (class == XE_ENGINE_CLASS_RENDER) {
545 		if (GRAPHICS_VER(xe) >= 20)
546 			return xe2_rcs_offsets;
547 		else if (GRAPHICS_VERx100(xe) >= 1270)
548 			return mtl_rcs_offsets;
549 		else if (GRAPHICS_VERx100(xe) >= 1255)
550 			return dg2_rcs_offsets;
551 		else if (GRAPHICS_VERx100(xe) >= 1250)
552 			return xehp_rcs_offsets;
553 		else
554 			return gen12_rcs_offsets;
555 	} else if (class == XE_ENGINE_CLASS_COPY) {
556 		if (GRAPHICS_VER(xe) >= 20)
557 			return xe2_bcs_offsets;
558 		else
559 			return gen12_xcs_offsets;
560 	} else {
561 		if (GRAPHICS_VER(xe) >= 20)
562 			return xe2_xcs_offsets;
563 		else if (GRAPHICS_VERx100(xe) >= 1255)
564 			return dg2_xcs_offsets;
565 		else
566 			return gen12_xcs_offsets;
567 	}
568 }
569 
570 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
571 {
572 	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
573 						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
574 
575 	if (xe_gt_has_indirect_ring_state(hwe->gt))
576 		regs[CTX_CONTEXT_CONTROL] |=
577 			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
578 
579 	/* TODO: Timestamp */
580 }
581 
582 static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
583 {
584 	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
585 	struct xe_device *xe = gt_to_xe(hwe->gt);
586 
587 	if (!xe_device_uses_memirq(xe))
588 		return;
589 
590 	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
591 					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
592 	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
593 	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
594 
595 	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
596 				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
597 	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
598 	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
599 	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
600 	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);
601 }
602 
603 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
604 {
605 	struct xe_device *xe = gt_to_xe(hwe->gt);
606 
607 	if (GRAPHICS_VERx100(xe) >= 1250)
608 		return 0x70;
609 	else
610 		return 0x60;
611 }
612 
613 static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
614 {
615 	int x;
616 
617 	x = lrc_ring_mi_mode(hwe);
618 	regs[x + 1] &= ~STOP_RING;
619 	regs[x + 1] |= STOP_RING << 16;
620 }
621 
622 static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
623 {
624 	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
625 }
626 
/* The ring sits at the very start of the LRC buffer object. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}
631 
/*
 * Byte offset of the Per-Process HW Status Page within the LRC buffer
 * object: it immediately follows the ring.
 */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}
636 
/*
 * Make the magic macros work: DECL_MAP_ADDR_HELPERS() pastes together
 * __xe_lrc_<elem>_offset, so alias the two public offset functions to the
 * names it expects.
 */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

/* Byte offsets of driver-owned data inside the PPHWSP */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
/* The PPHWSP occupies one 4K page */
#define LRC_PPHWSP_SIZE SZ_4K
646 
/*
 * Byte offset of the context register state within the LRC buffer object:
 * it starts right after the PPHWSP page.
 */
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
651 
652 static size_t lrc_reg_size(struct xe_device *xe)
653 {
654 	if (GRAPHICS_VERx100(xe) >= 1250)
655 		return 96 * sizeof(u32);
656 	else
657 		return 80 * sizeof(u32);
658 }
659 
/*
 * Combined size in bytes of the PPHWSP page and the register-state area
 * reported by lrc_reg_size().
 */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}
664 
/* Byte offset of the seqno within the LRC buffer object. */
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}
670 
/* Byte offset of the start seqno within the LRC buffer object. */
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}
676 
/* Byte offset of the context job timestamp within the LRC buffer object. */
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
682 
/* Byte offset of the "parallel" area within the LRC buffer object. */
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
688 
/*
 * Byte offset of the CTX_TIMESTAMP dword of the context register state
 * within the LRC buffer object.
 */
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}
693 
/* Byte offset of the indirect ring state page within the LRC buffer object. */
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}
699 
/*
 * For every @elem this generates two helpers built on top of
 * __xe_lrc_<elem>_offset():
 *
 *  - __xe_lrc_<elem>_map(): a copy of the LRC BO's vmap advanced to the
 *    element; asserts that the BO mapping is not null.
 *  - __xe_lrc_<elem>_ggtt_addr(): the BO's GGTT address plus the element
 *    offset. Marked __maybe_unused since not every element needs it.
 */
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS
725 
/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: GGTT address of the CTX_TIMESTAMP dword in the context register
 * state of @lrc
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}
736 
737 /**
738  * xe_lrc_ctx_timestamp() - Read ctx timestamp value
739  * @lrc: Pointer to the lrc.
740  *
741  * Returns: ctx timestamp value
742  */
743 u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
744 {
745 	struct xe_device *xe = lrc_to_xe(lrc);
746 	struct iosys_map map;
747 
748 	map = __xe_lrc_ctx_timestamp_map(lrc);
749 	return xe_map_read32(xe, &map);
750 }
751 
/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: GGTT address of the ctx job timestamp slot, which lives at
 * LRC_CTX_JOB_TIMESTAMP_OFFSET inside the PPHWSP of @lrc
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}
762 
763 /**
764  * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
765  * @lrc: Pointer to the lrc.
766  *
767  * Returns: ctx timestamp job value
768  */
769 u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
770 {
771 	struct xe_device *xe = lrc_to_xe(lrc);
772 	struct iosys_map map;
773 
774 	map = __xe_lrc_ctx_job_timestamp_map(lrc);
775 	return xe_map_read32(xe, &map);
776 }
777 
/*
 * GGTT address of the LRC as used in the context descriptor: this is the
 * address of the PPHWSP, not of the ring at the start of the buffer object.
 */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}
782 
783 u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
784 {
785 	if (!xe_lrc_has_indirect_ring_state(lrc))
786 		return 0;
787 
788 	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
789 }
790 
791 static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
792 {
793 	struct xe_device *xe = lrc_to_xe(lrc);
794 	struct iosys_map map;
795 
796 	map = __xe_lrc_indirect_ring_map(lrc);
797 	iosys_map_incr(&map, reg_nr * sizeof(u32));
798 	return xe_map_read32(xe, &map);
799 }
800 
801 static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
802 					  int reg_nr, u32 val)
803 {
804 	struct xe_device *xe = lrc_to_xe(lrc);
805 	struct iosys_map map;
806 
807 	map = __xe_lrc_indirect_ring_map(lrc);
808 	iosys_map_incr(&map, reg_nr * sizeof(u32));
809 	xe_map_write32(xe, &map, val);
810 }
811 
812 u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
813 {
814 	struct xe_device *xe = lrc_to_xe(lrc);
815 	struct iosys_map map;
816 
817 	map = __xe_lrc_regs_map(lrc);
818 	iosys_map_incr(&map, reg_nr * sizeof(u32));
819 	return xe_map_read32(xe, &map);
820 }
821 
822 void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
823 {
824 	struct xe_device *xe = lrc_to_xe(lrc);
825 	struct iosys_map map;
826 
827 	map = __xe_lrc_regs_map(lrc);
828 	iosys_map_incr(&map, reg_nr * sizeof(u32));
829 	xe_map_write32(xe, &map, val);
830 }
831 
832 static void *empty_lrc_data(struct xe_hw_engine *hwe)
833 {
834 	struct xe_gt *gt = hwe->gt;
835 	void *data;
836 	u32 *regs;
837 
838 	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
839 	if (!data)
840 		return NULL;
841 
842 	/* 1st page: Per-Process of HW status Page */
843 	regs = data + LRC_PPHWSP_SIZE;
844 	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
845 	set_context_control(regs, hwe);
846 	set_memory_based_intr(regs, hwe);
847 	reset_stop_ring(regs, hwe);
848 	if (xe_gt_has_indirect_ring_state(gt)) {
849 		regs = data + xe_gt_lrc_size(gt, hwe->class) -
850 		       LRC_INDIRECT_RING_STATE_SIZE;
851 		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
852 	}
853 
854 	return data;
855 }
856 
857 static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
858 {
859 	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
860 
861 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
862 	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
863 }
864 
865 static void xe_lrc_finish(struct xe_lrc *lrc)
866 {
867 	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
868 	xe_bo_lock(lrc->bo, false);
869 	xe_bo_unpin(lrc->bo);
870 	xe_bo_unlock(lrc->bo);
871 	xe_bo_put(lrc->bo);
872 }
873 
/*
 * Dword indices into the context register state. The "+ 1" selects the value
 * dword that follows the register-offset dword written by set_offsets().
 */
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
876 
/*
 * Initialize @lrc for engine @hwe: allocate, pin and map the backing buffer
 * object (ring followed by the context image), populate the context image,
 * program the ring registers and build the context descriptor.
 *
 * @vm may be NULL; when set, the PPGTT root is written into the context and
 * LRC_PRIVILEGE is set in the descriptor. @ring_size is the ring size in
 * bytes.
 *
 * Returns 0 on success or a negative error code. On failure after the buffer
 * object has been created, everything is released again via xe_lrc_finish().
 */
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	/* The BO holds the ring first, then the context image */
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	/* No default image recorded for this class yet: build a blank one */
	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init Per-Process of HW status Page, LRC / context state to known
	 * values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		/* Zero the PPHWSP and copy everything after it from the default image */
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		/* Copy the freshly built blank image, PPHWSP included */
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	/* Ring registers live in the indirect ring state page when it is in use */
	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	/* Assemble the context descriptor; the GGTT address is OR'ed in later */
	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	/* Seed the ring with a command that enables arbitration */
	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	/* Both seqno slots start one below the next seqno to be handed out */
	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}
1001 
1002 /**
1003  * xe_lrc_create - Create a LRC
1004  * @hwe: Hardware Engine
1005  * @vm: The VM (address space)
1006  * @ring_size: LRC ring size
1007  *
1008  * Allocate and initialize the Logical Ring Context (LRC).
1009  *
1010  * Return pointer to created LRC upon success and an error pointer
1011  * upon failure.
1012  */
1013 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
1014 			     u32 ring_size)
1015 {
1016 	struct xe_lrc *lrc;
1017 	int err;
1018 
1019 	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
1020 	if (!lrc)
1021 		return ERR_PTR(-ENOMEM);
1022 
1023 	err = xe_lrc_init(lrc, hwe, vm, ring_size);
1024 	if (err) {
1025 		kfree(lrc);
1026 		return ERR_PTR(err);
1027 	}
1028 
1029 	return lrc;
1030 }
1031 
/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory. @ref must be the refcount member embedded in
 * a struct xe_lrc.
 */
void xe_lrc_destroy(struct kref *ref)
{
	/* Recover the LRC that embeds this kref */
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}
1046 
1047 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
1048 {
1049 	if (xe_lrc_has_indirect_ring_state(lrc))
1050 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
1051 	else
1052 		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
1053 }
1054 
1055 u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
1056 {
1057 	if (xe_lrc_has_indirect_ring_state(lrc))
1058 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
1059 	else
1060 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
1061 }
1062 
1063 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
1064 {
1065 	if (xe_lrc_has_indirect_ring_state(lrc))
1066 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
1067 	else
1068 		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
1069 }
1070 
1071 u32 xe_lrc_ring_head(struct xe_lrc *lrc)
1072 {
1073 	if (xe_lrc_has_indirect_ring_state(lrc))
1074 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
1075 	else
1076 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
1077 }
1078 
1079 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
1080 {
1081 	const u32 head = xe_lrc_ring_head(lrc);
1082 	const u32 tail = lrc->ring.tail;
1083 	const u32 size = lrc->ring.size;
1084 
1085 	return ((head - tail - 1) & (size - 1)) + 1;
1086 }
1087 
1088 static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
1089 				const void *data, size_t size)
1090 {
1091 	struct xe_device *xe = lrc_to_xe(lrc);
1092 
1093 	iosys_map_incr(&ring, lrc->ring.tail);
1094 	xe_map_memcpy_to(xe, &ring, 0, data, size);
1095 	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
1096 }
1097 
1098 void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
1099 {
1100 	struct xe_device *xe = lrc_to_xe(lrc);
1101 	struct iosys_map ring;
1102 	u32 rhs;
1103 	size_t aligned_size;
1104 
1105 	xe_assert(xe, IS_ALIGNED(size, 4));
1106 	aligned_size = ALIGN(size, 8);
1107 
1108 	ring = __xe_lrc_ring_map(lrc);
1109 
1110 	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
1111 	rhs = lrc->ring.size - lrc->ring.tail;
1112 	if (size > rhs) {
1113 		__xe_lrc_write_ring(lrc, ring, data, rhs);
1114 		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
1115 	} else {
1116 		__xe_lrc_write_ring(lrc, ring, data, size);
1117 	}
1118 
1119 	if (aligned_size > size) {
1120 		u32 noop = MI_NOOP;
1121 
1122 		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
1123 	}
1124 }
1125 
/*
 * Full 64-bit context descriptor: the flag/field bits prepared in lrc->desc
 * combined with the GGTT address of the LRC.
 */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}
1130 
/* GGTT address of the seqno slot in the PPHWSP of @lrc. */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}
1135 
/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence. This is a thin wrapper
 * around xe_hw_fence_alloc(); pair it with xe_lrc_init_seqno_fence() or
 * xe_lrc_free_seqno_fence().
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}
1148 
/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized. This is a thin wrapper around xe_hw_fence_free().
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}
1160 
1161 /**
1162  * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
1163  * @lrc: Pointer to the lrc.
1164  * @fence: Pointer to the fence to initialize.
1165  *
1166  * Initializes a pre-allocated lrc seqno fence.
1167  * After initialization, the fence is subject to normal
1168  * dma-fence refcounting.
1169  */
1170 void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
1171 {
1172 	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
1173 }
1174 
1175 s32 xe_lrc_seqno(struct xe_lrc *lrc)
1176 {
1177 	struct iosys_map map = __xe_lrc_seqno_map(lrc);
1178 
1179 	return xe_map_read32(lrc_to_xe(lrc), &map);
1180 }
1181 
1182 s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
1183 {
1184 	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
1185 
1186 	return xe_map_read32(lrc_to_xe(lrc), &map);
1187 }
1188 
1189 u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
1190 {
1191 	return __xe_lrc_start_seqno_ggtt_addr(lrc);
1192 }
1193 
1194 u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
1195 {
1196 	return __xe_lrc_parallel_ggtt_addr(lrc);
1197 }
1198 
1199 struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
1200 {
1201 	return __xe_lrc_parallel_map(lrc);
1202 }
1203 
1204 static int instr_dw(u32 cmd_header)
1205 {
1206 	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
1207 	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
1208 	    GFXPIPE_SINGLE_DW_CMD(0, 0))
1209 		return 1;
1210 
1211 	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
1212 	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
1213 		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
1214 
1215 	/* Most instructions have the # of dwords (minus 2) in 7:0 */
1216 	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
1217 }
1218 
1219 static int dump_mi_command(struct drm_printer *p,
1220 			   struct xe_gt *gt,
1221 			   u32 *dw,
1222 			   int remaining_dw)
1223 {
1224 	u32 inst_header = *dw;
1225 	u32 numdw = instr_dw(inst_header);
1226 	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
1227 	int num_noop;
1228 
1229 	/* First check for commands that don't have/use a '# DW' field */
1230 	switch (inst_header & MI_OPCODE) {
1231 	case MI_NOOP:
1232 		num_noop = 1;
1233 		while (num_noop < remaining_dw &&
1234 		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
1235 			num_noop++;
1236 		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
1237 		return num_noop;
1238 
1239 	case MI_TOPOLOGY_FILTER:
1240 		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
1241 		return 1;
1242 
1243 	case MI_BATCH_BUFFER_END:
1244 		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
1245 		/* Return 'remaining_dw' to consume the rest of the LRC */
1246 		return remaining_dw;
1247 	}
1248 
1249 	/*
1250 	 * Any remaining commands include a # of dwords.  We should make sure
1251 	 * it doesn't exceed the remaining size of the LRC.
1252 	 */
1253 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1254 		numdw = remaining_dw;
1255 
1256 	switch (inst_header & MI_OPCODE) {
1257 	case MI_LOAD_REGISTER_IMM:
1258 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
1259 			   inst_header, (numdw - 1) / 2);
1260 		for (int i = 1; i < numdw; i += 2)
1261 			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
1262 		return numdw;
1263 
1264 	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
1265 		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
1266 			   inst_header,
1267 			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
1268 			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
1269 		if (numdw == 4)
1270 			drm_printf(p, " - %#6x = %#010llx\n",
1271 				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
1272 		else
1273 			drm_printf(p, " - %*ph (%s)\n",
1274 				   (int)sizeof(u32) * (numdw - 1), dw + 1,
1275 				   numdw < 4 ? "truncated" : "malformed");
1276 		return numdw;
1277 
1278 	case MI_FORCE_WAKEUP:
1279 		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
1280 		return numdw;
1281 
1282 	default:
1283 		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
1284 			   inst_header, opcode, numdw);
1285 		return numdw;
1286 	}
1287 }
1288 
/*
 * Decode and print a single GFXPIPE instruction.
 *
 * @p: printer receiving the decoded output
 * @gt: GT used for the warning raised on an over-long instruction
 * @dw: pointer to the instruction's header dword
 * @remaining_dw: number of dwords left in the buffer, starting at @dw
 *
 * Only the instruction name and its dword count are printed; operands are
 * not decoded.  Returns the number of dwords consumed.
 */
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
/*
 * Each MATCH()/MATCH3D() expands to a complete 'case' that prints the
 * command name and returns.  They rely on locals named 'p', 'dw' and
 * 'numdw' being in scope.  MATCH3D() prepends "CMD_" to form the case
 * label, for names starting with a digit.  The macros are not #undef'd
 * here; MATCH() is also used by dump_gfx_state_command() below.
 */
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		/* Not in the list above: print the raw header fields instead */
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}
1439 
1440 static int dump_gfx_state_command(struct drm_printer *p,
1441 				  struct xe_gt *gt,
1442 				  u32 *dw,
1443 				  int remaining_dw)
1444 {
1445 	u32 numdw = instr_dw(*dw);
1446 	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
1447 
1448 	/*
1449 	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
1450 	 * remaining size of the LRC.
1451 	 */
1452 	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
1453 		numdw = remaining_dw;
1454 
1455 	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
1456 	MATCH(STATE_WRITE_INLINE);
1457 
1458 	default:
1459 		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
1460 			   *dw, opcode, numdw);
1461 		return numdw;
1462 	}
1463 }
1464 
1465 void xe_lrc_dump_default(struct drm_printer *p,
1466 			 struct xe_gt *gt,
1467 			 enum xe_engine_class hwe_class)
1468 {
1469 	u32 *dw;
1470 	int remaining_dw, num_dw;
1471 
1472 	if (!gt->default_lrc[hwe_class]) {
1473 		drm_printf(p, "No default LRC for class %d\n", hwe_class);
1474 		return;
1475 	}
1476 
1477 	/*
1478 	 * Skip the beginning of the LRC since it contains the per-process
1479 	 * hardware status page.
1480 	 */
1481 	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
1482 	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
1483 
1484 	while (remaining_dw > 0) {
1485 		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
1486 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
1487 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
1488 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
1489 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
1490 			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
1491 		} else {
1492 			num_dw = min(instr_dw(*dw), remaining_dw);
1493 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
1494 				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
1495 				   num_dw);
1496 		}
1497 
1498 		dw += num_dw;
1499 		remaining_dw -= num_dw;
1500 	}
1501 }
1502 
/*
 * One entry of a state-instruction table: the instruction header to emit
 * and the total number of dwords the instruction occupies, header included
 * (the emitter encodes num_dw - 2 into the header's length field).
 */
struct instr_state {
	u32 instr;	/* instruction header, without the length field */
	u16 num_dw;	/* total size of the instruction in dwords */
};
1507 
/*
 * SVG state instructions emitted by xe_lrc_emit_hwe_state_instructions()
 * for render engines when Wa_14019789679 applies.  Each entry gives the
 * instruction header and the instruction's total length in dwords.
 */
static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	/* Swapped for 3DSTATE_DRAWING_RECTANGLE_FAST at emit time on graphics ver >= 20 */
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
1560 
1561 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
1562 {
1563 	struct xe_gt *gt = q->hwe->gt;
1564 	struct xe_device *xe = gt_to_xe(gt);
1565 	const struct instr_state *state_table = NULL;
1566 	int state_table_size = 0;
1567 
1568 	/*
1569 	 * Wa_14019789679
1570 	 *
1571 	 * If the driver doesn't explicitly emit the SVG instructions while
1572 	 * setting up the default LRC, the context switch will write 0's
1573 	 * (noops) into the LRC memory rather than the expected instruction
1574 	 * headers.  Application contexts start out as a copy of the default
1575 	 * LRC, and if they also do not emit specific settings for some SVG
1576 	 * state, then on context restore they'll unintentionally inherit
1577 	 * whatever state setting the previous context had programmed into the
1578 	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
1579 	 * prevent the hardware from resetting that state back to any specific
1580 	 * value).
1581 	 *
1582 	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
1583 	 * since that's a specific state setting that can easily cause GPU
1584 	 * hangs if unintentionally inherited.  However to be safe we'll
1585 	 * continue to emit all of the SVG state since it's best not to leak
1586 	 * any of the state between contexts, even if that leakage is harmless.
1587 	 */
1588 	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
1589 		state_table = xe_hpg_svg_state;
1590 		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
1591 	}
1592 
1593 	if (!state_table) {
1594 		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
1595 			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
1596 		return;
1597 	}
1598 
1599 	for (int i = 0; i < state_table_size; i++) {
1600 		u32 instr = state_table[i].instr;
1601 		u16 num_dw = state_table[i].num_dw;
1602 		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
1603 
1604 		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
1605 		xe_gt_assert(gt, num_dw != 0);
1606 		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
1607 
1608 		/*
1609 		 * Xe2's SVG context is the same as the one on DG2 / MTL
1610 		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
1611 		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
1612 		 * Just make the replacement here rather than defining a
1613 		 * whole separate table for the single trivial change.
1614 		 */
1615 		if (GRAPHICS_VER(xe) >= 20 &&
1616 		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
1617 			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
1618 
1619 		bb->cs[bb->len] = instr;
1620 		if (!is_single_dw)
1621 			bb->cs[bb->len] |= (num_dw - 2);
1622 
1623 		bb->len += num_dw;
1624 	}
1625 }
1626 
1627 struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
1628 {
1629 	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
1630 
1631 	if (!snapshot)
1632 		return NULL;
1633 
1634 	if (lrc->bo->vm)
1635 		xe_vm_get(lrc->bo->vm);
1636 
1637 	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
1638 	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
1639 	snapshot->head = xe_lrc_ring_head(lrc);
1640 	snapshot->tail.internal = lrc->ring.tail;
1641 	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
1642 	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
1643 	snapshot->seqno = xe_lrc_seqno(lrc);
1644 	snapshot->lrc_bo = xe_bo_get(lrc->bo);
1645 	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
1646 	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
1647 	snapshot->lrc_snapshot = NULL;
1648 	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1649 	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
1650 	return snapshot;
1651 }
1652 
1653 void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
1654 {
1655 	struct xe_bo *bo;
1656 	struct xe_vm *vm;
1657 	struct iosys_map src;
1658 
1659 	if (!snapshot)
1660 		return;
1661 
1662 	bo = snapshot->lrc_bo;
1663 	vm = bo->vm;
1664 	snapshot->lrc_bo = NULL;
1665 
1666 	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
1667 	if (!snapshot->lrc_snapshot)
1668 		goto put_bo;
1669 
1670 	xe_bo_lock(bo, false);
1671 	if (!ttm_bo_vmap(&bo->ttm, &src)) {
1672 		xe_map_memcpy_from(xe_bo_device(bo),
1673 				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
1674 				   snapshot->lrc_size);
1675 		ttm_bo_vunmap(&bo->ttm, &src);
1676 	} else {
1677 		kvfree(snapshot->lrc_snapshot);
1678 		snapshot->lrc_snapshot = NULL;
1679 	}
1680 	xe_bo_unlock(bo);
1681 put_bo:
1682 	xe_bo_put(bo);
1683 	if (vm)
1684 		xe_vm_put(vm);
1685 }
1686 
1687 void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
1688 {
1689 	unsigned long i;
1690 
1691 	if (!snapshot)
1692 		return;
1693 
1694 	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
1695 	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
1696 		   snapshot->indirect_context_desc);
1697 	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
1698 	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
1699 		   snapshot->tail.internal, snapshot->tail.memory);
1700 	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
1701 	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
1702 	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
1703 	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
1704 
1705 	if (!snapshot->lrc_snapshot)
1706 		return;
1707 
1708 	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
1709 	drm_puts(p, "\t[HWSP].data: ");
1710 	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
1711 		u32 *val = snapshot->lrc_snapshot + i;
1712 		char dumped[ASCII85_BUFSZ];
1713 
1714 		drm_puts(p, ascii85_encode(*val, dumped));
1715 	}
1716 
1717 	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
1718 	drm_puts(p, "\t[HWCTX].data: ");
1719 	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
1720 		u32 *val = snapshot->lrc_snapshot + i;
1721 		char dumped[ASCII85_BUFSZ];
1722 
1723 		drm_puts(p, ascii85_encode(*val, dumped));
1724 	}
1725 	drm_puts(p, "\n");
1726 }
1727 
1728 void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
1729 {
1730 	if (!snapshot)
1731 		return;
1732 
1733 	kvfree(snapshot->lrc_snapshot);
1734 	if (snapshot->lrc_bo) {
1735 		struct xe_vm *vm;
1736 
1737 		vm = snapshot->lrc_bo->vm;
1738 		xe_bo_put(snapshot->lrc_bo);
1739 		if (vm)
1740 			xe_vm_put(vm);
1741 	}
1742 	kfree(snapshot);
1743 }
1744 
1745 /**
1746  * xe_lrc_update_timestamp() - Update ctx timestamp
1747  * @lrc: Pointer to the lrc.
1748  * @old_ts: Old timestamp value
1749  *
1750  * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
1751  * update saved value.
1752  *
1753  * Returns: New ctx timestamp value
1754  */
1755 u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
1756 {
1757 	*old_ts = lrc->ctx_timestamp;
1758 
1759 	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
1760 
1761 	return lrc->ctx_timestamp;
1762 }
1763