// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;
	void *lrc_snapshot;
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 indirect_context_desc;
	u32 head;
	struct {
		u32 internal;
		u32 memory;
	} tail;
	u32 start_seqno;
	u32 seqno;
};

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

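/*
 * Worked example (illustrative, following the table above): a render-class
 * LRC on graphics version 20+ needs 4 * SZ_4K = 16K for the context image;
 * on a GT with indirect ring state, one more 4K page is appended for 20K
 * total. Pre-version-20 render contexts are 14 pages (56K).
 */
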
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * register offsets and commands in @regs. The encoding below is used for
 * each byte, and decoding happens in two steps: commands first, then the
 * register addresses belonging to them.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is encoded in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to load in the case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, one
 * per register in its "count" field. They are generated with the REG/REG16
 * macros: the former is used for offsets smaller than 0x200, the latter for
 * anything bigger. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: when set, the register offset does not fit in this byte alone;
 *      additional bytes follow, carrying the lower bits
 * [6:0]: register offset (in dwords), without considering the engine base
 *
 * This function only writes the commands and register offsets. Register
 * values are not filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

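/*
 * Worked example of the encoding above, using the NOP/LRI/REG/REG16 helper
 * macros defined before set_offsets(): the table prefix
 *
 *	NOP(1), LRI(13, POSTED), REG16(0x244), REG(0x034), ...
 *
 * is stored as the bytes 0x81, 0x4d, 0x81 0x11, 0x0d, ...:
 *
 *	NOP(1)          = BIT(7) | 1  = 0x81: skip one dword in @regs
 *	LRI(13, POSTED) = 1 << 6 | 13 = 0x4d: MI_LOAD_REGISTER_IMM, 13 regs
 *	REG16(0x244)    = 0x81, 0x11 (high bits + continuation, then low bits)
 *	REG(0x034)      = 0x034 >> 2  = 0x0d
 *
 * Decoding REG16(0x244) walks the inner loop twice: offset = 0x01, then
 * offset = (0x01 << 7) | 0x11 = 0x91, so regs[0] = mmio_base + (0x91 << 2) =
 * mmio_base + 0x244.
 */
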
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x3b] */
	LRI(1, 0),              /* [0x41] */
	REG(0x0c8),             /* [0x42] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                /* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

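/*
 * Example (illustrative arithmetic): on a 12.50+ platform the skip size is
 * SZ_4K + 96 * sizeof(u32) = 4096 + 384 = 4480 bytes, i.e. the PPHWSP plus
 * the initial block of context registers.
 */
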
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of the PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of the PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of the PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* The indirect ring state page is at the very end of the LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

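/*
 * Taken together, the offset helpers above describe the following LRC BO
 * layout (a sketch derived from these helpers, not from hardware docs):
 *
 *	[0x0000]		ring buffer, lrc->ring.size bytes
 *	[ring.size]		PPHWSP, 4K (seqno at +512, start seqno at
 *				+520, parallel scratch at +2048)
 *	[ring.size + 4K]	context registers
 *	...
 *	[lrc->size - 4K]	indirect ring state page, when supported
 */
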
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

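/*
 * For reference, DECL_MAP_ADDR_HELPERS(seqno) above expands to a
 * __xe_lrc_seqno_map() helper returning lrc->bo->vmap advanced by
 * __xe_lrc_seqno_offset(lrc), and a __xe_lrc_seqno_ggtt_addr() helper
 * returning xe_bo_ggtt_addr(lrc->bo) plus the same offset.
 */
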
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-Process HW Status Page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Initialize the Per-Process HW Status Page (PPHWSP) and the LRC /
	 * context state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While the name suggests something about privileged batches or some
	 * such, this bit really just indicates PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create an LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: Pointer to the created LRC on success, or an error pointer
 * on failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

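/*
 * Example of the ring-space math above (size must be a power of two): with
 * size = SZ_16K, head = 0 and tail = 4, ((0 - 4 - 1) & 0x3fff) + 1 = 16380
 * bytes free; with head == tail the full 16K is reported free.
 */
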
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

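/*
 * Example of the wrap handling above (illustrative numbers): with
 * ring.size = SZ_16K, ring.tail = 16376 and a 16-byte write, rhs = 8, so
 * the first 8 bytes land at the end of the ring and the remaining 8 at
 * offset 0, leaving ring.tail = 8.
 */
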
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

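/*
 * The full descriptor is the flag bits prepared in xe_lrc_init() OR'd with
 * the PPHWSP GGTT address. E.g. a PPGTT context at the (illustrative) GGTT
 * address 0x182000 on a 12.50+ platform gives 0x182000 | LRC_VALID |
 * LRC_PRIVILEGE | FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT),
 * i.e. 0x182119; older platforms also encode engine class/instance here.
 */
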
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence, or an error pointer on failure.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

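/*
 * E.g. a header whose XE_INSTR_LEN_MASK field reads 3 decodes to a 5-dword
 * instruction; an MI_LOAD_REGISTER_IMM loading N registers therefore spans
 * 2 * N + 1 dwords (one header plus N offset/value pairs).
 */
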
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

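/*
 * Sample of the output produced above (header and values illustrative):
 *
 *	[0x11000001] MI_LOAD_REGISTER_IMM: 1 regs
 *	 - 0x2244 = 0x00090009
 */
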
static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

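/*
 * E.g. the CMD_3DSTATE_VS entry in the table above (num_dw = 9) is emitted
 * as the single header dword CMD_3DSTATE_VS | (9 - 2), after which bb->len
 * is advanced past the eight payload dwords without writing them.
 */
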
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo)
		xe_bo_put(snapshot->lrc_bo);
	kfree(snapshot);
}

u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_read_ctx_reg(lrc, CTX_TIMESTAMP);

	return lrc->ctx_timestamp;
}
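
/*
 * Typical use (sketch, not taken from a specific caller): sample the
 * timestamp around a period of interest and compute busyness as a u32
 * difference, which naturally tolerates counter wrap:
 *
 *	u32 old_ts;
 *	u32 new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
 *	u32 delta = new_ts - old_ts;
 */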