// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"

#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

struct xe_lrc_snapshot {
	struct xe_bo *lrc_bo;
	void *lrc_snapshot;
	unsigned long lrc_size, lrc_offset;

	u32 context_desc;
	u32 indirect_context_desc;
	u32 head;
	struct {
		u32 internal;
		u32 memory;
	} tail;
	u32 start_seqno;
	u32 seqno;
	u32 ctx_timestamp;
	u32 ctx_job_timestamp;
};

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size = 4 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		if (GRAPHICS_VER(xe) >= 20)
			size = 3 * SZ_4K;
		else
			size = 14 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size = 2 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

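/*
 * Worked example of the sizing above: on an Xe2 GT with indirect ring
 * state, a render-class LRC is 4 * SZ_4K = 16K of context image plus the
 * 4K indirect ring state page, so xe_gt_lrc_size() returns 20K.
 */
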
/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * commands and register offsets in @regs. The encoding below is used for
 * each byte, and decoding happens in two steps: commands, then addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, sets
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: after an MI_LOAD_REGISTER_IMM command, "count" register
 * addresses are decoded. They are encoded with the REG/REG16 macros: the
 * former is used for offsets smaller than 0x200 while the latter handles
 * anything bigger. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: set when the register offset needs more than 6 bits; an additional
 *      byte with the lower bits follows
 * [6:0]: register offset, without considering the engine base
 *
 * This function only sets up the commands and register offsets; the
 * register values are not filled out.
 */
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

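/*
 * Worked example of the encoding above (illustrative, not consumed by
 * the driver): the first bytes of gen12_xcs_offsets below decode as
 *
 *	NOP(1)          -> 0x81: bit 7 set, skip one dword in @regs
 *	LRI(13, POSTED) -> 0x4d: MI_LOAD_REGISTER_IMM of 13 registers,
 *	                   with MI_LRI_FORCE_POSTED
 *	REG16(0x244)    -> 0x81 0x11: bit 7 of the first byte is set, so
 *	                   another byte follows;
 *	                   offset = ((0x01 << 7) | 0x11) << 2 = 0x244
 *	REG(0x034)      -> 0x0d: offset = 0x0d << 2 = 0x034
 *
 * Each decoded register occupies two dwords in @regs: the mmio address
 * (engine base + offset) and a slot for the value, which set_offsets()
 * leaves to be filled out later.
 */
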
static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1a] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1c] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x1e] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */

	NOP(12),                /* [0x24] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

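/*
 * The MI_MODE value slot at regs[lrc_ring_mi_mode() + 1] holds a masked
 * register value: bits 31:16 of what is written select which of bits 15:0
 * take effect. reset_stop_ring() therefore sets the STOP_RING mask bit
 * (STOP_RING << 16) while leaving the STOP_RING value bit clear, so a
 * context restore clears STOP_RING without disturbing the register's
 * other bits.
 */
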
static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
#define LRC_PPHWSP_SIZE SZ_4K

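/*
 * Resulting layout of the LRC BO, per the offset helpers in this file:
 *
 *	+----------------------------+ <- __xe_lrc_ring_offset() == 0
 *	| ring                       |   lrc->ring.size bytes
 *	+----------------------------+ <- xe_lrc_pphwsp_offset()
 *	| PPHWSP                     |   LRC_PPHWSP_SIZE (4K); seqno,
 *	|                            |   start seqno, job timestamp and
 *	|                            |   parallel data live at the PPHWSP
 *	|                            |   offsets defined above
 *	+----------------------------+ <- xe_lrc_regs_offset()
 *	| context registers          |
 *	+----------------------------+
 *	| indirect ring state page   |   last 4K, only when
 *	|                            |   xe_gt_has_indirect_ring_state()
 *	+----------------------------+ <- lrc->size
 */
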
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch data is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS

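/*
 * For reference, DECL_MAP_ADDR_HELPERS(seqno) above expands to a pair of
 * helpers along these lines:
 *
 *	static inline struct iosys_map __xe_lrc_seqno_map(struct xe_lrc *lrc);
 *	static inline u32 __maybe_unused
 *	__xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc);
 *
 * i.e. a CPU mapping helper and a GGTT address helper, both derived from
 * the matching __xe_lrc_seqno_offset().
 */
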
/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: Per-process HW status page */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       XE_BO_FLAG_VRAM_IF_DGFX(tile) |
				       XE_BO_FLAG_GGTT |
				       XE_BO_FLAG_GGTT_INVALIDATE);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Initialize the per-process HW status page and the LRC / context
	 * state to known values.
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/*
	 * While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create an LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
 * Return: Pointer to the created LRC on success, or an error pointer
 * on failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc;
	int err;

	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (err) {
		kfree(lrc);
		return ERR_PTR(err);
	}

	return lrc;
}

/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0; releases resources held by the Logical Ring Context
 * (LRC) and frees the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(lrc);
	kfree(lrc);
}

void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
}

u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
}

void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
	else
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
}

u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
	else
		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
}

u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 tail = lrc->ring.tail;
	const u32 size = lrc->ring.size;

	return ((head - tail - 1) & (size - 1)) + 1;
}

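/*
 * Worked example for xe_lrc_ring_space() above (ring sizes are powers of
 * two, so the mask is a cheap modulo): with size = SZ_16K, head = 0x100
 * and tail = 0x200, space = ((0x100 - 0x200 - 1) & 0x3fff) + 1 = 0x3f00
 * bytes. With head == tail (an empty ring) the formula yields the full
 * size.
 */
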
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

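/*
 * Example of the wrap handling in xe_lrc_write_ring() above, with
 * illustrative values: for ring.size = SZ_16K and ring.tail = 0x3ff8,
 * writing 16 bytes splits into rhs = 8 bytes at the end of the ring
 * followed by 8 bytes at offset 0, leaving tail = 0x8. A 4-byte
 * (non-8-aligned) write is padded with a single MI_NOOP dword so the
 * tail stays 8-byte aligned.
 */
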
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

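/*
 * Sketch of the descriptor bit layout, from the LRC_* definitions at the
 * top of this file (the GGTT address of the PPHWSP is OR'ed in by
 * xe_lrc_descriptor() above):
 *
 *	[0]     LRC_VALID
 *	[4:3]   LRC_ADDRESSING_MODE (set to LRC_LEGACY_64B_CONTEXT)
 *	[8]     LRC_PRIVILEGE (set when bound to a VM, i.e. PPGTT mode)
 *	[53:48] LRC_ENGINE_INSTANCE (only on graphics IP before 12.50)
 *	[63:61] LRC_ENGINE_CLASS (only on graphics IP before 12.50)
 */
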
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or an error pointer on failure.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

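/*
 * Example: CMD_3DSTATE_VS is emitted as 9 dwords total (see
 * xe_hpg_svg_state below), so its header encodes 9 - 2 = 7 in bits 7:0
 * and instr_dw() returns 7 + 2 = 9 for it.
 */
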
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * At the moment we only need to emit non-register state for the RCS
	 * engine.
	 */
	if (q->hwe->class != XE_ENGINE_CLASS_RENDER)
		return;

	switch (GRAPHICS_VERx100(xe)) {
	case 1255:
	case 1270 ... 2004:
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
		break;
	default:
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

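/*
 * Example of the emission loop above: for the xe_hpg_svg_state entry
 * { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, a single header dword of
 * CMD_3DSTATE_VS | (9 - 2) is written and bb->len advances by 9, leaving
 * the 8 payload dwords that follow the header untouched.
 */
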
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo && lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

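/*
 * Note: snapshot capture is split in two stages. xe_lrc_snapshot_capture()
 * above allocates with GFP_NOWAIT and only records metadata plus a BO
 * reference, so it can run from contexts where sleeping is undesirable;
 * xe_lrc_snapshot_capture_delayed() below then does the sleeping work
 * (GFP_KERNEL allocation, BO locking and the copy of the LRC contents).
 */
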
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 *
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp, and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	return lrc->ctx_timestamp;
}
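
/*
 * Minimal usage sketch (illustrative only, not part of the driver): a
 * caller sampling how much timestamp has accumulated since its previous
 * sample could do
 *
 *	u32 old, now, delta;
 *
 *	now = xe_lrc_update_timestamp(lrc, &old);
 *	delta = now - old;	(u32 arithmetic handles wraparound)
 */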