// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_lrc.h"

#include <generated/xe_wa_oob.h>

#include <linux/ascii85.h>

#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
#include "xe_wa.h"

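/*
 * Fields of the default context descriptor assembled in lrc->desc by
 * xe_lrc_init() and combined with the GGTT address in xe_lrc_descriptor().
 */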
#define LRC_VALID				BIT_ULL(0)
#define LRC_PRIVILEGE				BIT_ULL(8)
#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT			3

#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)

#define LRC_PPHWSP_SIZE				SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K

static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	return gt_to_xe(lrc->fence_ctx.gt);
}

size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t size;

	/* Per-process HW status page (PPHWSP) */
	size = LRC_PPHWSP_SIZE;

	/* Engine context image */
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			size += 3 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		if (GRAPHICS_VER(xe) >= 20)
			size += 2 * SZ_4K;
		else
			size += 13 * SZ_4K;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		size += 1 * SZ_4K;
	}

	/* Add indirect ring state page */
	if (xe_gt_has_indirect_ring_state(gt))
		size += LRC_INDIRECT_RING_STATE_SIZE;

	return size;
}

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are 2 steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allows setting
 *      MI_LRI_FORCE_POSTED
 * [5:0]: number of NOPs, or number of registers to set values for in case of
 *        MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, "count"
 * registers at a time. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for offsets
 * bigger than that. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: when a register offset needs more than 6 bits, additional bytes
 *      follow, carrying the lower bits
 * [6:0]: register offset, without considering the engine base.
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
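/*
 * Worked example (illustrative only): the byte sequence
 *   NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x2b4), 0
 * encodes as { 0x81, 0x42, 0x0d, 0x81, 0x2d, 0x00 } and decodes as "skip one
 * dword, then emit MI_LOAD_REGISTER_IMM for 2 registers with
 * MI_LRI_FORCE_POSTED, followed by the offsets base + 0x034 and base +
 * 0x2b4", leaving the value dwords untouched. REG16(0x2b4) needs two bytes
 * because 0x2b4 >> 2 does not fit in 7 bits: the first byte (0x81) carries
 * the upper bits plus the continuation flag BIT(7), the second (0x2d) the
 * lower 7 bits.
 */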
static void set_offsets(u32 *regs,
			const u8 *data,
			const struct xe_hw_engine *hwe)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
			   BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
	(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
	(((x) >> 2) & 0x7f)
{
	const u32 base = hwe->mmio_base;

	while (*data) {
		u8 count, flags;

		if (*data & BIT(7)) { /* skip */
			count = *data++ & ~BIT(7);
			regs += count;
			continue;
		}

		count = *data & 0x3f;
		flags = *data >> 6;
		data++;

		*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
		if (flags & POSTED)
			*regs |= MI_LRI_FORCE_POSTED;
		*regs |= MI_LRI_LRM_CS_MMIO;
		regs++;

		xe_gt_assert(hwe->gt, count);
		do {
			u32 offset = 0;
			u8 v;

			do {
				v = *data++;
				offset <<= 7;
				offset |= v & ~BIT(7);
			} while (v & BIT(7));

			regs[0] = base + (offset << 2);
			regs += 2;
		} while (--count);
	}

	*regs = MI_BATCH_BUFFER_END | BIT(0);
}

static const u8 gen12_xcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 dg2_xcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	0
};

static const u8 gen12_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),
	NOP(3 + 9 + 1),

	LRI(51, POSTED),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG16(0x588),
	REG(0x028),
	REG(0x09c),
	REG(0x0c0),
	REG(0x178),
	REG(0x17c),
	REG16(0x358),
	REG(0x170),
	REG(0x150),
	REG(0x154),
	REG(0x158),
	REG16(0x41c),
	REG16(0x600),
	REG16(0x604),
	REG16(0x608),
	REG16(0x60c),
	REG16(0x610),
	REG16(0x614),
	REG16(0x618),
	REG16(0x61c),
	REG16(0x620),
	REG16(0x624),
	REG16(0x628),
	REG16(0x62c),
	REG16(0x630),
	REG16(0x634),
	REG16(0x638),
	REG16(0x63c),
	REG16(0x640),
	REG16(0x644),
	REG16(0x648),
	REG16(0x64c),
	REG16(0x650),
	REG16(0x654),
	REG16(0x658),
	REG16(0x65c),
	REG16(0x660),
	REG16(0x664),
	REG16(0x668),
	REG16(0x66c),
	REG16(0x670),
	REG16(0x674),
	REG16(0x678),
	REG16(0x67c),
	REG(0x068),
	REG(0x084),
	NOP(1),

	0
};

static const u8 xehp_rcs_offsets[] = {
	NOP(1),
	LRI(13, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),

	NOP(5),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 dg2_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	LRI(3, POSTED),
	REG(0x1b0),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

static const u8 mtl_rcs_offsets[] = {
	NOP(1),
	LRI(15, POSTED),
	REG16(0x244),
	REG(0x034),
	REG(0x030),
	REG(0x038),
	REG(0x03c),
	REG(0x168),
	REG(0x140),
	REG(0x110),
	REG(0x1c0),
	REG(0x1c4),
	REG(0x1c8),
	REG(0x180),
	REG16(0x2b4),
	REG(0x120),
	REG(0x124),

	NOP(1),
	LRI(9, POSTED),
	REG16(0x3a8),
	REG16(0x28c),
	REG16(0x288),
	REG16(0x284),
	REG16(0x280),
	REG16(0x27c),
	REG16(0x278),
	REG16(0x274),
	REG16(0x270),

	NOP(2),
	LRI(2, POSTED),
	REG16(0x5a8),
	REG16(0x5ac),

	NOP(6),
	LRI(1, 0),
	REG(0x0c8),

	0
};

#define XE2_CTX_COMMON \
	NOP(1),                 /* [0x00] */ \
	LRI(15, POSTED),        /* [0x01] */ \
	REG16(0x244),           /* [0x02] CTXT_SR_CTL */ \
	REG(0x034),             /* [0x04] RING_BUFFER_HEAD */ \
	REG(0x030),             /* [0x06] RING_BUFFER_TAIL */ \
	REG(0x038),             /* [0x08] RING_BUFFER_START */ \
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */ \
	REG(0x168),             /* [0x0c] BB_ADDR_UDW */ \
	REG(0x140),             /* [0x0e] BB_ADDR */ \
	REG(0x110),             /* [0x10] BB_STATE */ \
	REG(0x1c0),             /* [0x12] BB_PER_CTX_PTR */ \
	REG(0x1c4),             /* [0x14] RCS_INDIRECT_CTX */ \
	REG(0x1c8),             /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
	REG(0x180),             /* [0x18] CCID */ \
	REG16(0x2b4),           /* [0x1a] SEMAPHORE_TOKEN */ \
	REG(0x120),             /* [0x1c] PRT_BB_STATE */ \
	REG(0x124),             /* [0x1e] PRT_BB_STATE_UDW */ \
	\
	NOP(1),                 /* [0x20] */ \
	LRI(9, POSTED),         /* [0x21] */ \
	REG16(0x3a8),           /* [0x22] CTX_TIMESTAMP */ \
	REG16(0x3ac),           /* [0x24] CTX_TIMESTAMP_UDW */ \
	REG(0x108),             /* [0x26] INDIRECT_RING_STATE */ \
	REG16(0x284),           /* [0x28] dummy reg */ \
	REG16(0x280),           /* [0x2a] CS_ACC_CTR_THOLD */ \
	REG16(0x27c),           /* [0x2c] CS_CTX_SYS_PASID */ \
	REG16(0x278),           /* [0x2e] CS_CTX_ASID */ \
	REG16(0x274),           /* [0x30] PTBP_UDW */ \
	REG16(0x270)            /* [0x32] PTBP_LDW */

static const u8 xe2_rcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(2),                 /* [0x34] */
	LRI(2, POSTED),         /* [0x36] */
	REG16(0x5a8),           /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
	REG16(0x5ac),           /* [0x39] PREEMPTION_STATUS */

	NOP(6),                 /* [0x41] */
	LRI(1, 0),              /* [0x47] */
	REG(0x0c8),             /* [0x48] R_PWR_CLK_STATE */

	0
};

static const u8 xe2_bcs_offsets[] = {
	XE2_CTX_COMMON,

	NOP(4 + 8 + 1),         /* [0x34] */
	LRI(2, POSTED),         /* [0x41] */
	REG16(0x200),           /* [0x42] BCS_SWCTRL */
	REG16(0x204),           /* [0x44] BLIT_CCTL */

	0
};

static const u8 xe2_xcs_offsets[] = {
	XE2_CTX_COMMON,

	0
};

static const u8 xe2_indirect_ring_state_offsets[] = {
	NOP(1),                 /* [0x00] */
	LRI(5, POSTED),         /* [0x01] */
	REG(0x034),             /* [0x02] RING_BUFFER_HEAD */
	REG(0x030),             /* [0x04] RING_BUFFER_TAIL */
	REG(0x038),             /* [0x06] RING_BUFFER_START */
	REG(0x048),             /* [0x08] RING_BUFFER_START_UDW */
	REG(0x03c),             /* [0x0a] RING_BUFFER_CONTROL */

	NOP(5),                 /* [0x0c] */
	LRI(9, POSTED),         /* [0x11] */
	REG(0x168),             /* [0x12] BB_ADDR_UDW */
	REG(0x140),             /* [0x14] BB_ADDR */
	REG(0x110),             /* [0x16] BB_STATE */
	REG16(0x588),           /* [0x18] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x20] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x22] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x24] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x26] BB_STACK_WRITE_PORT */
	REG16(0x588),           /* [0x28] BB_STACK_WRITE_PORT */

	NOP(12),                 /* [0x00] */

	0
};

#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	if (class == XE_ENGINE_CLASS_RENDER) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		else
			return gen12_rcs_offsets;
	} else if (class == XE_ENGINE_CLASS_COPY) {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		else
			return gen12_xcs_offsets;
	} else {
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		else if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		else
			return gen12_xcs_offsets;
	}
}

static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
						       CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		regs[CTX_CONTEXT_CONTROL] |=
			_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	/* TODO: Timestamp */
}

static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
	struct xe_device *xe = gt_to_xe(hwe->gt);
	u8 num_regs;

	if (!xe_device_uses_memirq(xe))
		return;

	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	num_regs = xe_device_has_msix(xe) ? 3 : 2;
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(num_regs) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq, hwe);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq, hwe);

	if (xe_device_has_msix(xe)) {
		regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
		/* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
	}
}

static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	if (GRAPHICS_VERx100(xe) >= 1250)
		return 0x70;
	else
		return 0x60;
}

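/*
 * MI_MODE is a masked register: the upper 16 bits of the written value
 * select which of the lower 16 bits take effect, so programming
 * STOP_RING << 16 with the bit itself cleared makes the context restore
 * clear STOP_RING.
 */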
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	int x;

	x = lrc_ring_mi_mode(hwe);
	regs[x + 1] &= ~STOP_RING;
	regs[x + 1] |= STOP_RING << 16;
}

static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
}

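/*
 * Layout of the LRC backing object, as implied by the offset helpers below:
 * the ring buffer sits at offset 0, followed by the 4K PPHWSP, then the
 * context register state; on platforms with indirect ring state, that page
 * occupies the final 4K of the object.
 */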
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	return 0;
}

u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	return lrc->ring.size;
}

/* Make the magic macros work */
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset

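/* Byte offsets of the driver-defined scratch slots within the PPHWSP */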
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048

u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}

static size_t lrc_reg_size(struct xe_device *xe)
{
	if (GRAPHICS_VERx100(xe) >= 1250)
		return 96 * sizeof(u32);
	else
		return 80 * sizeof(u32);
}

size_t xe_lrc_skip_size(struct xe_device *xe)
{
	return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
}

static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	/* The seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
}

static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	/* The start seqno is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	/* The job timestamp is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}

static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	/* The parallel scratch area is stored in the driver-defined portion of PPHWSP */
	return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}

static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}

static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	/* Indirect ring state page is at the very end of LRC */
	return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
}

#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
	struct iosys_map map = lrc->bo->vmap; \
\
	xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map));  \
	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
	return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \

DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)

#undef DECL_MAP_ADDR_HELPERS
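
/*
 * As an illustration, DECL_MAP_ADDR_HELPERS(seqno) above expands to
 * __xe_lrc_seqno_map(), which returns an iosys_map pointing at the seqno
 * slot, and __xe_lrc_seqno_ggtt_addr(), which returns the slot's GGTT
 * address; both are derived from __xe_lrc_seqno_offset().
 */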

/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp GGTT address
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_timestamp() - Read ctx timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx timestamp value
 */
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp GGTT address
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
}

/**
 * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
 * @lrc: Pointer to the lrc.
 *
 * Returns: ctx job timestamp value
 */
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_ctx_job_timestamp_map(lrc);
	return xe_map_read32(xe, &map);
}

u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_pphwsp_ggtt_addr(lrc);
}

u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (!xe_lrc_has_indirect_ring_state(lrc))
		return 0;

	return __xe_lrc_indirect_ring_ggtt_addr(lrc);
}

static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_indirect_ring_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	return xe_map_read32(xe, &map);
}

void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map map;

	map = __xe_lrc_regs_map(lrc);
	iosys_map_incr(&map, reg_nr * sizeof(u32));
	xe_map_write32(xe, &map, val);
}

static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *data;
	u32 *regs;

	data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	if (!data)
		return NULL;

	/* 1st page: per-process HW status page (PPHWSP) */
	regs = data + LRC_PPHWSP_SIZE;
	set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(regs, hwe);
	set_memory_based_intr(regs, hwe);
	reset_stop_ring(regs, hwe);
	if (xe_gt_has_indirect_ring_state(gt)) {
		regs = data + xe_gt_lrc_size(gt, hwe->class) -
		       LRC_INDIRECT_RING_STATE_SIZE;
		set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
	}

	return data;
}

static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
	u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);

	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
	xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}

static void xe_lrc_finish(struct xe_lrc *lrc)
{
	xe_hw_fence_ctx_finish(&lrc->fence_ctx);
	xe_bo_lock(lrc->bo, false);
	xe_bo_unpin(lrc->bo);
	xe_bo_unlock(lrc->bo);
	xe_bo_put(lrc->bo);
}

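/*
 * Dword indices into the context register state; the "+ 1" selects the
 * value dword following each LRI offset entry (cf. the [0x2e] CS_CTX_ASID
 * and [0x2a] CS_ACC_CTR_THOLD annotations in XE2_CTX_COMMON above).
 */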
#define PVC_CTX_ASID		(0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)

static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
		       struct xe_vm *vm, u32 ring_size, u16 msix_vec,
		       u32 init_flags)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_device *xe = gt_to_xe(gt);
	struct iosys_map map;
	void *init_data = NULL;
	u32 arb_enable;
	u32 lrc_size;
	u32 bo_flags;
	int err;

	kref_init(&lrc->refcount);
	lrc->flags = 0;
	lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
	if (xe_gt_has_indirect_ring_state(gt))
		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;

	bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
		   XE_BO_FLAG_GGTT_INVALIDATE;
	if (vm && vm->xef) /* userspace */
		bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;

	/*
	 * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
	 * via VM bind calls.
	 */
	lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
				       ttm_bo_type_kernel,
				       bo_flags);
	if (IS_ERR(lrc->bo))
		return PTR_ERR(lrc->bo);

	lrc->size = lrc_size;
	lrc->tile = gt_to_tile(hwe->gt);
	lrc->ring.size = ring_size;
	lrc->ring.tail = 0;
	lrc->ctx_timestamp = 0;

	xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
			     hwe->fence_irq, hwe->name);

	if (!gt->default_lrc[hwe->class]) {
		init_data = empty_lrc_data(hwe);
		if (!init_data) {
			err = -ENOMEM;
			goto err_lrc_finish;
		}
	}

	/*
	 * Init the per-process HW status page and the LRC / context state to
	 * known values
	 */
	map = __xe_lrc_pphwsp_map(lrc);
	if (!init_data) {
		xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE);	/* PPHWSP */
		xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
				 gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
				 xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
	} else {
		xe_map_memcpy_to(xe, &map, 0, init_data,
				 xe_gt_lrc_size(gt, hwe->class));
		kfree(init_data);
	}

	if (vm) {
		xe_lrc_set_ppgtt(lrc, vm);

		if (vm->xef)
			xe_drm_client_add_bo(vm->xef->client, lrc->bo);
	}

	if (xe_device_has_msix(xe)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INT_STATUS_REPORT_PTR,
				     xe_memirq_status_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_INT_SRC_REPORT_PTR,
				     xe_memirq_source_ptr(&tile->memirq, hwe));
		xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix_vec << 16 | msix_vec);
	}

	if (xe_gt_has_indirect_ring_state(gt)) {
		xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
				     __xe_lrc_indirect_ring_ggtt_addr(lrc));

		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
					      __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
					      RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	} else {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
		xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
				     RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
	}

	if (init_flags & XE_LRC_CREATE_RUNALONE)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_RUN_ALONE));

	if (init_flags & XE_LRC_CREATE_PXP)
		xe_lrc_write_ctx_reg(lrc, CTX_CONTEXT_CONTROL,
				     xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
				     _MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);

	if (xe->info.has_asid && vm)
		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);

	lrc->desc = LRC_VALID;
	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
	/* TODO: Priority */

	/* While this appears to have something about privileged batches or
	 * some such, it really just means PPGTT mode.
	 */
	if (vm)
		lrc->desc |= LRC_PRIVILEGE;

	if (GRAPHICS_VERx100(xe) < 1250) {
		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
	}

	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));

	map = __xe_lrc_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	map = __xe_lrc_start_seqno_map(lrc);
	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);

	return 0;

err_lrc_finish:
	xe_lrc_finish(lrc);
	return err;
}

/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 * @msix_vec: MSI-X interrupt vector (for platforms that support it)
 * @flags: LRC initialization flags
 *
 * Allocate and initialize the Logical Ring Context (LRC).
 *
1047  * upon failure.
1048  */
1049 struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
1050 			     u32 ring_size, u16 msix_vec, u32 flags)
1051 {
1052 	struct xe_lrc *lrc;
1053 	int err;
1054 
1055 	lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
1056 	if (!lrc)
1057 		return ERR_PTR(-ENOMEM);
1058 
1059 	err = xe_lrc_init(lrc, hwe, vm, ring_size, msix_vec, flags);
1060 	if (err) {
1061 		kfree(lrc);
1062 		return ERR_PTR(err);
1063 	}
1064 
1065 	return lrc;
1066 }
1067 
1068 /**
1069  * xe_lrc_destroy - Destroy the LRC
1070  * @ref: reference to LRC
1071  *
1072  * Called when ref == 0, release resources held by the Logical Ring Context
1073  * (LRC) and free the LRC memory.
1074  */
1075 void xe_lrc_destroy(struct kref *ref)
1076 {
1077 	struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
1078 
1079 	xe_lrc_finish(lrc);
1080 	kfree(lrc);
1081 }
1082 
1083 void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
1084 {
1085 	if (xe_lrc_has_indirect_ring_state(lrc))
1086 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
1087 	else
1088 		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
1089 }
1090 
1091 u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
1092 {
1093 	if (xe_lrc_has_indirect_ring_state(lrc))
1094 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
1095 	else
1096 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
1097 }
1098 
1099 static u32 xe_lrc_ring_start(struct xe_lrc *lrc)
1100 {
1101 	if (xe_lrc_has_indirect_ring_state(lrc))
1102 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START);
1103 	else
1104 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_START);
1105 }
1106 
1107 void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
1108 {
1109 	if (xe_lrc_has_indirect_ring_state(lrc))
1110 		xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
1111 	else
1112 		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
1113 }
1114 
1115 u32 xe_lrc_ring_head(struct xe_lrc *lrc)
1116 {
1117 	if (xe_lrc_has_indirect_ring_state(lrc))
1118 		return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
1119 	else
1120 		return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
1121 }
1122 
1123 u32 xe_lrc_ring_space(struct xe_lrc *lrc)
1124 {
1125 	const u32 head = xe_lrc_ring_head(lrc);
1126 	const u32 tail = lrc->ring.tail;
1127 	const u32 size = lrc->ring.size;
1128 
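	/*
	 * The ring size is a power of two, so this wraps correctly: the
	 * "- 1 ... + 1" keeps the result in the range [1, size], meaning an
	 * empty ring (head == tail) reports the full size as free.
	 */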
	return ((head - tail - 1) & (size - 1)) + 1;
}

static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);
	lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
}

void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 rhs;
	size_t aligned_size;

	xe_assert(xe, IS_ALIGNED(size, 4));
	aligned_size = ALIGN(size, 8);

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	rhs = lrc->ring.size - lrc->ring.tail;
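	/*
	 * If the payload would run past the end of the ring, split it into
	 * one copy up to the ring end and a second from the ring start; odd
	 * dword counts are then padded to a qword with an MI_NOOP below.
	 */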
	if (size > rhs) {
		__xe_lrc_write_ring(lrc, ring, data, rhs);
		__xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
	} else {
		__xe_lrc_write_ring(lrc, ring, data, size);
	}

	if (aligned_size > size) {
		u32 noop = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
	}
}

u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	return lrc->desc | xe_lrc_ggtt_addr(lrc);
}

u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_seqno_ggtt_addr(lrc);
}

/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence.
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	return xe_hw_fence_alloc();
}

/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized.
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}

/**
 * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
 * @lrc: Pointer to the lrc.
 * @fence: Pointer to the fence to initialize.
 *
 * Initializes a pre-allocated lrc seqno fence.
 * After initialization, the fence is subject to normal
 * dma-fence refcounting.
 */
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
	xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}

s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
	struct iosys_map map = __xe_lrc_start_seqno_map(lrc);

	return xe_map_read32(lrc_to_xe(lrc), &map);
}

u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_start_seqno_ggtt_addr(lrc);
}

u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_ggtt_addr(lrc);
}

struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
	return __xe_lrc_parallel_map(lrc);
}

static int instr_dw(u32 cmd_header)
{
	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
	    GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;

	/* Most instructions have the # of dwords (minus 2) in 7:0 */
	return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
}

static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		for (int i = 1; i < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}

static int dump_gfxpipe_command(struct drm_printer *p,
				struct xe_gt *gt,
				u32 *dw,
				int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
	u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
	u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
	case cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw
#define MATCH3D(cmd) \
	case CMD_##cmd: \
		drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
		return numdw

	MATCH(STATE_BASE_ADDRESS);
	MATCH(STATE_SIP);
	MATCH(GPGPU_CSR_BASE_ADDRESS);
	MATCH(STATE_COMPUTE_MODE);
	MATCH3D(3DSTATE_BTD);
	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);

	MATCH3D(3DSTATE_VF_STATISTICS);

	MATCH(PIPELINE_SELECT);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
	MATCH3D(3DSTATE_CLEAR_PARAMS);
	MATCH3D(3DSTATE_DEPTH_BUFFER);
	MATCH3D(3DSTATE_STENCIL_BUFFER);
	MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
	MATCH3D(3DSTATE_VERTEX_BUFFERS);
	MATCH3D(3DSTATE_VERTEX_ELEMENTS);
	MATCH3D(3DSTATE_INDEX_BUFFER);
	MATCH3D(3DSTATE_VF);
	MATCH3D(3DSTATE_MULTISAMPLE);
	MATCH3D(3DSTATE_CC_STATE_POINTERS);
	MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
	MATCH3D(3DSTATE_VS);
	MATCH3D(3DSTATE_GS);
	MATCH3D(3DSTATE_CLIP);
	MATCH3D(3DSTATE_SF);
	MATCH3D(3DSTATE_WM);
	MATCH3D(3DSTATE_CONSTANT_VS);
	MATCH3D(3DSTATE_CONSTANT_GS);
	MATCH3D(3DSTATE_CONSTANT_PS);
	MATCH3D(3DSTATE_SAMPLE_MASK);
	MATCH3D(3DSTATE_CONSTANT_HS);
	MATCH3D(3DSTATE_CONSTANT_DS);
	MATCH3D(3DSTATE_HS);
	MATCH3D(3DSTATE_TE);
	MATCH3D(3DSTATE_DS);
	MATCH3D(3DSTATE_STREAMOUT);
	MATCH3D(3DSTATE_SBE);
	MATCH3D(3DSTATE_PS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
	MATCH3D(3DSTATE_CPS_POINTERS);
	MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
	MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
	MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
	MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
	MATCH3D(3DSTATE_VF_INSTANCING);
	MATCH3D(3DSTATE_VF_SGVS);
	MATCH3D(3DSTATE_VF_TOPOLOGY);
	MATCH3D(3DSTATE_WM_CHROMAKEY);
	MATCH3D(3DSTATE_PS_BLEND);
	MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
	MATCH3D(3DSTATE_PS_EXTRA);
	MATCH3D(3DSTATE_RASTER);
	MATCH3D(3DSTATE_SBE_SWIZ);
	MATCH3D(3DSTATE_WM_HZ_OP);
	MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
	MATCH3D(3DSTATE_VF_SGVS_2);
	MATCH3D(3DSTATE_VFG);
	MATCH3D(3DSTATE_URB_ALLOC_VS);
	MATCH3D(3DSTATE_URB_ALLOC_HS);
	MATCH3D(3DSTATE_URB_ALLOC_DS);
	MATCH3D(3DSTATE_URB_ALLOC_GS);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
	MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
	MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
	MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
	MATCH3D(3DSTATE_AMFS);
	MATCH3D(3DSTATE_DEPTH_BOUNDS);
	MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
	MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
	MATCH3D(3DSTATE_MESH_CONTROL);
	MATCH3D(3DSTATE_MESH_DISTRIB);
	MATCH3D(3DSTATE_TASK_REDISTRIB);
	MATCH3D(3DSTATE_MESH_SHADER);
	MATCH3D(3DSTATE_MESH_SHADER_DATA);
	MATCH3D(3DSTATE_TASK_CONTROL);
	MATCH3D(3DSTATE_TASK_SHADER);
	MATCH3D(3DSTATE_TASK_SHADER_DATA);
	MATCH3D(3DSTATE_URB_ALLOC_MESH);
	MATCH3D(3DSTATE_URB_ALLOC_TASK);
	MATCH3D(3DSTATE_CLIP_MESH);
	MATCH3D(3DSTATE_SBE_MESH);
	MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
	MATCH3D(3DSTATE_COARSE_PIXEL);

	MATCH3D(3DSTATE_DRAWING_RECTANGLE);
	MATCH3D(3DSTATE_CHROMA_KEY);
	MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
	MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
	MATCH3D(3DSTATE_LINE_STIPPLE);
	MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
	MATCH3D(3DSTATE_MONOFILTER_SIZE);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
	MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
	MATCH3D(3DSTATE_SO_DECL_LIST);
	MATCH3D(3DSTATE_SO_BUFFER);
	MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
	MATCH3D(3DSTATE_SAMPLE_PATTERN);
	MATCH3D(3DSTATE_3D_MODE);
	MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
	MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
	MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);

	default:
		drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
			   *dw, pipeline, opcode, subopcode, numdw);
		return numdw;
	}
}

static int dump_gfx_state_command(struct drm_printer *p,
				  struct xe_gt *gt,
				  u32 *dw,
				  int remaining_dw)
{
	u32 numdw = instr_dw(*dw);
	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);

	/*
	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
	 * remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
	MATCH(STATE_WRITE_INLINE);

	default:
		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
			   *dw, opcode, numdw);
		return numdw;
	}
}

void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}

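/*
 * .num_dw below is the total instruction length in dwords, including the
 * header dword itself (the header encodes num_dw - 2 for multi-dword
 * instructions, as emitted in xe_lrc_emit_hwe_state_instructions()).
 */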
struct instr_state {
	u32 instr;
	u16 num_dw;
};

static const struct instr_state xe_hpg_svg_state[] = {
	{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
	{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
	{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
	{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
	{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
	{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
	{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
	{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
	{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};

void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
	struct xe_gt *gt = q->hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	const struct instr_state *state_table = NULL;
	int state_table_size = 0;

	/*
	 * Wa_14019789679
	 *
	 * If the driver doesn't explicitly emit the SVG instructions while
	 * setting up the default LRC, the context switch will write 0's
	 * (noops) into the LRC memory rather than the expected instruction
	 * headers.  Application contexts start out as a copy of the default
	 * LRC, and if they also do not emit specific settings for some SVG
	 * state, then on context restore they'll unintentionally inherit
	 * whatever state setting the previous context had programmed into the
	 * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
	 * prevent the hardware from resetting that state back to any specific
	 * value).
	 *
	 * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
	 * since that's a specific state setting that can easily cause GPU
	 * hangs if unintentionally inherited.  However to be safe we'll
	 * continue to emit all of the SVG state since it's best not to leak
	 * any of the state between contexts, even if that leakage is harmless.
	 */
	if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
		state_table = xe_hpg_svg_state;
		state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
	}

	if (!state_table) {
		xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
			  GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
		return;
	}

	for (int i = 0; i < state_table_size; i++) {
		u32 instr = state_table[i].instr;
		u16 num_dw = state_table[i].num_dw;
		bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);

		xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
		xe_gt_assert(gt, num_dw != 0);
		xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));

		/*
		 * Xe2's SVG context is the same as the one on DG2 / MTL
		 * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
		 * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
		 * Just make the replacement here rather than defining a
		 * whole separate table for the single trivial change.
		 */
		if (GRAPHICS_VER(xe) >= 20 &&
		    instr == CMD_3DSTATE_DRAWING_RECTANGLE)
			instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;

		bb->cs[bb->len] = instr;
		if (!is_single_dw)
			bb->cs[bb->len] |= (num_dw - 2);

		bb->len += num_dw;
	}
}

struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);

	if (!snapshot)
		return NULL;

	if (lrc->bo->vm)
		xe_vm_get(lrc->bo->vm);

	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
	snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
	snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
	snapshot->head = xe_lrc_ring_head(lrc);
	snapshot->tail.internal = lrc->ring.tail;
	snapshot->tail.memory = xe_lrc_ring_tail(lrc);
	snapshot->start = xe_lrc_ring_start(lrc);
	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
	snapshot->seqno = xe_lrc_seqno(lrc);
	snapshot->lrc_bo = xe_bo_get(lrc->bo);
	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
	snapshot->lrc_snapshot = NULL;
	snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
	snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
	return snapshot;
}

void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	bo = snapshot->lrc_bo;
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);
put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}

void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
	unsigned long i;

	if (!snapshot)
		return;

	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
	drm_printf(p, "\tHW Ring address: 0x%08x\n",
		   snapshot->ring_addr);
	drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
		   snapshot->indirect_context_desc);
	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
		   snapshot->tail.internal, snapshot->tail.memory);
	drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start);
	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
	drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
	drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);

	if (!snapshot->lrc_snapshot)
		return;

	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWSP].data: ");
	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}

	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
	drm_puts(p, "\t[HWCTX].data: ");
	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
		u32 *val = snapshot->lrc_snapshot + i;
		char dumped[ASCII85_BUFSZ];

		drm_puts(p, ascii85_encode(*val, dumped));
	}
	drm_puts(p, "\n");
}

void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
	if (!snapshot)
		return;

	kvfree(snapshot->lrc_snapshot);
	if (snapshot->lrc_bo) {
		struct xe_vm *vm;

		vm = snapshot->lrc_bo->vm;
		xe_bo_put(snapshot->lrc_bo);
		if (vm)
			xe_vm_put(vm);
	}
	kfree(snapshot);
}

/**
 * xe_lrc_update_timestamp() - Update ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Old timestamp value
 * Populate @old_ts with the current saved ctx timestamp, read the new ctx
 * timestamp and update the saved value.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	*old_ts = lrc->ctx_timestamp;

	lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);

	trace_xe_lrc_update_timestamp(lrc, *old_ts);

	return lrc->ctx_timestamp;
}

/**
 * xe_lrc_ring_is_idle() - LRC is idle
 * @lrc: Pointer to the lrc.
 *
 * Compare LRC ring head and tail to determine if idle.
 *
 * Return: True if the ring is idle, False otherwise
 */
bool xe_lrc_ring_is_idle(struct xe_lrc *lrc)
{
	return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc);
}